*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/osp/osp_precreate.c
*
*
* each time OSP gets connected to OST, we should start from precreation cleanup
*/
-static inline bool osp_precreate_running(struct osp_device *d)
-{
- return !!(d->opd_pre_thread.t_flags & SVC_RUNNING);
-}
-
-static inline bool osp_precreate_stopped(struct osp_device *d)
-{
- return !!(d->opd_pre_thread.t_flags & SVC_STOPPED);
-}
-
/*
 * Callback for opd_statfs_timer.
 *
 * Recovers the owning osp_device from the timer argument and, when a
 * precreate task is registered, wakes whoever sleeps on opd_pre_waitq.
 *
 * \param[in] data	timer callback argument identifying opd_statfs_timer
 */
static void osp_statfs_timer_cb(cfs_timer_cb_arg_t data)
{
/* NOTE(review): presumably a container_of-style lookup via the
 * opd_statfs_timer member — confirm against cfs_from_timer() */
struct osp_device *d = cfs_from_timer(d, data, opd_statfs_timer);
LASSERT(d);
- if (osp_precreate_running(d))
+ if (d->opd_pre_task) /* set by osp_init_statfs(), cleared by osp_statfs_fini() */
wake_up(&d->opd_pre_waitq);
}
union ptlrpc_async_args *aa = args;
struct obd_import *imp = req->rq_import;
struct obd_statfs *msfs;
+ struct obd_statfs *sfs;
struct osp_device *d;
u64 maxage_ns;
jiffies + cfs_time_seconds(d->opd_statfs_maxage));
d->opd_statfs_update_in_progress = 0;
- CDEBUG(D_CACHE, "updated statfs %p\n", d);
+ sfs = &d->opd_statfs;
+ CDEBUG(D_CACHE, "%s (%p): %llu blocks, %llu free, %llu avail, "
+ "%u bsize, %u reserved mb low, %u reserved mb high,"
+ "%llu files, %llu free files\n", d->opd_obd->obd_name, d,
+ sfs->os_blocks, sfs->os_bfree, sfs->os_bavail, sfs->os_bsize,
+ d->opd_reserved_mb_low, d->opd_reserved_mb_high,
+ sfs->os_files, sfs->os_ffree);
RETURN(0);
out:
/* couldn't update statfs, try again with a small delay */
d->opd_statfs_fresh_till = ktime_add_ns(ktime_get(), 10 * NSEC_PER_SEC);
d->opd_statfs_update_in_progress = 0;
- if (d->opd_pre != NULL && osp_precreate_running(d))
+ if (d->opd_pre && d->opd_pre_task)
wake_up(&d->opd_pre_waitq);
if (req->rq_import_generation == imp->imp_generation)
d->opd_obd->obd_name,
atomic_read(&d->opd_sync_changes));
osp_sync_add_commit_cb_1s(env, d, th);
- dt_trans_stop(env, d->opd_storage, th);
}
+ dt_trans_stop(env, d->opd_storage, th);
}
out:
if (rc) {
CERROR("%s: can't precreate: rc = %d\n", d->opd_obd->obd_name,
rc);
+ if (req->rq_net_err)
+ /* have osp_precreate_reserve() wait for a repeat */
+ rc = -ENOTCONN;
GOTO(out_req, rc);
}
LASSERT(req->rq_transno == 0);
osp_pre_update_status(d, rc);
wake_up(&d->opd_pre_user_waitq);
+ /* pause to let osp_precreate_reserve() go first */
+ CFS_FAIL_TIMEOUT(OBD_FAIL_OSP_PRECREATE_PAUSE, 2);
+
ptlrpc_req_finished(req);
RETURN(rc);
}
struct ptlrpc_request *req = NULL;
struct obd_import *imp;
struct ost_body *body;
- struct l_wait_info lwi = { 0 };
int update_status = 0;
int rc;
int diff;
* catch all osp_precreate_reserve() calls who find
* "!opd_pre_recovering".
*/
- l_wait_event(d->opd_pre_waitq,
- (!d->opd_pre_reserved && d->opd_recovery_completed) ||
- !osp_precreate_running(d) || d->opd_got_disconnected,
- &lwi);
- if (!osp_precreate_running(d) || d->opd_got_disconnected)
+ wait_event_idle(d->opd_pre_waitq,
+ (!d->opd_pre_reserved && d->opd_recovery_completed) ||
+ !d->opd_pre_task || d->opd_got_disconnected);
+ if (!d->opd_pre_task || d->opd_got_disconnected)
GOTO(out, rc = -EAGAIN);
CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
available_mb = (msfs->os_bavail * (msfs->os_bsize >> 10)) >> 10;
if (msfs->os_ffree < reserved_ino_low)
- msfs->os_state |= OS_STATE_ENOINO;
+ msfs->os_state |= OS_STATFS_ENOINO;
else if (msfs->os_ffree <= reserved_ino_high)
- msfs->os_state |= old_state & OS_STATE_ENOINO;
+ msfs->os_state |= old_state & OS_STATFS_ENOINO;
/* else don't clear flags in new msfs->os_state sent from OST */
CDEBUG(D_INFO,
msfs->os_files, msfs->os_ffree, msfs->os_state,
d->opd_pre_status);
if (available_mb < d->opd_reserved_mb_low)
- msfs->os_state |= OS_STATE_ENOSPC;
+ msfs->os_state |= OS_STATFS_ENOSPC;
else if (available_mb <= d->opd_reserved_mb_high)
- msfs->os_state |= old_state & OS_STATE_ENOSPC;
+ msfs->os_state |= old_state & OS_STATFS_ENOSPC;
/* else don't clear flags in new msfs->os_state sent from OST */
- if (msfs->os_state & (OS_STATE_ENOINO | OS_STATE_ENOSPC)) {
+ if (msfs->os_state & (OS_STATFS_ENOINO | OS_STATFS_ENOSPC)) {
d->opd_pre_status = -ENOSPC;
- if (!(old_state & (OS_STATE_ENOINO | OS_STATE_ENOSPC)))
+ if (!(old_state & (OS_STATFS_ENOINO | OS_STATFS_ENOSPC)))
CDEBUG(D_INFO, "%s: full: state=%x: rc = %x\n",
d->opd_obd->obd_name, msfs->os_state,
d->opd_pre_status);
CDEBUG(D_INFO, "uncommitted changes=%u in_progress=%u\n",
atomic_read(&d->opd_sync_changes),
atomic_read(&d->opd_sync_rpcs_in_progress));
- } else if (old_state & (OS_STATE_ENOINO | OS_STATE_ENOSPC)) {
+ } else if (old_state & (OS_STATFS_ENOINO | OS_STATFS_ENOSPC)) {
d->opd_pre_status = 0;
spin_lock(&d->opd_pre_lock);
d->opd_pre_create_slow = 0;
/* Object precreation skipped on OST if manually disabled */
if (d->opd_pre_max_create_count == 0)
- msfs->os_state |= OS_STATE_NOPRECREATE;
+ msfs->os_state |= OS_STATFS_NOPRECREATE;
/* else don't clear flags in new msfs->os_state sent from OST */
/* copy only new statfs state to make it visible to MDS threads */
RETURN(rc);
}
/*
 * Startup arguments handed to osp_precreate_thread().
 *
 * Allocated and filled in by osp_init_statfs(); once the thread has been
 * created it owns the structure and frees it on exit.
 */
+struct opt_args {
+ struct osp_device *opta_dev; /* device the thread works on */
+ struct lu_env opta_env; /* initialised by osp_init_statfs(), finalised by the thread */
+ struct completion *opta_started; /* completed by the thread on entry; points at the starter's on-stack completion */
+};
/**
* The core of precreate functionality
*
* \retval 0 on success
* \retval negative negated errno on error
*/
-static int osp_precreate_thread(void *_arg)
+static int osp_precreate_thread(void *_args)
{
- struct osp_device *d = _arg;
- struct ptlrpc_thread *thread = &d->opd_pre_thread;
- struct l_wait_info lwi = { 0 };
- struct l_wait_info lwi2 = LWI_TIMEOUT(cfs_time_seconds(5),
- back_to_sleep, NULL);
- struct lu_env env;
+ struct opt_args *args = _args;
+ struct osp_device *d = args->opta_dev;
+ struct lu_env *env = &args->opta_env;
int rc;
ENTRY;
- rc = lu_env_init(&env, d->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
- if (rc) {
- CERROR("%s: init env error: rc = %d\n", d->opd_obd->obd_name,
- rc);
-
- spin_lock(&d->opd_pre_lock);
- thread->t_flags = SVC_STOPPED;
- spin_unlock(&d->opd_pre_lock);
- wake_up(&thread->t_ctl_waitq);
-
- RETURN(rc);
- }
-
- spin_lock(&d->opd_pre_lock);
- thread->t_flags = SVC_RUNNING;
- spin_unlock(&d->opd_pre_lock);
- wake_up(&thread->t_ctl_waitq);
-
- while (osp_precreate_running(d)) {
+ complete(args->opta_started);
+ while (!kthread_should_stop()) {
/*
* need to be connected to OST
*/
- while (osp_precreate_running(d)) {
+ while (!kthread_should_stop()) {
if ((d->opd_pre == NULL || d->opd_pre_recovering) &&
d->opd_imp_connected &&
!d->opd_got_disconnected)
break;
- l_wait_event(d->opd_pre_waitq,
- !osp_precreate_running(d) ||
- d->opd_new_connection,
- &lwi);
+ wait_event_idle(d->opd_pre_waitq,
+ kthread_should_stop() ||
+ d->opd_new_connection);
if (!d->opd_new_connection)
continue;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OSP_CON_EVENT_DELAY,
+ cfs_fail_val);
d->opd_new_connection = 0;
d->opd_got_disconnected = 0;
break;
}
- if (!osp_precreate_running(d))
+ if (kthread_should_stop())
break;
if (d->opd_pre) {
}
}
- if (osp_statfs_update(&env, d)) {
- l_wait_event(d->opd_pre_waitq,
- !osp_precreate_running(d), &lwi2);
+ if (osp_statfs_update(env, d)) {
+ if (wait_event_idle_timeout(d->opd_pre_waitq,
+ kthread_should_stop(),
+ cfs_time_seconds(5)) == 0)
+ l_wait_event_abortable(
+ d->opd_pre_waitq,
+ kthread_should_stop());
continue;
}
/*
* Clean up orphans or recreate missing objects.
*/
- rc = osp_precreate_cleanup_orphans(&env, d);
+ rc = osp_precreate_cleanup_orphans(env, d);
if (rc != 0) {
schedule_timeout_interruptible(cfs_time_seconds(1));
continue;
/*
* connected, can handle precreates now
*/
- while (osp_precreate_running(d)) {
- l_wait_event(d->opd_pre_waitq,
- !osp_precreate_running(d) ||
- osp_precreate_near_empty(&env, d) ||
- osp_statfs_need_update(d) ||
- d->opd_got_disconnected, &lwi);
-
- if (!osp_precreate_running(d))
+ while (!kthread_should_stop()) {
+ wait_event_idle(d->opd_pre_waitq,
+ kthread_should_stop() ||
+ osp_precreate_near_empty(env, d) ||
+ osp_statfs_need_update(d) ||
+ d->opd_got_disconnected);
+
+ if (kthread_should_stop())
break;
/* something happened to the connection
break;
if (osp_statfs_need_update(d))
- if (osp_statfs_update(&env, d))
+ if (osp_statfs_update(env, d))
break;
if (d->opd_pre == NULL)
/* To avoid handling different seq in precreate/orphan
* cleanup, it will hold precreate until current seq is
* used up. */
- if (unlikely(osp_precreate_end_seq(&env, d) &&
- !osp_create_end_seq(&env, d)))
+ if (unlikely(osp_precreate_end_seq(env, d) &&
+ !osp_create_end_seq(env, d)))
continue;
- if (unlikely(osp_precreate_end_seq(&env, d) &&
- osp_create_end_seq(&env, d))) {
+ if (unlikely(osp_precreate_end_seq(env, d) &&
+ osp_create_end_seq(env, d))) {
LCONSOLE_INFO("%s:%#llx is used up."
" Update to new seq\n",
d->opd_obd->obd_name,
fid_seq(&d->opd_pre_last_created_fid));
- rc = osp_precreate_rollover_new_seq(&env, d);
+ rc = osp_precreate_rollover_new_seq(env, d);
if (rc)
continue;
}
- if (osp_precreate_near_empty(&env, d)) {
- rc = osp_precreate_send(&env, d);
+ if (osp_precreate_near_empty(env, d)) {
+ rc = osp_precreate_send(env, d);
/* osp_precreate_send() sets opd_pre_status
* in case of error, that prevent the using of
* failed device. */
}
}
- thread->t_flags = SVC_STOPPED;
- lu_env_fini(&env);
- wake_up(&thread->t_ctl_waitq);
+ lu_env_fini(env);
+ OBD_FREE_PTR(args);
RETURN(0);
}
return 0;
}
-static int osp_precreate_timeout_condition(void *data)
-{
- struct osp_device *d = data;
-
- CDEBUG(D_HA, "%s: slow creates, last="DFID", next="DFID", "
- "reserved=%llu, sync_changes=%u, "
- "sync_rpcs_in_progress=%d, status=%d\n",
- d->opd_obd->obd_name, PFID(&d->opd_pre_last_created_fid),
- PFID(&d->opd_pre_used_fid), d->opd_pre_reserved,
- atomic_read(&d->opd_sync_changes),
- atomic_read(&d->opd_sync_rpcs_in_progress),
- d->opd_pre_status);
-
- return 1;
-}
-
/**
* Reserve object in precreate pool
*
* \retval -EAGAIN try later, slow precreation in progress
* \retval -EIO when no access to OST
*/
-int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
+int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d,
+ bool can_block)
{
time64_t expire = ktime_get_seconds() + obd_timeout;
- struct l_wait_info lwi;
int precreated, rc, synced = 0;
ENTRY;
synced = 1;
}
if (atomic_read(&d->opd_sync_rpcs_in_progress)) {
- /* just wait till destroys are done */
- /* see l_wait_even() few lines below */
+ /* just wait till destroys are done
+ * see wait_event_idle_timeout() below
+ */
}
if (atomic_read(&d->opd_sync_changes) +
atomic_read(&d->opd_sync_rpcs_in_progress) == 0) {
/* XXX: don't wake up if precreation is in progress */
wake_up(&d->opd_pre_waitq);
- lwi = LWI_TIMEOUT(cfs_time_seconds(obd_timeout),
- osp_precreate_timeout_condition, d);
if (ktime_get_seconds() >= expire) {
rc = -ETIMEDOUT;
break;
}
- l_wait_event(d->opd_pre_user_waitq,
- osp_precreate_ready_condition(env, d), &lwi);
+ if (!can_block) {
+ LASSERT(d->opd_pre);
+ rc = -ENOBUFS;
+ break;
+ }
+
+ if (wait_event_idle_timeout(
+ d->opd_pre_user_waitq,
+ osp_precreate_ready_condition(env, d),
+ cfs_time_seconds(obd_timeout)) == 0) {
+ CDEBUG(D_HA,
+ "%s: slow creates, last="DFID", next="DFID", "
+ "reserved=%llu, sync_changes=%u, "
+ "sync_rpcs_in_progress=%d, status=%d\n",
+ d->opd_obd->obd_name,
+ PFID(&d->opd_pre_last_created_fid),
+ PFID(&d->opd_pre_used_fid), d->opd_pre_reserved,
+ atomic_read(&d->opd_sync_changes),
+ atomic_read(&d->opd_sync_rpcs_in_progress),
+ d->opd_pre_status);
+ }
}
RETURN(rc);
* XXX: decide how do we do here with resend
* if we don't resend, then client may see wrong file size
* if we do resend, then MDS thread can get stuck for quite long
- * and if we don't resend, then client will also get -EWOULDBLOCK !!
+ * and if we don't resend, then client will also get -EAGAIN !!
* (see LU-7975 and sanity/test_27F use cases)
* but let's decide not to resend/delay this truncate request to OST
* and allow Client to decide to resend, in a less aggressive way from
* after_reply(), by returning -EINPROGRESS instead of
- * -EAGAIN/-EWOULDBLOCK upon return from ptlrpc_queue_wait() at the
+ * -EAGAIN upon return from ptlrpc_queue_wait() at the
* end of this routine
*/
req->rq_no_resend = req->rq_no_delay = 1;
rc = ptlrpc_queue_wait(req);
if (rc) {
- /* -EWOULDBLOCK/-EAGAIN means OST is unreachable at the moment
+ /* -EAGAIN/-EWOULDBLOCK means OST is unreachable at the moment
* since we have decided not to resend/delay, but this could
* lead to wrong size to be seen at Client side and even process
* trying to open to exit/fail if not itself handling -EAGAIN.
* So it should be better to return -EINPROGRESS instead and
* leave the decision to resend at Client side in after_reply()
*/
- if (rc == -EWOULDBLOCK) {
+ if (rc == -EAGAIN) {
rc = -EINPROGRESS;
CDEBUG(D_HA, "returning -EINPROGRESS instead of "
"-EWOULDBLOCK/-EAGAIN to allow Client to "
int osp_init_statfs(struct osp_device *d)
{
- struct l_wait_info lwi = { 0 };
- struct task_struct *task;
+ struct task_struct *task;
+ struct opt_args *args;
+ DECLARE_COMPLETION_ONSTACK(started);
+ int rc;
ENTRY;
spin_lock_init(&d->opd_pre_lock);
init_waitqueue_head(&d->opd_pre_waitq);
- thread_set_flags(&d->opd_pre_thread, SVC_INIT);
- init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);
/*
* Initialize statfs-related things
if (d->opd_storage->dd_rdonly)
RETURN(0);
+ OBD_ALLOC_PTR(args);
+ if (!args)
+ RETURN(0);
+ args->opta_dev = d;
+ args->opta_started = &started;
+ rc = lu_env_init(&args->opta_env,
+ d->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
+ if (rc) {
+ CERROR("%s: init env error: rc = %d\n", d->opd_obd->obd_name,
+ rc);
+ OBD_FREE_PTR(args);
+ RETURN(0);
+ }
+
/*
* start thread handling precreation and statfs updates
*/
- task = kthread_run(osp_precreate_thread, d,
- "osp-pre-%u-%u", d->opd_index, d->opd_group);
+ task = kthread_create(osp_precreate_thread, args,
+ "osp-pre-%u-%u", d->opd_index, d->opd_group);
if (IS_ERR(task)) {
CERROR("can't start precreate thread %ld\n", PTR_ERR(task));
+ lu_env_fini(&args->opta_env);
+ OBD_FREE_PTR(args);
RETURN(PTR_ERR(task));
}
-
- l_wait_event(d->opd_pre_thread.t_ctl_waitq,
- osp_precreate_running(d) || osp_precreate_stopped(d),
- &lwi);
+ d->opd_pre_task = task;
+ wake_up_process(task);
+ wait_for_completion(&started);
RETURN(0);
}
/*
 * Tear down what osp_init_statfs() set up.
 *
 * Deletes opd_statfs_timer, clears opd_pre_task and, if a precreate task
 * had been recorded, stops it with kthread_stop().
 *
 * \param[in] d		OSP device being shut down
 */
void osp_statfs_fini(struct osp_device *d)
{
- struct ptlrpc_thread *thread = &d->opd_pre_thread;
+ struct task_struct *task = d->opd_pre_task; /* presumably NULL if the thread was never started — confirm the field starts zeroed */
ENTRY;
del_timer(&d->opd_statfs_timer);
- if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
- thread->t_flags = SVC_STOPPING;
- wake_up(&d->opd_pre_waitq);
- wait_event(thread->t_ctl_waitq, thread_is_stopped(thread));
- }
+ d->opd_pre_task = NULL; /* cleared before stopping; other code in this file tests opd_pre_task before waking the thread */
+ if (task)
+ kthread_stop(task); /* the thread's loops poll kthread_should_stop() */
EXIT;
}