#include <uapi/linux/lustre/lustre_param.h>
#include <lustre_update.h>
#include <lustre_log.h>
+#include <lustre_lmv.h>
#include "lod_internal.h"
struct lod_recovery_data { /* arguments handed to one update-log recovery thread */
struct lod_device *lrd_lod; /* LOD device the recovery thread works on */
struct lod_tgt_desc *lrd_ltd; /* sub-target being recovered; the thread tests it for NULL (appears to be NULL for the local child device - confirm) */
- struct ptlrpc_thread *lrd_thread;
+ struct task_struct **lrd_task; /* address of the slot holding this thread's task; the thread clears it with xchg() when it finishes */
u32 lrd_idx; /* target index passed to lod_sub_prep_llog() */
+ struct lu_env lrd_env; /* initialized by the starter before the thread is created, finalized by the thread on exit */
+ struct completion *lrd_started; /* completed by the thread once it is set up; the starter waits on it */
};
struct lod_recovery_data *lrd = arg;
struct lod_device *lod = lrd->lrd_lod;
struct dt_device *dt;
- struct ptlrpc_thread *thread = lrd->lrd_thread;
struct llog_ctxt *ctxt = NULL;
- struct lu_env env;
+ struct lu_env *env = &lrd->lrd_env;
struct lu_target *lut;
struct lu_tgt_desc *mdt = NULL;
time64_t start;
ENTRY;
- thread->t_flags = SVC_RUNNING;
- wake_up(&thread->t_ctl_waitq);
-
- rc = lu_env_init(&env, LCT_LOCAL | LCT_MD_THREAD);
- if (rc != 0) {
- OBD_FREE_PTR(lrd);
- CERROR("%s: can't initialize env: rc = %d\n",
- lod2obd(lod)->obd_name, rc);
- RETURN(rc);
- }
-
lut = lod2lu_dev(lod)->ld_site->ls_tgt;
atomic_inc(&lut->lut_tdtd->tdtd_recovery_threads_count);
if (!lrd->lrd_ltd)
dt = lrd->lrd_ltd->ltd_tgt;
start = ktime_get_real_seconds();
+ complete(lrd->lrd_started);
again:
- rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
+
+ if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_TGT_RECOVERY_CONNECT)) &&
+ lrd->lrd_ltd) {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val);
+ rc = -EIO;
+ } else {
+ rc = lod_sub_prep_llog(env, lod, dt, lrd->lrd_idx);
+ }
if (!rc && !lod->lod_child->dd_rdonly) {
/* Process the recovery record */
ctxt = llog_get_context(dt->dd_lu_dev.ld_obd,
LASSERT(ctxt != NULL);
LASSERT(ctxt->loc_handle != NULL);
- rc = llog_cat_process(&env, ctxt->loc_handle,
+ rc = llog_cat_process(env, ctxt->loc_handle,
lod_process_recovery_updates, lrd, 0, 0);
}
!top_device->ld_obd->obd_stopping) {
if (ctxt) {
if (ctxt->loc_handle)
- llog_cat_close(&env,
+ llog_cat_close(env,
ctxt->loc_handle);
llog_ctxt_put(ctxt);
}
EXIT;
out:
- OBD_FREE_PTR(lrd);
- thread->t_flags = SVC_STOPPED;
atomic_dec(&lut->lut_tdtd->tdtd_recovery_threads_count);
wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq);
- wake_up(&thread->t_ctl_waitq);
- lu_env_fini(&env);
- return rc;
+ if (xchg(lrd->lrd_task, NULL) == NULL)
+ /* Someone is waiting for us to finish, need
+ * to synchronize cleanly.
+ */
+ wait_var_event(lrd, kthread_should_stop());
+ lu_env_fini(env);
+ OBD_FREE_PTR(lrd);
+ return 0;
}
/**
* \param[in] thread recovery thread on this sub device
*/
void lod_sub_fini_llog(const struct lu_env *env,
- struct dt_device *dt, struct ptlrpc_thread *thread)
+ struct dt_device *dt, struct task_struct **thread)
{
struct obd_device *obd;
struct llog_ctxt *ctxt;
+ struct task_struct *task = NULL;
ENTRY;
obd = dt->dd_lu_dev.ld_obd;
CDEBUG(D_INFO, "%s: finish sub llog\n", obd->obd_name);
- /* Stop recovery thread first */
- if (thread && thread->t_flags & SVC_RUNNING) {
- thread->t_flags = SVC_STOPPING;
- wake_up(&thread->t_ctl_waitq);
- wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
- }
+ /* Wait for recovery thread to complete */
+ if (thread)
+ task = xchg(thread, NULL);
+ if (task)
+ kthread_stop(task);
ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT);
if (!ctxt)
{
struct obd_device *obd;
struct lod_recovery_data *lrd = NULL;
- struct ptlrpc_thread *thread;
+ DECLARE_COMPLETION_ONSTACK(started);
+ struct task_struct **taskp;
struct task_struct *task;
struct lod_tgt_desc *subtgt = NULL;
u32 index;
RETURN(-ENOMEM);
if (lod->lod_child == dt) {
- thread = &lod->lod_child_recovery_thread;
+ taskp = &lod->lod_child_recovery_task;
index = master_index;
} else {
struct lu_tgt_desc *mdt;
}
}
LASSERT(subtgt != NULL);
- OBD_ALLOC_PTR(subtgt->ltd_recovery_thread);
- if (!subtgt->ltd_recovery_thread)
- GOTO(free_lrd, rc = -ENOMEM);
-
- thread = subtgt->ltd_recovery_thread;
+ taskp = &subtgt->ltd_recovery_task;
}
CDEBUG(D_INFO, "%s init sub log %s\n", lod2obd(lod)->obd_name,
dt->dd_lu_dev.ld_obd->obd_name);
lrd->lrd_lod = lod;
lrd->lrd_ltd = subtgt;
- lrd->lrd_thread = thread;
+ lrd->lrd_task = taskp;
lrd->lrd_idx = index;
- init_waitqueue_head(&thread->t_ctl_waitq);
+ lrd->lrd_started = &started;
obd = dt->dd_lu_dev.ld_obd;
obd->obd_lvfs_ctxt.dt = dt;
if (rc < 0) {
CERROR("%s: cannot setup updatelog llog: rc = %d\n",
obd->obd_name, rc);
- GOTO(free_thread, rc);
+ GOTO(free_lrd, rc);
+ }
+
+ rc = lu_env_init(&lrd->lrd_env, LCT_LOCAL | LCT_MD_THREAD);
+ if (rc != 0) {
+ CERROR("%s: can't initialize env: rc = %d\n",
+ lod2obd(lod)->obd_name, rc);
+ GOTO(free_lrd, rc);
}
/* Start the recovery thread */
- task = kthread_run(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x",
- master_index, index);
+ task = kthread_create(lod_sub_recovery_thread, lrd, "lod%04x_rec%04x",
+ master_index, index);
if (IS_ERR(task)) {
rc = PTR_ERR(task);
CERROR("%s: cannot start recovery thread: rc = %d\n",
obd->obd_name, rc);
+ lu_env_fini(&lrd->lrd_env);
GOTO(out_llog, rc);
}
-
- wait_event_idle(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING ||
- thread->t_flags & SVC_STOPPED);
+ *taskp = task;
+ wake_up_process(task);
+ wait_for_completion(&started);
RETURN(0);
out_llog:
- lod_sub_fini_llog(env, dt, thread);
-free_thread:
- if (lod->lod_child != dt) {
- OBD_FREE_PTR(subtgt->ltd_recovery_thread);
- subtgt->ltd_recovery_thread = NULL;
- }
+ lod_sub_fini_llog(env, dt, taskp);
free_lrd:
OBD_FREE_PTR(lrd);
RETURN(rc);
static void lod_sub_stop_recovery_threads(const struct lu_env *env,
struct lod_device *lod)
{
- struct ptlrpc_thread *thread;
+ struct task_struct *task;
struct lu_tgt_desc *mdt;
/*
* Stop the update log commit cancel threads and finish master
* llog ctxt
*
* NOTE(review): what this function visibly does is stop the recovery
* thread of the local child device and then the recovery thread of
* every MDT target; no llog context is finished here.
*
* Each task pointer is taken with xchg() so that it is claimed exactly
* once: a NULL result means there is no thread left to stop for that
* slot, a non-NULL result is handed to kthread_stop().
*/
- thread = &lod->lod_child_recovery_thread;
- /* Stop recovery thread first */
- if (thread && thread->t_flags & SVC_RUNNING) {
- thread->t_flags = SVC_STOPPING;
- wake_up(&thread->t_ctl_waitq);
- wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED);
- }
+ task = xchg(&lod->lod_child_recovery_task, NULL); /* claim the local child's recovery task */
+ if (task)
+ kthread_stop(task);
lod_getref(&lod->lod_mdt_descs); /* paired with lod_putref() after the walk */
lod_foreach_mdt(lod, mdt) {
- thread = mdt->ltd_recovery_thread;
- if (thread && thread->t_flags & SVC_RUNNING) {
- thread->t_flags = SVC_STOPPING;
- wake_up(&thread->t_ctl_waitq);
- wait_event(thread->t_ctl_waitq,
- thread->t_flags & SVC_STOPPED);
- OBD_FREE_PTR(mdt->ltd_recovery_thread);
- mdt->ltd_recovery_thread = NULL;
- }
+ task = xchg(&mdt->ltd_recovery_task, NULL); /* same claim-then-stop step for each MDT target */
+ if (task)
+ kthread_stop(task);
}
lod_putref(lod, &lod->lod_mdt_descs);
}
* llog ctxt
*/
lod_sub_fini_llog(env, lod->lod_child,
- &lod->lod_child_recovery_thread);
+ &lod->lod_child_recovery_task);
lod_getref(&lod->lod_mdt_descs);
lod_foreach_mdt(lod, mdt)
lod_sub_fini_llog(env, mdt->ltd_tgt,
- mdt->ltd_recovery_thread);
+ &mdt->ltd_recovery_task);
lod_putref(lod, &lod->lod_mdt_descs);
}
rc = lodname2mdt_index(lod2obd(lod)->obd_name, &i);
LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc);
- rc = snprintf(buf + len, *size - len, " %04x", i);
+ rc = scnprintf(buf + len, *size - len, " %04x", i);
LASSERT(rc > 0);
len += rc;
lod_foreach_mdt(lod, mdt) {
if (!mdt->ltd_got_update_log) {
- rc = snprintf(buf + len, *size - len, " %04x",
- mdt->ltd_index);
+ rc = scnprintf(buf + len, *size - len, " %04x",
+ mdt->ltd_index);
if (unlikely(rc <= 0))
break;
case LCFG_PRE_CLEANUP: {
lod_sub_process_config(env, lod, &lod->lod_mdt_descs, lcfg);
lod_sub_process_config(env, lod, &lod->lod_ost_descs, lcfg);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_RECOVERY_CONNECT, cfs_fail_val * 2);
next = &lod->lod_child->dd_lu_dev;
rc = next->ld_ops->ldo_process_config(env, next, lcfg);
if (rc != 0)
RETURN(rc);
}
+/**
+ * Implementation of lu_device_operations::ldo_fid_alloc() for LOD
+ *
+ * Find corresponding device by passed parent and name, and allocate FID from
+ * there.
+ *
+ * When the parent has directory stripes and a name is given, the name is
+ * mapped to a stripe index and the FID is allocated from the device of that
+ * stripe; otherwise it is allocated from the LOD child device.
+ *
+ * see include/lu_object.h for the details.
+ *
+ * \param[in] env execution environment
+ * \param[in] d LU device of this LOD
+ * \param[out] fid handed to dt_fid_alloc() (presumably receives the new FID)
+ * \param[in] parent parent object; its striping is loaded (reloaded if remote)
+ * \param[in] name name of the new entry, may be NULL
+ *
+ * \retval negative errno from lod_striping_load() or from the
+ * name-to-stripe mapping
+ * \retval -ENODEV if the selected stripe is missing or does not exist
+ * \retval result of dt_fid_alloc() otherwise
+ */
+static int lod_fid_alloc(const struct lu_env *env, struct lu_device *d,
+ struct lu_fid *fid, struct lu_object *parent,
+ const struct lu_name *name)
+{
+ struct lod_device *lod = lu2lod_dev(d);
+ struct lod_object *lo = lu2lod_obj(parent);
+ struct dt_device *next; /* device the FID is finally allocated from */
+ int rc;
+
+ ENTRY;
+
+ /* if @parent is remote, we don't know whether its layout was changed,
+ * always reload layout.
+ */
+ if (lu_object_remote(parent))
+ lod_striping_free(env, lo);
+
+ rc = lod_striping_load(env, lo);
+ if (rc)
+ RETURN(rc);
+
+ if (lo->ldo_dir_stripe_count > 0 && name) { /* striped directory and a name to hash */
+ struct dt_object *stripe;
+ int idx;
+
+ idx = __lmv_name_to_stripe_index(lo->ldo_dir_hash_type,
+ lo->ldo_dir_stripe_count,
+ lo->ldo_dir_migrate_hash,
+ lo->ldo_dir_migrate_offset,
+ name->ln_name,
+ name->ln_namelen, true);
+ if (idx < 0) /* negative index is an error code, returned as is */
+ RETURN(idx);
+
+ stripe = lo->ldo_stripe[idx];
+ if (!stripe || !dt_object_exists(stripe)) /* selected stripe is unusable */
+ RETURN(-ENODEV);
+
+ next = lu2dt_dev(stripe->do_lu.lo_dev);
+ } else {
+ next = lod->lod_child; /* no stripes or no name: use the LOD child device */
+ }
+
+ rc = dt_fid_alloc(env, next, fid, parent, name);
+
+ RETURN(rc);
+}
+
const struct lu_device_operations lod_lu_ops = { /* LU device method table for LOD */
.ldo_object_alloc = lod_object_alloc,
.ldo_process_config = lod_process_config,
.ldo_recovery_complete = lod_recovery_complete,
.ldo_prepare = lod_prepare,
+ .ldo_fid_alloc = lod_fid_alloc, /* FID allocation routed by parent striping and name */
};
/**
(int)sfs->os_bsize, (int)ost_sfs.os_bsize);
}
lod_putref(lod, &lod->lod_ost_descs);
- sfs->os_state |= OS_STATE_SUM;
+ sfs->os_state |= OS_STATFS_SUM;
/* If we have _some_ OSTs, but don't have as many free objects on the
* OSTs as inodes on the MDTs, reduce the reported number of inodes
RETURN(rc);
}
+static int lod_lsfs_init(const struct lu_env *env, struct lod_device *d)
+{ /* Seed the cached local-OSD statfs values of @d (age, total and free
+ * space in MiB) from a single dt_statfs() call on d->lod_child, and
+ * initialize the lock stored next to them.
+ * Returns 0 on success or the dt_statfs() error code.
+ */
+ struct obd_statfs sfs;
+ int rc;
+
+ rc = dt_statfs(env, d->lod_child, &sfs);
+ if (rc) {
+ CDEBUG(D_LAYOUT, "%s: failed to get OSD statfs, rc = %d\n",
+ lod2obd(d)->obd_name, rc);
+ return rc;
+ }
+
+ /* update local OSD cached statfs data */
+ spin_lock_init(&d->lod_lsfs_lock);
+ d->lod_lsfs_age = ktime_get_seconds(); /* time at which the cache was filled */
+ d->lod_lsfs_total_mb = (sfs.os_blocks * sfs.os_bsize) >> 20; /* blocks * block size, shifted down to MiB */
+ d->lod_lsfs_free_mb = (sfs.os_bfree * sfs.os_bsize) >> 20; /* same conversion for free blocks */
+ return 0;
+}
+
/**
* Initialize LOD device at setup.
*
dt_conf_get(env, &lod->lod_dt_dev, &ddp);
lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
- lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */
+ lod->lod_dom_stripesize_max_kb = (1ULL << 10); /* 1Mb is default */
+
+ /* initialize local statfs cached values */
+ rc = lod_lsfs_init(env, lod);
+ if (rc)
+ GOTO(out_disconnect, rc);
+
+ /* default threshold as half of total space, in MiB */
+ lod->lod_dom_threshold_free_mb = lod->lod_lsfs_total_mb / 2;
+ /* set default DoM stripe size based on free space amount */
+ lod_dom_stripesize_recalc(lod);
/* setup obd to be used with old lov code */
rc = lod_pools_init(lod, cfg);
ENTRY;
- if (atomic_read(&lu->ld_ref) > 0 &&
- !cfs_hash_is_empty(lu->ld_site->ls_obj_hash)) {
- LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
- lu_site_print(env, lu->ld_site, &msgdata, lu_cdebug_printer);
+ if (atomic_read(&lu->ld_site->ls_obj_hash.nelems)) {
+ lu_site_print(env, lu->ld_site, &lu->ld_ref, D_ERROR,
+ lu_cdebug_printer);
}
LASSERTF(atomic_read(&lu->ld_ref) == 0, "lu is %p\n", lu);
dt_device_fini(&lod->lod_dt_dev);
static void lod_avoid_guide_fini(struct lod_avoid_guide *lag)
{ /* release the OSS avoid array and the OST avoid bitmap held by @lag */
if (lag->lag_oss_avoid_array)
- OBD_FREE(lag->lag_oss_avoid_array,
- sizeof(u32) * lag->lag_oaa_size);
- if (lag->lag_ost_avoid_bitmap)
- CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap);
+ OBD_FREE_PTR_ARRAY(lag->lag_oss_avoid_array,
+ lag->lag_oaa_size); /* array of lag_oaa_size elements */
+ bitmap_free(lag->lag_ost_avoid_bitmap); /* called unguarded; bitmap_free() is expected to accept NULL */
}
/**
lod_free_def_comp_entries(&info->lti_def_striping);
if (info->lti_comp_size > 0)
- OBD_FREE(info->lti_comp_idx,
- info->lti_comp_size * sizeof(u32));
+ OBD_FREE_PTR_ARRAY(info->lti_comp_idx,
+ info->lti_comp_size);
lod_avoid_guide_fini(&info->lti_avoid);
RETURN(rc);
}
-static struct obd_ops lod_obd_device_ops = {
+static const struct obd_ops lod_obd_device_ops = {
.o_owner = THIS_MODULE,
.o_connect = lod_obd_connect,
.o_disconnect = lod_obd_disconnect,