* Copyright 2009 Sun Microsystems, Inc. All rights reserved
* Use is subject to license terms.
*
- * Copyright (c) 2012, 2016, Intel Corporation.
+ * Copyright (c) 2012, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#include <obd_class.h>
#include <md_object.h>
#include <lustre_fid.h>
-#include <uapi/linux/lustre_param.h>
+#include <uapi/linux/lustre/lustre_param.h>
#include <lustre_update.h>
#include <lustre_log.h>
if (lod_obj == NULL)
RETURN(ERR_PTR(-ENOMEM));
+ mutex_init(&lod_obj->ldo_layout_mutex);
lu_obj = lod2lu_obj(lod_obj);
dt_object_init(&lod_obj->ldo_obj, NULL, dev);
lod_obj->ldo_obj.do_ops = &lod_obj_ops;
*/
static int lod_sub_recovery_thread(void *arg)
{
- struct lod_recovery_data *lrd = arg;
- struct lod_device *lod = lrd->lrd_lod;
- struct dt_device *dt;
- struct ptlrpc_thread *thread = lrd->lrd_thread;
- struct llog_ctxt *ctxt = NULL;
- struct lu_env env;
+ struct lod_recovery_data *lrd = arg;
+ struct lod_device *lod = lrd->lrd_lod;
+ struct dt_device *dt;
+ struct ptlrpc_thread *thread = lrd->lrd_thread;
+ struct llog_ctxt *ctxt = NULL;
+ struct lu_env env;
struct lu_target *lut;
-
-
- int rc;
+ struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
+ struct lod_tgt_desc *tgt = NULL;
+ time64_t start;
+ int retries = 0;
+ int i;
+ int rc;
ENTRY;
thread->t_flags = SVC_RUNNING;
else
dt = lrd->lrd_ltd->ltd_tgt;
+ start = ktime_get_real_seconds();
+
again:
rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx);
if (!rc && !lod->lod_child->dd_rdonly) {
ctxt->loc_handle);
llog_ctxt_put(ctxt);
}
+ retries++;
+ CDEBUG(D_HA, "%s get update log failed %d, retry\n",
+ dt->dd_lu_dev.ld_obd->obd_name, rc);
goto again;
}
- CERROR("%s getting update log failed: rc = %d\n",
+ CERROR("%s get update log failed: rc = %d\n",
dt->dd_lu_dev.ld_obd->obd_name, rc);
llog_ctxt_put(ctxt);
}
llog_ctxt_put(ctxt);
- CDEBUG(D_HA, "%s retrieve update log: rc = %d\n",
- dt->dd_lu_dev.ld_obd->obd_name, rc);
+ CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n",
+ dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start,
+ retries);
+ spin_lock(&lod->lod_lock);
if (lrd->lrd_ltd == NULL)
lod->lod_child_got_update_log = 1;
else
lrd->lrd_ltd->ltd_got_update_log = 1;
- if (lod->lod_child_got_update_log) {
- struct lod_tgt_descs *ltd = &lod->lod_mdt_descs;
- struct lod_tgt_desc *tgt = NULL;
- bool all_got_log = true;
- int i;
-
- cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
- tgt = LTD_TGT(ltd, i);
- if (!tgt->ltd_got_update_log) {
- all_got_log = false;
- break;
- }
- }
+ if (!lod->lod_child_got_update_log) {
+ spin_unlock(&lod->lod_lock);
+ GOTO(out, rc = 0);
+ }
- if (all_got_log) {
- CDEBUG(D_HA, "%s got update logs from all MDTs.\n",
- lut->lut_obd->obd_name);
- lut->lut_tdtd->tdtd_replay_ready = 1;
- wake_up(&lut->lut_obd->obd_next_transno_waitq);
+ cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) {
+ tgt = LTD_TGT(ltd, i);
+ if (!tgt->ltd_got_update_log) {
+ spin_unlock(&lod->lod_lock);
+ GOTO(out, rc = 0);
}
}
+ lut->lut_tdtd->tdtd_replay_ready = 1;
+ spin_unlock(&lod->lod_lock);
+
+ CDEBUG(D_HA, "%s got update logs from all MDTs.\n",
+ lut->lut_obd->obd_name);
+ wake_up(&lut->lut_obd->obd_next_transno_waitq);
+ EXIT;
out:
OBD_FREE_PTR(lrd);
wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq);
wake_up(&thread->t_ctl_waitq);
lu_env_fini(&env);
- RETURN(rc);
+ return rc;
}
/**
case LCFG_PARAM: {
struct obd_device *obd;
+ ssize_t count;
char *param;
/* Check if it is activate/deactivate mdc
GOTO(out, rc);
}
- obd = lod2obd(lod);
- rc = class_process_proc_param(PARAM_LOV, obd->obd_vars,
- lcfg, obd);
- if (rc > 0)
- rc = 0;
+
+ if (strstr(param, PARAM_LOD) != NULL)
+ count = class_modify_config(lcfg, PARAM_LOD,
+ &lod->lod_dt_dev.dd_kobj);
+ else
+ count = class_modify_config(lcfg, PARAM_LOV,
+ &lod->lod_dt_dev.dd_kobj);
+ rc = count > 0 ? 0 : count;
GOTO(out, rc);
}
case LCFG_PRE_CLEANUP: {
- if (lod->lod_md_root != NULL) {
- dt_object_put(env, &lod->lod_md_root->ldo_obj);
- lod->lod_md_root = NULL;
- }
-
lod_sub_process_config(env, lod, &lod->lod_mdt_descs, lcfg);
lod_sub_process_config(env, lod, &lod->lod_ost_descs, lcfg);
next = &lod->lod_child->dd_lu_dev;
break;
}
case LCFG_CLEANUP: {
+ if (lod->lod_md_root != NULL) {
+ dt_object_put(env, &lod->lod_md_root->ldo_obj);
+ lod->lod_md_root = NULL;
+ }
+
/*
* do cleanup on underlying storage only when
* all OSPs are cleaned up, as they use that OSD as well
return dt_root_get(env, dt2lod_dev(dev)->lod_child, f);
}
+static void lod_statfs_sum(struct obd_statfs *sfs,
+ struct obd_statfs *ost_sfs, int *bs)
+{ /*
+ * Add the block counters of one target (ost_sfs) into the running
+ * total (sfs) after scaling both to the same block size. *bs is the
+ * block size the total is currently expressed in; it is lowered here
+ * when the target uses smaller blocks. os_files/os_ffree are not
+ * touched by this helper.
+ * Presumably block sizes are powers of two, so that repeated halving
+ * meets exactly -- TODO confirm.
+ */
+ while (ost_sfs->os_bsize < *bs) { /* target has smaller blocks: rescale the total */
+ *bs >>= 1;
+ sfs->os_bsize >>= 1;
+ sfs->os_bavail <<= 1;
+ sfs->os_blocks <<= 1;
+ sfs->os_bfree <<= 1;
+ sfs->os_granted <<= 1;
+ }
+ while (ost_sfs->os_bsize > *bs) { /* target has larger blocks: rescale the caller's ost_sfs in place */
+ ost_sfs->os_bsize >>= 1;
+ ost_sfs->os_bavail <<= 1;
+ ost_sfs->os_blocks <<= 1;
+ ost_sfs->os_bfree <<= 1;
+ ost_sfs->os_granted <<= 1;
+ }
+ sfs->os_bavail += ost_sfs->os_bavail; /* both sides are now in units of *bs */
+ sfs->os_blocks += ost_sfs->os_blocks;
+ sfs->os_bfree += ost_sfs->os_bfree;
+ sfs->os_granted += ost_sfs->os_granted;
+}
+
/**
* Implementation of dt_device_operations::dt_statfs() for LOD
*
static int lod_statfs(const struct lu_env *env,
struct dt_device *dev, struct obd_statfs *sfs)
{
- return dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs);
+ struct lod_device *lod = dt2lod_dev(dev);
+ struct lod_ost_desc *ost;
+ struct lod_mdt_desc *mdt;
+ struct obd_statfs ost_sfs; /* scratch buffer, reused for MDTs and OSTs */
+ int i, rc, bs; /* bs: block size the totals in sfs are expressed in */
+ bool mdtonly; /* true until the first usable OST is seen */
+
+ rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs); /* start from the child device */
+ if (rc)
+ GOTO(out, rc); /* the only failure that is returned to the caller */
+
+ bs = sfs->os_bsize;
+
+ sfs->os_bavail = 0; /* block counters are rebuilt from per-target values below */
+ sfs->os_blocks = 0;
+ sfs->os_bfree = 0;
+ sfs->os_granted = 0;
+
+ lod_getref(&lod->lod_mdt_descs);
+ lod_foreach_mdt(lod, i) {
+ mdt = MDT_TGT(lod, i);
+ LASSERT(mdt && mdt->ltd_mdt);
+ rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs);
+ /* ignore errors */
+ if (rc)
+ continue;
+ sfs->os_files += ost_sfs.os_files; /* inode counts are added for MDTs only; the OST loop does not add them */
+ sfs->os_ffree += ost_sfs.os_ffree;
+ lod_statfs_sum(sfs, &ost_sfs, &bs);
+ }
+ lod_putref(lod, &lod->lod_mdt_descs);
+
+ /* at some point we can check whether DoM is enabled and
+ * decide how to account MDT space. for simplicity let's
+ * just fall back to pre-DoM policy if any OST is alive */
+ mdtonly = true;
+
+ lod_getref(&lod->lod_ost_descs);
+ lod_foreach_ost(lod, i) {
+ ost = OST_TGT(lod, i);
+ LASSERT(ost && ost->ltd_ost);
+ rc = dt_statfs(env, ost->ltd_ost, &ost_sfs);
+ /* ignore errors */
+ if (rc || ost_sfs.os_bsize == 0) /* an OST reporting a zero block size is skipped as well */
+ continue;
+ if (mdtonly) {
+ /* report MDT space only when no OST is usable
+ * (MDT-only/DoM case); otherwise report OST space
+ * only, so drop the MDT block totals summed above
+ * on the first usable OST */
+ sfs->os_bavail = 0;
+ sfs->os_blocks = 0;
+ sfs->os_bfree = 0;
+ sfs->os_granted = 0;
+ mdtonly = false;
+ }
+ ost_sfs.os_bavail += ost_sfs.os_granted; /* granted space is counted as available */
+ lod_statfs_sum(sfs, &ost_sfs, &bs);
+ LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n",
+ (int)sfs->os_bsize, (int)ost_sfs.os_bsize); /* the summing step is expected to leave both at the same block size */
+ }
+ lod_putref(lod, &lod->lod_ost_descs);
+ sfs->os_state |= OS_STATE_SUM; /* presumably tells consumers the values are already aggregated -- confirm */
+
+ /* a single successful statfs should be enough */
+ rc = 0; /* per-target errors from the loops above are deliberately dropped */
+
+out:
+ RETURN(rc); /* NOTE(review): no ENTRY in this function -- confirm the unpaired RETURN() is intended */
}
/**
lod_foreach_ost(lod, i) {
ost = OST_TGT(lod, i);
LASSERT(ost && ost->ltd_ost);
+ if (!ost->ltd_active)
+ continue;
rc = dt_sync(env, ost->ltd_ost);
if (rc) {
- CERROR("%s: can't sync ost %u: %d\n",
- lod2obd(lod)->obd_name, i, rc);
- break;
+ if (rc != -ENOTCONN) {
+ CERROR("%s: can't sync ost %u: %d\n",
+ lod2obd(lod)->obd_name, i, rc);
+ break;
+ }
+ rc = 0;
}
}
lod_putref(lod, &lod->lod_ost_descs);
lod_foreach_mdt(lod, i) {
mdt = MDT_TGT(lod, i);
LASSERT(mdt && mdt->ltd_mdt);
+ if (!mdt->ltd_active)
+ continue;
rc = dt_sync(env, mdt->ltd_mdt);
if (rc) {
- CERROR("%s: can't sync mdt %u: %d\n",
- lod2obd(lod)->obd_name, i, rc);
- break;
+ if (rc != -ENOTCONN) {
+ CERROR("%s: can't sync mdt %u: %d\n",
+ lod2obd(lod)->obd_name, i, rc);
+ break;
+ }
+ rc = 0;
}
}
lod_putref(lod, &lod->lod_mdt_descs);
dt_conf_get(env, &lod->lod_dt_dev, &ddp);
lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
+ lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */
/* setup obd to be used with old lov code */
rc = lod_pools_init(lod, cfg);
struct lu_device *next = &lod->lod_child->dd_lu_dev;
ENTRY;
+ if (atomic_read(&lu->ld_ref) > 0 &&
+ !cfs_hash_is_empty(lu->ld_site->ls_obj_hash)) {
+ LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL);
+ lu_site_print(env, lu->ld_site, &msgdata, lu_cdebug_printer);
+ }
LASSERTF(atomic_read(&lu->ld_ref) == 0, "lu is %p\n", lu);
dt_device_fini(&lod->lod_dt_dev);
OBD_FREE_PTR(lod);
return lu_dev;
}
+static void lod_avoid_guide_fini(struct lod_avoid_guide *lag)
+{ /*
+ * Release the two buffers held by an avoid guide, if they were
+ * allocated. The pointers and lag_oaa_size are left unchanged, so
+ * calling this twice on the same guide would free them twice.
+ */
+ if (lag->lag_oss_avoid_array)
+ OBD_FREE(lag->lag_oss_avoid_array,
+ sizeof(__u32) * lag->lag_oaa_size); /* size passed: lag_oaa_size entries of __u32 */
+ if (lag->lag_ost_avoid_bitmap)
+ CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap);
+}
+
/**
* Implementation of lu_device_type_operations::ldto_device_fini() for LOD
*
struct lod_thread_info *info = data;
struct lod_layout_component *lds =
info->lti_def_striping.lds_def_comp_entries;
- struct ost_pool *inuse = &info->lti_inuse_osts;
/* allocated in lod_get_lov_ea
* XXX: this is overload, a tread may have such store but used only
if (lds != NULL)
lod_free_def_comp_entries(&info->lti_def_striping);
- if (inuse->op_size)
- OBD_FREE(inuse->op_array, inuse->op_size);
+ if (info->lti_comp_size > 0)
+ OBD_FREE(info->lti_comp_idx,
+ info->lti_comp_size * sizeof(__u32));
+
+ lod_avoid_guide_fini(&info->lti_avoid);
OBD_FREE_PTR(info);
}
if (no_set) {
- rc2 = ptlrpc_set_wait(set);
+ rc2 = ptlrpc_set_wait(env, set);
if (rc2 == 0 && rc == 0)
rc = rc2;
ptlrpc_set_destroy(set);
.o_pool_del = lod_pool_del,
};
+static struct obd_type sym;
+
static int __init lod_init(void)
{
+ struct dentry *symlink;
struct obd_type *type;
+ struct kobject *kobj;
+ struct qstr dname;
int rc;
rc = lu_kmem_init(lod_caches);
return rc;
}
- /* create "lov" entry in procfs for compatibility purposes */
+ /* create "lov" entry for compatibility purposes */
+ dname.name = "lov";
+ dname.len = strlen(dname.name);
+ dname.hash = ll_full_name_hash(debugfs_lustre_root, dname.name,
+ dname.len);
+ symlink = d_lookup(debugfs_lustre_root, &dname);
+ if (!symlink) {
+ symlink = debugfs_create_dir(dname.name, debugfs_lustre_root);
+ if (IS_ERR_OR_NULL(symlink)) {
+ rc = symlink ? PTR_ERR(symlink) : -ENOMEM;
+ GOTO(no_lov, rc);
+ }
+ sym.typ_debugfs_entry = symlink;
+ } else {
+ dput(symlink);
+ }
+
+ kobj = kset_find_obj(lustre_kset, dname.name);
+ if (kobj) {
+ kobject_put(kobj);
+ goto try_proc;
+ }
+
+ kobj = class_setup_tunables(dname.name);
+ if (IS_ERR(kobj)) {
+ rc = PTR_ERR(kobj);
+ if (sym.typ_debugfs_entry)
+ ldebugfs_remove(&sym.typ_debugfs_entry);
+ GOTO(no_lov, rc);
+ }
+ sym.typ_kobj = kobj;
+
+try_proc:
type = class_search_type(LUSTRE_LOV_NAME);
if (type != NULL && type->typ_procroot != NULL)
- return rc;
+ GOTO(no_lov, rc);
type = class_search_type(LUSTRE_LOD_NAME);
type->typ_procsym = lprocfs_register("lov", proc_lustre_root,
(int)PTR_ERR(type->typ_procsym));
type->typ_procsym = NULL;
}
+no_lov:
return rc;
}
static void __exit lod_exit(void)
{
+ ldebugfs_remove(&sym.typ_debugfs_entry); /* undo the "lov" compat entries
+ * that lod_init() recorded in sym. Either field is still unset when
+ * lod_init() found an existing "lov" entry instead of creating one;
+ * presumably both calls tolerate that -- TODO confirm. */
+ kobject_put(sym.typ_kobj);
class_unregister_type(LUSTRE_LOD_NAME);
lu_kmem_fini(lod_caches);
}