X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Flod%2Flod_dev.c;h=99ccbb0a06c4e522238816aaee54dffa67bb2de4;hp=7886704dfc7513d64e28becb65c200e15f6ca16c;hb=c228613e18d4496d026d56040e394fe90273de2f;hpb=2a874ec011e680f49405a7e901d8d0d35dcb4f1a diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index 7886704..99ccbb0 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -23,7 +23,7 @@ * Copyright 2009 Sun Microsystems, Inc. All rights reserved * Use is subject to license terms. * - * Copyright (c) 2012, 2014, Intel Corporation. + * Copyright (c) 2012, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -93,7 +93,7 @@ #include #include #include -#include +#include #include #include @@ -119,11 +119,12 @@ static const char lod_update_log_dir_name[] = "update_log_dir"; * \retval negative negated errno on error **/ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, - const struct lu_fid *fid, __u32 *tgt, int *type) + const struct lu_fid *fid, u32 *tgt, int *type) { - struct lu_seq_range range = { 0 }; - struct lu_server_fld *server_fld; + struct lu_seq_range range = { 0 }; + struct lu_server_fld *server_fld; int rc; + ENTRY; if (!fid_is_sane(fid)) { @@ -153,7 +154,7 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, } server_fld = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_server_fld; - if (server_fld == NULL) + if (!server_fld) RETURN(-EIO); fld_range_set_type(&range, *type); @@ -164,7 +165,7 @@ int lod_fld_lookup(const struct lu_env *env, struct lod_device *lod, *tgt = range.lsr_index; *type = range.lsr_flags; - CDEBUG(D_INFO, "%s: got tgt %x for sequence: "LPX64"\n", + CDEBUG(D_INFO, "%s: got tgt %x for sequence: %#llx\n", lod2obd(lod)->obd_name, *tgt, fid_seq(fid)); RETURN(0); @@ -205,14 +206,16 @@ static struct lu_object *lod_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev) { - struct 
lod_object *lod_obj; - struct lu_object *lu_obj; + struct lod_object *lod_obj; + struct lu_object *lu_obj; + ENTRY; OBD_SLAB_ALLOC_PTR_GFP(lod_obj, lod_object_kmem, GFP_NOFS); - if (lod_obj == NULL) + if (!lod_obj) RETURN(ERR_PTR(-ENOMEM)); + mutex_init(&lod_obj->ldo_layout_mutex); lu_obj = lod2lu_obj(lod_obj); dt_object_init(&lod_obj->ldo_obj, NULL, dev); lod_obj->ldo_obj.do_ops = &lod_obj_ops; @@ -241,26 +244,20 @@ static int lod_sub_process_config(const struct lu_env *env, struct lod_tgt_descs *ltd, struct lustre_cfg *lcfg) { - struct lu_device *next; + struct lu_device *next; + struct lu_tgt_desc *tgt; int rc = 0; - unsigned int i; lod_getref(ltd); - if (ltd->ltd_tgts_size <= 0) { - lod_putref(lod, ltd); - return 0; - } - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; + ltd_foreach_tgt(ltd, tgt) { int rc1; - tgt = LTD_TGT(ltd, i); LASSERT(tgt && tgt->ltd_tgt); next = &tgt->ltd_tgt->dd_lu_dev; rc1 = next->ld_ops->ldo_process_config(env, next, lcfg); if (rc1) { - CERROR("%s: error cleaning up LOD index %u: cmd %#x" - ": rc = %d\n", lod2obd(lod)->obd_name, i, + CERROR("%s: error cleaning up LOD index %u: cmd %#x : rc = %d\n", + lod2obd(lod)->obd_name, tgt->ltd_index, lcfg->lcfg_command, rc1); rc = rc1; } @@ -273,7 +270,7 @@ struct lod_recovery_data { struct lod_device *lrd_lod; struct lod_tgt_desc *lrd_ltd; struct ptlrpc_thread *lrd_thread; - __u32 lrd_idx; + u32 lrd_idx; }; @@ -298,13 +295,14 @@ static int lod_process_recovery_updates(const struct lu_env *env, struct llog_rec_hdr *rec, void *data) { - struct lod_recovery_data *lrd = data; - struct llog_cookie *cookie = &lod_env_info(env)->lti_cookie; - struct lu_target *lut; - __u32 index = 0; + struct lod_recovery_data *lrd = data; + struct llog_cookie *cookie = &lod_env_info(env)->lti_cookie; + struct lu_target *lut; + u32 index = 0; + ENTRY; - if (lrd->lrd_ltd == NULL) { + if (!lrd->lrd_ltd) { int rc; rc = lodname2mdt_index(lod2obd(lrd->lrd_lod)->obd_name, &index); @@ -316,21 +314,25 @@ 
static int lod_process_recovery_updates(const struct lu_env *env, if (rec->lrh_len != llog_update_record_size((struct llog_update_record *)rec)) { - CERROR("%s broken update record! index %u "DOSTID":%u :" - " rc = %d\n", lod2obd(lrd->lrd_lod)->obd_name, index, - POSTID(&llh->lgh_id.lgl_oi), rec->lrh_index, -EIO); - return -EIO; + CERROR("%s: broken update record! index %u "DFID".%u: rc = %d\n", + lod2obd(lrd->lrd_lod)->obd_name, index, + PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index, -EIO); + return -EINVAL; } cookie->lgc_lgl = llh->lgh_id; cookie->lgc_index = rec->lrh_index; cookie->lgc_subsys = LLOG_UPDATELOG_ORIG_CTXT; - CDEBUG(D_HA, "%s: process recovery updates "DOSTID":%u\n", + CDEBUG(D_HA, "%s: process recovery updates "DFID".%u\n", lod2obd(lrd->lrd_lod)->obd_name, - POSTID(&llh->lgh_id.lgl_oi), rec->lrh_index); + PFID(&llh->lgh_id.lgl_oi.oi_fid), rec->lrh_index); lut = lod2lu_dev(lrd->lrd_lod)->ld_site->ls_tgt; + if (lut->lut_obd->obd_stopping || + lut->lut_obd->obd_abort_recovery) + return -ESHUTDOWN; + return insert_update_records_to_replay_list(lut->lut_tdtd, (struct llog_update_record *)rec, cookie, index); @@ -349,13 +351,19 @@ static int lod_process_recovery_updates(const struct lu_env *env, */ static int lod_sub_recovery_thread(void *arg) { - struct lod_recovery_data *lrd = arg; - struct lod_device *lod = lrd->lrd_lod; - struct dt_device *dt; - struct ptlrpc_thread *thread = lrd->lrd_thread; - struct llog_ctxt *ctxt; - struct lu_env env; - int rc; + struct lod_recovery_data *lrd = arg; + struct lod_device *lod = lrd->lrd_lod; + struct dt_device *dt; + struct ptlrpc_thread *thread = lrd->lrd_thread; + struct llog_ctxt *ctxt = NULL; + struct lu_env env; + struct lu_target *lut; + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct lod_tgt_desc *tgt = NULL; + time64_t start; + int retries = 0; + int rc; + ENTRY; thread->t_flags = SVC_RUNNING; @@ -369,69 +377,103 @@ static int lod_sub_recovery_thread(void *arg) RETURN(rc); } - if (lrd->lrd_ltd 
== NULL) + lut = lod2lu_dev(lod)->ld_site->ls_tgt; + atomic_inc(&lut->lut_tdtd->tdtd_recovery_threads_count); + if (!lrd->lrd_ltd) dt = lod->lod_child; else dt = lrd->lrd_ltd->ltd_tgt; + start = ktime_get_real_seconds(); + +again: rc = lod_sub_prep_llog(&env, lod, dt, lrd->lrd_idx); - if (rc != 0) - GOTO(out, rc); + if (!rc && !lod->lod_child->dd_rdonly) { + /* Process the recovery record */ + ctxt = llog_get_context(dt->dd_lu_dev.ld_obd, + LLOG_UPDATELOG_ORIG_CTXT); + LASSERT(ctxt != NULL); + LASSERT(ctxt->loc_handle != NULL); + + rc = llog_cat_process(&env, ctxt->loc_handle, + lod_process_recovery_updates, lrd, 0, 0); + } - /* Process the recovery record */ - ctxt = llog_get_context(dt->dd_lu_dev.ld_obd, LLOG_UPDATELOG_ORIG_CTXT); - LASSERT(ctxt != NULL); - LASSERT(ctxt->loc_handle != NULL); + if (rc < 0) { + struct lu_device *top_device; - rc = llog_cat_process(&env, ctxt->loc_handle, - lod_process_recovery_updates, lrd, 0, 0); - llog_ctxt_put(ctxt); + top_device = lod->lod_dt_dev.dd_lu_dev.ld_site->ls_top_dev; + /* + * Because the remote target might failover at the same time, + * let's retry here + */ + if ((rc == -ETIMEDOUT || rc == -EAGAIN || rc == -EIO) && + dt != lod->lod_child && + !top_device->ld_obd->obd_abort_recovery && + !top_device->ld_obd->obd_stopping) { + if (ctxt) { + if (ctxt->loc_handle) + llog_cat_close(&env, + ctxt->loc_handle); + llog_ctxt_put(ctxt); + } + retries++; + CDEBUG(D_HA, "%s get update log failed %d, retry\n", + dt->dd_lu_dev.ld_obd->obd_name, rc); + goto again; + } - if (rc < 0) { - CERROR("%s getting update log failed: rc = %d\n", + CERROR("%s get update log failed: rc = %d\n", dt->dd_lu_dev.ld_obd->obd_name, rc); + llog_ctxt_put(ctxt); + + spin_lock(&top_device->ld_obd->obd_dev_lock); + if (!top_device->ld_obd->obd_abort_recovery && + !top_device->ld_obd->obd_stopping) + top_device->ld_obd->obd_abort_recovery = 1; + spin_unlock(&top_device->ld_obd->obd_dev_lock); + GOTO(out, rc); } + llog_ctxt_put(ctxt); - CDEBUG(D_HA, "%s 
retrieve update log: rc = %d\n", - dt->dd_lu_dev.ld_obd->obd_name, rc); + CDEBUG(D_HA, "%s retrieved update log, duration %lld, retries %d\n", + dt->dd_lu_dev.ld_obd->obd_name, ktime_get_real_seconds() - start, + retries); - if (lrd->lrd_ltd == NULL) + spin_lock(&lod->lod_lock); + if (!lrd->lrd_ltd) lod->lod_child_got_update_log = 1; else lrd->lrd_ltd->ltd_got_update_log = 1; - if (lod->lod_child_got_update_log) { - struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - struct lod_tgt_desc *tgt = NULL; - bool all_got_log = true; - int i; - - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - tgt = LTD_TGT(ltd, i); - if (!tgt->ltd_got_update_log) { - all_got_log = false; - break; - } - } - - if (all_got_log) { - struct lu_target *lut; + if (!lod->lod_child_got_update_log) { + spin_unlock(&lod->lod_lock); + GOTO(out, rc = 0); + } - lut = lod2lu_dev(lod)->ld_site->ls_tgt; - CDEBUG(D_HA, "%s got update logs from all MDTs.\n", - lut->lut_obd->obd_name); - lut->lut_tdtd->tdtd_replay_ready = 1; - wake_up(&lut->lut_obd->obd_next_transno_waitq); + ltd_foreach_tgt(ltd, tgt) { + if (!tgt->ltd_got_update_log) { + spin_unlock(&lod->lod_lock); + GOTO(out, rc = 0); } } + lut->lut_tdtd->tdtd_replay_ready = 1; + spin_unlock(&lod->lod_lock); + + CDEBUG(D_HA, "%s got update logs from all MDTs.\n", + lut->lut_obd->obd_name); + wake_up(&lut->lut_obd->obd_next_transno_waitq); + EXIT; out: OBD_FREE_PTR(lrd); thread->t_flags = SVC_STOPPED; + atomic_dec(&lut->lut_tdtd->tdtd_recovery_threads_count); + wake_up(&lut->lut_tdtd->tdtd_recovery_threads_waitq); wake_up(&thread->t_ctl_waitq); lu_env_fini(&env); - RETURN(rc); + return rc; } /** @@ -447,24 +489,25 @@ out: void lod_sub_fini_llog(const struct lu_env *env, struct dt_device *dt, struct ptlrpc_thread *thread) { - struct obd_device *obd; - struct llog_ctxt *ctxt; + struct obd_device *obd; + struct llog_ctxt *ctxt; + ENTRY; obd = dt->dd_lu_dev.ld_obd; CDEBUG(D_INFO, "%s: finish sub llog\n", obd->obd_name); /* Stop recovery thread first */ - if (thread 
!= NULL && thread->t_flags & SVC_RUNNING) { + if (thread && thread->t_flags & SVC_RUNNING) { thread->t_flags = SVC_STOPPING; wake_up(&thread->t_ctl_waitq); wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); } ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT); - if (ctxt == NULL) + if (!ctxt) RETURN_EXIT; - if (ctxt->loc_handle != NULL) + if (ctxt->loc_handle) llog_cat_close(env, ctxt->loc_handle); llog_cleanup(env, ctxt); @@ -484,43 +527,49 @@ void lod_sub_fini_llog(const struct lu_env *env, * \retval 0 on success * \retval -EINVAL if the name is invalid */ -int lodname2mdt_index(char *lodname, __u32 *mdt_index) +int lodname2mdt_index(char *lodname, u32 *mdt_index) { unsigned long index; char *ptr, *tmp; + int rc; /* 1.8 configs don't have "-MDT0000" at the end */ ptr = strstr(lodname, "-MDT"); - if (ptr == NULL) { + if (!ptr) { *mdt_index = 0; return 0; } ptr = strrchr(lodname, '-'); - if (ptr == NULL) { - CERROR("invalid MDT index in '%s'\n", lodname); - return -EINVAL; + if (!ptr) { + rc = -EINVAL; + CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc); + return rc; } if (strncmp(ptr, "-mdtlov", 7) != 0) { - CERROR("invalid MDT index in '%s'\n", lodname); - return -EINVAL; + rc = -EINVAL; + CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc); + return rc; } if ((unsigned long)ptr - (unsigned long)lodname <= 8) { - CERROR("invalid MDT index in '%s'\n", lodname); - return -EINVAL; + rc = -EINVAL; + CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc); + return rc; } if (strncmp(ptr - 8, "-MDT", 4) != 0) { - CERROR("invalid MDT index in '%s'\n", lodname); - return -EINVAL; + rc = -EINVAL; + CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc); + return rc; } index = simple_strtol(ptr - 4, &tmp, 16); if (*tmp != '-' || index > INT_MAX) { - CERROR("invalid MDT index in '%s'\n", lodname); - return -EINVAL; + rc = -EINVAL; + CERROR("invalid MDT index in '%s': rc = %d\n", lodname, rc); + return rc; } *mdt_index = index; 
return 0; @@ -543,15 +592,16 @@ int lodname2mdt_index(char *lodname, __u32 *mdt_index) int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, struct dt_device *dt) { - struct obd_device *obd; - struct lod_recovery_data *lrd = NULL; - struct ptlrpc_thread *thread; - struct task_struct *task; - struct l_wait_info lwi = { 0 }; - struct lod_tgt_desc *sub_ltd = NULL; - __u32 index; - __u32 master_index; - int rc; + struct obd_device *obd; + struct lod_recovery_data *lrd = NULL; + struct ptlrpc_thread *thread; + struct task_struct *task; + struct l_wait_info lwi = { 0 }; + struct lod_tgt_desc *subtgt = NULL; + u32 index; + u32 master_index; + int rc; + ENTRY; rc = lodname2mdt_index(lod2obd(lod)->obd_name, &master_index); @@ -559,37 +609,34 @@ int lod_sub_init_llog(const struct lu_env *env, struct lod_device *lod, RETURN(rc); OBD_ALLOC_PTR(lrd); - if (lrd == NULL) + if (!lrd) RETURN(-ENOMEM); if (lod->lod_child == dt) { thread = &lod->lod_child_recovery_thread; index = master_index; } else { - struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - struct lod_tgt_desc *tgt = NULL; - unsigned int i; + struct lu_tgt_desc *tgt; - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - tgt = LTD_TGT(ltd, i); + ltd_foreach_tgt(&lod->lod_mdt_descs, tgt) { if (tgt->ltd_tgt == dt) { index = tgt->ltd_index; - sub_ltd = tgt; + subtgt = tgt; break; } } - LASSERT(sub_ltd != NULL); - OBD_ALLOC_PTR(sub_ltd->ltd_recovery_thread); - if (sub_ltd->ltd_recovery_thread == NULL) + LASSERT(subtgt != NULL); + OBD_ALLOC_PTR(subtgt->ltd_recovery_thread); + if (!subtgt->ltd_recovery_thread) GOTO(free_lrd, rc = -ENOMEM); - thread = sub_ltd->ltd_recovery_thread; + thread = subtgt->ltd_recovery_thread; } CDEBUG(D_INFO, "%s init sub log %s\n", lod2obd(lod)->obd_name, dt->dd_lu_dev.ld_obd->obd_name); lrd->lrd_lod = lod; - lrd->lrd_ltd = sub_ltd; + lrd->lrd_ltd = subtgt; lrd->lrd_thread = thread; lrd->lrd_idx = index; init_waitqueue_head(&thread->t_ctl_waitq); @@ -622,8 +669,8 @@ out_llog: 
lod_sub_fini_llog(env, dt, thread); free_thread: if (lod->lod_child != dt) { - OBD_FREE_PTR(sub_ltd->ltd_recovery_thread); - sub_ltd->ltd_recovery_thread = NULL; + OBD_FREE_PTR(subtgt->ltd_recovery_thread); + subtgt->ltd_recovery_thread = NULL; } free_lrd: OBD_FREE_PTR(lrd); @@ -642,26 +689,25 @@ static void lod_sub_stop_recovery_threads(const struct lu_env *env, struct lod_device *lod) { struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - struct ptlrpc_thread *thread; - unsigned int i; + struct ptlrpc_thread *thread; + struct lu_tgt_desc *tgt; - /* Stop the update log commit cancel threads and finish master - * llog ctxt */ + /* + * Stop the update log commit cancel threads and finish master + * llog ctxt + */ thread = &lod->lod_child_recovery_thread; /* Stop recovery thread first */ - if (thread != NULL && thread->t_flags & SVC_RUNNING) { + if (thread && thread->t_flags & SVC_RUNNING) { thread->t_flags = SVC_STOPPING; wake_up(&thread->t_ctl_waitq); wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); } lod_getref(ltd); - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; - - tgt = LTD_TGT(ltd, i); + ltd_foreach_tgt(ltd, tgt) { thread = tgt->ltd_recovery_thread; - if (thread != NULL && thread->t_flags & SVC_RUNNING) { + if (thread && thread->t_flags & SVC_RUNNING) { thread->t_flags = SVC_STOPPING; wake_up(&thread->t_ctl_waitq); wait_event(thread->t_ctl_waitq, @@ -686,22 +732,70 @@ static void lod_sub_fini_all_llogs(const struct lu_env *env, struct lod_device *lod) { struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - unsigned int i; + struct lu_tgt_desc *tgt; - /* Stop the update log commit cancel threads and finish master - * llog ctxt */ + /* + * Stop the update log commit cancel threads and finish master + * llog ctxt + */ lod_sub_fini_llog(env, lod->lod_child, &lod->lod_child_recovery_thread); lod_getref(ltd); - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; - - tgt = LTD_TGT(ltd, i); + ltd_foreach_tgt(ltd, 
tgt) lod_sub_fini_llog(env, tgt->ltd_tgt, tgt->ltd_recovery_thread); + lod_putref(lod, ltd); +} + +static char *lod_show_update_logs_retrievers(void *data, int *size, int *count) +{ + struct lod_device *lod = (struct lod_device *)data; + struct lu_target *lut = lod2lu_dev(lod)->ld_site->ls_tgt; + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct lod_tgt_desc *tgt = NULL; + char *buf; + int len = 0; + int rc; + int i; + + *count = atomic_read(&lut->lut_tdtd->tdtd_recovery_threads_count); + if (*count == 0) { + *size = 0; + return NULL; } - lod_putref(lod, ltd); + *size = 5 * *count + 1; + OBD_ALLOC(buf, *size); + if (!buf) + return NULL; + + *count = 0; + memset(buf, 0, *size); + + if (!lod->lod_child_got_update_log) { + rc = lodname2mdt_index(lod2obd(lod)->obd_name, &i); + LASSERTF(rc == 0, "Fail to parse target index: rc = %d\n", rc); + + rc = snprintf(buf + len, *size - len, " %04x", i); + LASSERT(rc > 0); + + len += rc; + (*count)++; + } + + ltd_foreach_tgt(ltd, tgt) { + if (!tgt->ltd_got_update_log) { + rc = snprintf(buf + len, *size - len, " %04x", + tgt->ltd_index); + if (unlikely(rc <= 0)) + break; + + len += rc; + (*count)++; + } + } + + return buf; } /** @@ -719,13 +813,14 @@ static int lod_prepare_distribute_txn(const struct lu_env *env, struct lod_device *lod) { struct target_distribute_txn_data *tdtd; - struct lu_target *lut; - int rc; + struct lu_target *lut; + int rc; + ENTRY; /* Init update recovery data */ OBD_ALLOC_PTR(tdtd); - if (tdtd == NULL) + if (!tdtd) RETURN(-ENOMEM); lut = lod2lu_dev(lod)->ld_site->ls_tgt; @@ -740,6 +835,10 @@ static int lod_prepare_distribute_txn(const struct lu_env *env, RETURN(rc); } + tdtd->tdtd_show_update_logs_retrievers = + lod_show_update_logs_retrievers; + tdtd->tdtd_show_retrievers_cbdata = lod; + lut->lut_tdtd = tdtd; RETURN(0); @@ -757,10 +856,11 @@ static int lod_prepare_distribute_txn(const struct lu_env *env, static void lod_fini_distribute_txn(const struct lu_env *env, struct lod_device *lod) { - 
struct lu_target *lut; + struct lu_target *lut; lut = lod2lu_dev(lod)->ld_site->ls_tgt; - if (lut->lut_tdtd == NULL) + target_recovery_fini(lut->lut_obd); + if (!lut->lut_tdtd) return; distribute_txn_fini(env, lut->lut_tdtd); @@ -807,23 +907,26 @@ static int lod_process_config(const struct lu_env *env, struct lustre_cfg *lcfg) { struct lod_device *lod = lu2lod_dev(dev); - struct lu_device *next = &lod->lod_child->dd_lu_dev; - char *arg1; - int rc = 0; + struct lu_device *next = &lod->lod_child->dd_lu_dev; + char *arg1; + int rc = 0; + ENTRY; - switch(lcfg->lcfg_command) { + switch (lcfg->lcfg_command) { case LCFG_LOV_DEL_OBD: case LCFG_LOV_ADD_INA: case LCFG_LOV_ADD_OBD: case LCFG_ADD_MDC: { - __u32 index; - __u32 mdt_index; + u32 index; + u32 mdt_index; int gen; - /* lov_modify_tgts add 0:lov_mdsA 1:osp 2:0 3:1 + /* + * lov_modify_tgts add 0:lov_mdsA 1:osp 2:0 3:1 * modify_mdc_tgts add 0:lustre-MDT0001 * 1:lustre-MDT0001-mdc0002 - * 2:2 3:1*/ + * 2:2 3:1 + */ arg1 = lustre_cfg_string(lcfg, 1); if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) @@ -832,7 +935,7 @@ static int lod_process_config(const struct lu_env *env, GOTO(out, rc = -EINVAL); if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) { - __u32 mdt_index; + u32 mdt_index; rc = lodname2mdt_index(lustre_cfg_string(lcfg, 0), &mdt_index); @@ -861,40 +964,39 @@ static int lod_process_config(const struct lu_env *env, case LCFG_PARAM: { struct obd_device *obd; + ssize_t count; char *param; - /* Check if it is activate/deactivate mdc - * lustre-MDTXXXX-osp-MDTXXXX.active=1 */ + /* + * Check if it is activate/deactivate mdc + * lustre-MDTXXXX-osp-MDTXXXX.active=1 + */ param = lustre_cfg_buf(lcfg, 1); - if (strstr(param, "osp") != NULL && - strstr(param, ".active=") != NULL) { - struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - struct lod_tgt_desc *sub_tgt = NULL; + if (strstr(param, "osp") && strstr(param, ".active=")) { + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct lod_tgt_desc *sub_tgt = NULL; + 
struct lu_tgt_desc *tgt; char *ptr; char *tmp; - int i; ptr = strstr(param, "."); *ptr = '\0'; obd = class_name2obd(param); - if (obd == NULL) { + if (!obd) { CERROR("%s: can not find %s: rc = %d\n", lod2obd(lod)->obd_name, param, -EINVAL); *ptr = '.'; GOTO(out, rc); } - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; - - tgt = LTD_TGT(ltd, i); + ltd_foreach_tgt(ltd, tgt) { if (tgt->ltd_tgt->dd_lu_dev.ld_obd == obd) { sub_tgt = tgt; break; } } - if (sub_tgt == NULL) { + if (!sub_tgt) { CERROR("%s: can not find %s: rc = %d\n", lod2obd(lod)->obd_name, param, -EINVAL); *ptr = '.'; @@ -910,7 +1012,7 @@ static int lod_process_config(const struct lu_env *env, obd = sub_tgt->ltd_tgt->dd_lu_dev.ld_obd; ctxt = llog_get_context(obd, LLOG_UPDATELOG_ORIG_CTXT); - if (ctxt == NULL) { + if (!ctxt) { rc = llog_setup(env, obd, &obd->obd_olg, LLOG_UPDATELOG_ORIG_CTXT, NULL, &llog_common_cat_ops); @@ -932,11 +1034,14 @@ static int lod_process_config(const struct lu_env *env, GOTO(out, rc); } - obd = lod2obd(lod); - rc = class_process_proc_param(PARAM_LOV, obd->obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; + + if (strstr(param, PARAM_LOD) != NULL) + count = class_modify_config(lcfg, PARAM_LOD, + &lod->lod_dt_dev.dd_kobj); + else + count = class_modify_config(lcfg, PARAM_LOV, + &lod->lod_dt_dev.dd_kobj); + rc = count > 0 ? 
0 : count; GOTO(out, rc); } case LCFG_PRE_CLEANUP: { @@ -954,6 +1059,11 @@ static int lod_process_config(const struct lu_env *env, break; } case LCFG_CLEANUP: { + if (lod->lod_md_root) { + dt_object_put(env, &lod->lod_md_root->ldo_obj); + lod->lod_md_root = NULL; + } + /* * do cleanup on underlying storage only when * all OSPs are cleaned up, as they use that OSD as well @@ -964,19 +1074,20 @@ static int lod_process_config(const struct lu_env *env, next = &lod->lod_child->dd_lu_dev; rc = next->ld_ops->ldo_process_config(env, next, lcfg); if (rc) - CERROR("%s: can't process %u: %d\n", + CERROR("%s: can't process %u: rc = %d\n", lod2obd(lod)->obd_name, lcfg->lcfg_command, rc); rc = obd_disconnect(lod->lod_child_exp); if (rc) - CERROR("error in disconnect from storage: %d\n", rc); + CERROR("error in disconnect from storage: rc = %d\n", + rc); break; } default: - CERROR("%s: unknown command %u\n", lod2obd(lod)->obd_name, - lcfg->lcfg_command); - rc = -EINVAL; - break; + CERROR("%s: unknown command %u\n", lod2obd(lod)->obd_name, + lcfg->lcfg_command); + rc = -EINVAL; + break; } out: @@ -994,10 +1105,11 @@ out: static int lod_recovery_complete(const struct lu_env *env, struct lu_device *dev) { - struct lod_device *lod = lu2lod_dev(dev); - struct lu_device *next = &lod->lod_child->dd_lu_dev; - unsigned int i; - int rc; + struct lod_device *lod = lu2lod_dev(dev); + struct lu_device *next = &lod->lod_child->dd_lu_dev; + unsigned int i; + int rc; + ENTRY; LASSERT(lod->lod_recovery_completed == 0); @@ -1009,13 +1121,14 @@ static int lod_recovery_complete(const struct lu_env *env, if (lod->lod_osts_size > 0) { cfs_foreach_bit(lod->lod_ost_bitmap, i) { struct lod_tgt_desc *tgt; + tgt = OST_TGT(lod, i); LASSERT(tgt && tgt->ltd_tgt); next = &tgt->ltd_ost->dd_lu_dev; rc = next->ld_ops->ldo_recovery_complete(env, next); if (rc) - CERROR("%s: can't complete recovery on #%d:" - "%d\n", lod2obd(lod)->obd_name, i, rc); + CERROR("%s: can't complete recovery on #%d: rc = %d\n", + 
lod2obd(lod)->obd_name, i, rc); } } lod_putref(lod, &lod->lod_ost_descs); @@ -1036,13 +1149,16 @@ static int lod_recovery_complete(const struct lu_env *env, */ static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod) { - struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; - int rc; - unsigned int i; + struct lod_tgt_descs *ltd = &lod->lod_mdt_descs; + struct lu_tgt_desc *tgt; + int rc; + ENTRY; - /* llog must be setup after LOD is initialized, because llog - * initialization include FLD lookup */ + /* + * llog must be setup after LOD is initialized, because llog + * initialization include FLD lookup + */ LASSERT(lod->lod_initialized); /* Init the llog in its own stack */ @@ -1050,10 +1166,7 @@ static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod) if (rc < 0) RETURN(rc); - cfs_foreach_bit(ltd->ltd_tgt_bitmap, i) { - struct lod_tgt_desc *tgt; - - tgt = LTD_TGT(ltd, i); + ltd_foreach_tgt(ltd, tgt) { rc = lod_sub_init_llog(env, lod, tgt->ltd_tgt); if (rc != 0) break; @@ -1070,13 +1183,14 @@ static int lod_sub_init_llogs(const struct lu_env *env, struct lod_device *lod) static int lod_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *cdev) { - struct lod_device *lod = lu2lod_dev(cdev); - struct lu_device *next = &lod->lod_child->dd_lu_dev; - struct lu_fid *fid = &lod_env_info(env)->lti_fid; - int rc; - struct dt_object *root; - struct dt_object *dto; - __u32 index; + struct lod_device *lod = lu2lod_dev(cdev); + struct lu_device *next = &lod->lod_child->dd_lu_dev; + struct lu_fid *fid = &lod_env_info(env)->lti_fid; + int rc; + struct dt_object *root; + struct dt_object *dto; + u32 index; + ENTRY; rc = next->ld_ops->ldo_prepare(env, pdev, next); @@ -1103,22 +1217,22 @@ static int lod_prepare(const struct lu_env *env, struct lu_device *pdev, dto = local_file_find_or_create_with_fid(env, lod->lod_child, fid, root, lod_update_log_name, - S_IFREG | S_IRUGO | S_IWUSR); + S_IFREG | 0644); if (IS_ERR(dto)) 
GOTO(out_put, rc = PTR_ERR(dto)); - lu_object_put(env, &dto->do_lu); + dt_object_put(env, dto); /* Create update log dir */ lu_update_log_dir_fid(fid, index); dto = local_file_find_or_create_with_fid(env, lod->lod_child, fid, root, lod_update_log_dir_name, - S_IFDIR | S_IRUGO | S_IWUSR); + S_IFDIR | 0644); if (IS_ERR(dto)) GOTO(out_put, rc = PTR_ERR(dto)); - lu_object_put(env, &dto->do_lu); + dt_object_put(env, dto); rc = lod_prepare_distribute_txn(env, lod); if (rc != 0) @@ -1129,7 +1243,7 @@ static int lod_prepare(const struct lu_env *env, struct lu_device *pdev, GOTO(out_put, rc); out_put: - lu_object_put(env, &root->do_lu); + dt_object_put(env, root); RETURN(rc); } @@ -1152,15 +1266,119 @@ static int lod_root_get(const struct lu_env *env, return dt_root_get(env, dt2lod_dev(dev)->lod_child, f); } +static void lod_statfs_sum(struct obd_statfs *sfs, + struct obd_statfs *ost_sfs, int *bs) +{ + while (ost_sfs->os_bsize < *bs) { + *bs >>= 1; + sfs->os_bsize >>= 1; + sfs->os_bavail <<= 1; + sfs->os_blocks <<= 1; + sfs->os_bfree <<= 1; + sfs->os_granted <<= 1; + } + while (ost_sfs->os_bsize > *bs) { + ost_sfs->os_bsize >>= 1; + ost_sfs->os_bavail <<= 1; + ost_sfs->os_blocks <<= 1; + ost_sfs->os_bfree <<= 1; + ost_sfs->os_granted <<= 1; + } + sfs->os_bavail += ost_sfs->os_bavail; + sfs->os_blocks += ost_sfs->os_blocks; + sfs->os_bfree += ost_sfs->os_bfree; + sfs->os_granted += ost_sfs->os_granted; +} + /** * Implementation of dt_device_operations::dt_statfs() for LOD * * see include/dt_object.h for the details. 
*/ -static int lod_statfs(const struct lu_env *env, - struct dt_device *dev, struct obd_statfs *sfs) +static int lod_statfs(const struct lu_env *env, struct dt_device *dev, + struct obd_statfs *sfs, struct obd_statfs_info *info) { - return dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs); + struct lod_device *lod = dt2lod_dev(dev); + struct lod_ost_desc *ost; + struct lod_mdt_desc *mdt; + struct obd_statfs ost_sfs; + u64 ost_files = 0; + u64 ost_ffree = 0; + int i, rc, bs; + + rc = dt_statfs(env, dt2lod_dev(dev)->lod_child, sfs); + if (rc) + GOTO(out, rc); + + bs = sfs->os_bsize; + + sfs->os_bavail = 0; + sfs->os_blocks = 0; + sfs->os_bfree = 0; + sfs->os_granted = 0; + + lod_getref(&lod->lod_mdt_descs); + lod_foreach_mdt(lod, i) { + mdt = MDT_TGT(lod, i); + LASSERT(mdt && mdt->ltd_mdt); + rc = dt_statfs(env, mdt->ltd_mdt, &ost_sfs); + /* ignore errors */ + if (rc) + continue; + sfs->os_files += ost_sfs.os_files; + sfs->os_ffree += ost_sfs.os_ffree; + lod_statfs_sum(sfs, &ost_sfs, &bs); + } + lod_putref(lod, &lod->lod_mdt_descs); + + /* + * at some point we can check whether DoM is enabled and + * decide how to account MDT space. 
for simplicity let's + * just fallback to pre-DoM policy if any OST is alive + */ + lod_getref(&lod->lod_ost_descs); + lod_foreach_ost(lod, i) { + ost = OST_TGT(lod, i); + LASSERT(ost && ost->ltd_ost); + rc = dt_statfs(env, ost->ltd_ost, &ost_sfs); + /* ignore errors */ + if (rc || ost_sfs.os_bsize == 0) + continue; + if (!ost_files) { + /* + * if only MDTs with DoM then report only MDT blocks, + * otherwise show only OST blocks, and DoM is "free" + */ + sfs->os_bavail = 0; + sfs->os_blocks = 0; + sfs->os_bfree = 0; + sfs->os_granted = 0; + } + ost_files += ost_sfs.os_files; + ost_ffree += ost_sfs.os_ffree; + ost_sfs.os_bavail += ost_sfs.os_granted; + lod_statfs_sum(sfs, &ost_sfs, &bs); + LASSERTF(bs == ost_sfs.os_bsize, "%d != %d\n", + (int)sfs->os_bsize, (int)ost_sfs.os_bsize); + } + lod_putref(lod, &lod->lod_ost_descs); + sfs->os_state |= OS_STATE_SUM; + + /* If we have _some_ OSTs, but don't have as many free objects on the + * OSTs as inodes on the MDTs, reduce the reported number of inodes + * to compensate, so that the "inodes in use" number is correct. + * This should be kept in sync with ll_statfs_internal(). 
+ */ + if (ost_files && ost_ffree < sfs->os_ffree) { + sfs->os_files = (sfs->os_files - sfs->os_ffree) + ost_ffree; + sfs->os_ffree = ost_ffree; + } + + /* a single successful statfs should be enough */ + rc = 0; + +out: + RETURN(rc); } /** @@ -1230,7 +1448,7 @@ static int lod_add_noop_records(const struct lu_env *env, int rc = 0; top_th = container_of(th, struct top_thandle, tt_super); - if (top_th->tt_multiple_thandle == NULL) + if (!top_th->tt_multiple_thandle) return 0; fid_zero(fid); @@ -1287,24 +1505,53 @@ static void lod_conf_get(const struct lu_env *env, */ static int lod_sync(const struct lu_env *env, struct dt_device *dev) { - struct lod_device *lod = dt2lod_dev(dev); + struct lod_device *lod = dt2lod_dev(dev); struct lod_ost_desc *ost; - unsigned int i; - int rc = 0; + struct lod_mdt_desc *mdt; + unsigned int i; + int rc = 0; + ENTRY; lod_getref(&lod->lod_ost_descs); lod_foreach_ost(lod, i) { ost = OST_TGT(lod, i); LASSERT(ost && ost->ltd_ost); + if (!ost->ltd_active) + continue; rc = dt_sync(env, ost->ltd_ost); if (rc) { - CERROR("%s: can't sync %u: %d\n", - lod2obd(lod)->obd_name, i, rc); - break; + if (rc != -ENOTCONN) { + CERROR("%s: can't sync ost %u: rc = %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; } } lod_putref(lod, &lod->lod_ost_descs); + + if (rc) + RETURN(rc); + + lod_getref(&lod->lod_mdt_descs); + lod_foreach_mdt(lod, i) { + mdt = MDT_TGT(lod, i); + LASSERT(mdt && mdt->ltd_mdt); + if (!mdt->ltd_active) + continue; + rc = dt_sync(env, mdt->ltd_mdt); + if (rc) { + if (rc != -ENOTCONN) { + CERROR("%s: can't sync mdt %u: rc = %d\n", + lod2obd(lod)->obd_name, i, rc); + break; + } + rc = 0; + } + } + lod_putref(lod, &lod->lod_mdt_descs); + if (rc == 0) rc = dt_sync(env, lod->lod_child); @@ -1366,15 +1613,17 @@ static int lod_connect_to_osd(const struct lu_env *env, struct lod_device *lod, struct lustre_cfg *cfg) { struct obd_connect_data *data = NULL; - struct obd_device *obd; - char *nextdev = NULL, *p, *s; - int rc, len = 0; + 
struct obd_device *obd; + char *nextdev = NULL, *p, *s; + int rc, len = 0; + ENTRY; LASSERT(cfg); LASSERT(lod->lod_child_exp == NULL); - /* compatibility hack: we still use old config logs + /* + * compatibility hack: we still use old config logs * which specify LOV, but we need to learn underlying * OSD device, which is supposed to be: * <fsname>-MDTxxxx-osd @@ -1384,19 +1633,20 @@ static int lod_connect_to_osd(const struct lu_env *env, struct lod_device *lod, * 1.8 MGS generates lines like the following: * #03 (168)lov_setup 0:lustre-mdtlov 1:(struct lov_desc) * - * we use "-MDT" to differentiate 2.x from 1.8 */ - - if ((p = lustre_cfg_string(cfg, 0)) && strstr(p, "-mdtlov")) { + * we use "-MDT" to differentiate 2.x from 1.8 + */ + p = lustre_cfg_string(cfg, 0); + if (p && strstr(p, "-mdtlov")) { len = strlen(p) + 6; OBD_ALLOC(nextdev, len); - if (nextdev == NULL) + if (!nextdev) GOTO(out, rc = -ENOMEM); strcpy(nextdev, p); s = strstr(nextdev, "-mdtlov"); - if (unlikely(s == NULL)) { - CERROR("unable to parse device name %s\n", - lustre_cfg_string(cfg, 0)); + if (unlikely(!s)) { + CERROR("%s: unable to parse device name: rc = %d\n", + lustre_cfg_string(cfg, 0), -EINVAL); GOTO(out, rc = -EINVAL); } @@ -1408,18 +1658,19 @@ static int lod_connect_to_osd(const struct lu_env *env, struct lod_device *lod, strcpy(s, "-MDT0000-osd"); } } else { - CERROR("unable to parse device name %s\n", - lustre_cfg_string(cfg, 0)); + CERROR("%s: unable to parse device name: rc = %d\n", + lustre_cfg_string(cfg, 0), -EINVAL); GOTO(out, rc = -EINVAL); } OBD_ALLOC_PTR(data); - if (data == NULL) + if (!data) GOTO(out, rc = -ENOMEM); obd = class_name2obd(nextdev); - if (obd == NULL) { - CERROR("can not locate next device: %s\n", nextdev); + if (!obd) { + CERROR("%s: can not locate next device: rc = %d\n", + nextdev, -ENOTCONN); GOTO(out, rc = -ENOTCONN); } @@ -1429,7 +1680,8 @@ static int lod_connect_to_osd(const struct lu_env *env, struct lod_device *lod, rc = obd_connect(env, &lod->lod_child_exp, 
obd, &obd->obd_uuid, data, NULL); if (rc) { - CERROR("cannot connect to next dev %s (%d)\n", nextdev, rc); + CERROR("%s: cannot connect to next dev: rc = %d\n", + nextdev, rc); GOTO(out, rc); } @@ -1447,36 +1699,6 @@ out: } /** - * Allocate and initialize target table. - * - * A helper function to initialize the target table and allocate - * a bitmap of the available targets. - * - * \param[in] ltd target's table to initialize - * - * \retval 0 on success - * \retval negative negated errno on error - **/ -static int lod_tgt_desc_init(struct lod_tgt_descs *ltd) -{ - mutex_init(&ltd->ltd_mutex); - init_rwsem(&ltd->ltd_rw_sem); - - /* the OST array and bitmap are allocated/grown dynamically as OSTs are - * added to the LOD, see lod_add_device() */ - ltd->ltd_tgt_bitmap = CFS_ALLOCATE_BITMAP(32); - if (ltd->ltd_tgt_bitmap == NULL) - RETURN(-ENOMEM); - - ltd->ltd_tgts_size = 32; - ltd->ltd_tgtnr = 0; - - ltd->ltd_death_row = 0; - ltd->ltd_refcount = 0; - return 0; -} - -/** * Initialize LOD device at setup. * * Initializes the given LOD device using the original configuration command. 
@@ -1495,15 +1717,17 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, struct lu_device_type *ldt, struct lustre_cfg *cfg) { struct dt_device_param ddp; - struct obd_device *obd; - int rc; + struct obd_device *obd; + int rc; + ENTRY; obd = class_name2obd(lustre_cfg_string(cfg, 0)); - if (obd == NULL) { - CERROR("Cannot find obd with name %s\n", - lustre_cfg_string(cfg, 0)); - RETURN(-ENODEV); + if (!obd) { + rc = -ENODEV; + CERROR("Cannot find obd with name '%s': rc = %d\n", + lustre_cfg_string(cfg, 0), rc); + RETURN(rc); } obd->obd_lu_dev = &lod->lod_dt_dev.dd_lu_dev; @@ -1517,6 +1741,7 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, dt_conf_get(env, &lod->lod_dt_dev, &ddp); lod->lod_osd_max_easize = ddp.ddp_max_ea_size; + lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */ /* setup obd to be used with old lov code */ rc = lod_pools_init(lod, cfg); @@ -1527,10 +1752,10 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, if (rc) GOTO(out_pools, rc); - spin_lock_init(&lod->lod_desc_lock); + spin_lock_init(&lod->lod_lock); spin_lock_init(&lod->lod_connects_lock); - lod_tgt_desc_init(&lod->lod_mdt_descs); - lod_tgt_desc_init(&lod->lod_ost_descs); + lu_tgt_descs_init(&lod->lod_mdt_descs); + lu_tgt_descs_init(&lod->lod_ost_descs); RETURN(0); @@ -1553,8 +1778,14 @@ static struct lu_device *lod_device_free(const struct lu_env *env, { struct lod_device *lod = lu2lod_dev(lu); struct lu_device *next = &lod->lod_child->dd_lu_dev; + ENTRY; + if (atomic_read(&lu->ld_ref) > 0 && + !cfs_hash_is_empty(lu->ld_site->ls_obj_hash)) { + LIBCFS_DEBUG_MSG_DATA_DECL(msgdata, D_ERROR, NULL); + lu_site_print(env, lu->ld_site, &msgdata, lu_cdebug_printer); + } LASSERTF(atomic_read(&lu->ld_ref) == 0, "lu is %p\n", lu); dt_device_fini(&lod->lod_dt_dev); OBD_FREE_PTR(lod); @@ -1573,10 +1804,10 @@ static struct lu_device *lod_device_alloc(const struct lu_env *env, struct lustre_cfg *lcfg) { struct 
lod_device *lod; - struct lu_device *lu_dev; + struct lu_device *lu_dev; OBD_ALLOC_PTR(lod); - if (lod == NULL) { + if (!lod) { lu_dev = ERR_PTR(-ENOMEM); } else { int rc; @@ -1593,6 +1824,15 @@ static struct lu_device *lod_device_alloc(const struct lu_env *env, return lu_dev; } +static void lod_avoid_guide_fini(struct lod_avoid_guide *lag) +{ + if (lag->lag_oss_avoid_array) + OBD_FREE(lag->lag_oss_avoid_array, + sizeof(u32) * lag->lag_oaa_size); + if (lag->lag_ost_avoid_bitmap) + CFS_FREE_BITMAP(lag->lag_ost_avoid_bitmap); +} + /** * Implementation of lu_device_type_operations::ldto_device_fini() for LOD * @@ -1604,7 +1844,8 @@ static struct lu_device *lod_device_fini(const struct lu_env *env, struct lu_device *d) { struct lod_device *lod = lu2lod_dev(d); - int rc; + int rc; + ENTRY; lod_pools_fini(lod); @@ -1613,12 +1854,12 @@ static struct lu_device *lod_device_fini(const struct lu_env *env, rc = lod_fini_tgt(env, lod, &lod->lod_ost_descs, true); if (rc) - CERROR("%s:can not fini ost descs %d\n", + CERROR("%s: can not fini ost descriptors: rc = %d\n", lod2obd(lod)->obd_name, rc); rc = lod_fini_tgt(env, lod, &lod->lod_mdt_descs, false); if (rc) - CERROR("%s:can not fini mdt descs %d\n", + CERROR("%s: can not fini mdt descriptors: rc = %d\n", lod2obd(lod)->obd_name, rc); RETURN(NULL); @@ -1644,9 +1885,10 @@ static int lod_obd_connect(const struct lu_env *env, struct obd_export **exp, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data, void *localdata) { - struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev); - struct lustre_handle conn; - int rc; + struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev); + struct lustre_handle conn; + int rc; + ENTRY; CDEBUG(D_CONFIG, "connect #%d\n", lod->lod_connects); @@ -1683,7 +1925,8 @@ static int lod_obd_disconnect(struct obd_export *exp) { struct obd_device *obd = exp->exp_obd; struct lod_device *lod = lu2lod_dev(obd->obd_lu_dev); - int rc, release = 0; + int rc, release = 0; + ENTRY; /* Only 
disconnect the underlying layers on the final disconnect. */ @@ -1715,7 +1958,11 @@ static void lod_key_fini(const struct lu_context *ctx, struct lu_context_key *key, void *data) { struct lod_thread_info *info = data; - /* allocated in lod_get_lov_ea + struct lod_layout_component *lds = + info->lti_def_striping.lds_def_comp_entries; + + /* + * allocated in lod_get_lov_ea * XXX: this is overload, a tread may have such store but used only * once. Probably better would be pool of such stores per LOD. */ @@ -1725,6 +1972,16 @@ static void lod_key_fini(const struct lu_context *ctx, info->lti_ea_store_size = 0; } lu_buf_free(&info->lti_linkea_buf); + + if (lds) + lod_free_def_comp_entries(&info->lti_def_striping); + + if (info->lti_comp_size > 0) + OBD_FREE(info->lti_comp_idx, + info->lti_comp_size * sizeof(u32)); + + lod_avoid_guide_fini(&info->lti_avoid); + OBD_FREE_PTR(info); } @@ -1774,16 +2031,16 @@ static struct lu_device_type lod_device_type = { * \retval -EINVAL if not supported key is requested **/ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp, - __u32 keylen, void *key, __u32 *vallen, void *val) + u32 keylen, void *key, u32 *vallen, void *val) { int rc = -EINVAL; if (KEY_IS(KEY_OSP_CONNECTED)) { - struct obd_device *obd = exp->exp_obd; - struct lod_device *d; - struct lod_tgt_desc *tgt; - unsigned int i; - int rc = 1; + struct obd_device *obd = exp->exp_obd; + struct lod_device *d; + struct lod_tgt_desc *tgt; + unsigned int i; + int rc = 1; if (!obd->obd_set_up || obd->obd_stopping) RETURN(-EAGAIN); @@ -1794,7 +2051,7 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp, tgt = OST_TGT(d, i); LASSERT(tgt && tgt->ltd_tgt); rc = obd_get_info(env, tgt->ltd_exp, keylen, key, - vallen, val); + vallen, val); /* one healthy device is enough */ if (rc == 0) break; @@ -1813,14 +2070,14 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp, ctxt = 
llog_get_context(tgt->ltd_tgt->dd_lu_dev.ld_obd, LLOG_UPDATELOG_ORIG_CTXT); - if (ctxt == NULL) { + if (!ctxt) { CDEBUG(D_INFO, "%s: %s is not ready.\n", obd->obd_name, tgt->ltd_tgt->dd_lu_dev.ld_obd->obd_name); rc = -EAGAIN; break; } - if (ctxt->loc_handle == NULL) { + if (!ctxt->loc_handle) { CDEBUG(D_INFO, "%s: %s is not ready.\n", obd->obd_name, tgt->ltd_tgt->dd_lu_dev.ld_obd->obd_name); @@ -1838,20 +2095,80 @@ static int lod_obd_get_info(const struct lu_env *env, struct obd_export *exp, RETURN(rc); } +static int lod_obd_set_info_async(const struct lu_env *env, + struct obd_export *exp, + u32 keylen, void *key, + u32 vallen, void *val, + struct ptlrpc_request_set *set) +{ + struct obd_device *obd = class_exp2obd(exp); + struct lod_device *d; + struct lod_tgt_desc *tgt; + int no_set = 0; + int i, rc = 0, rc2; + + ENTRY; + + if (!set) { + no_set = 1; + set = ptlrpc_prep_set(); + if (!set) + RETURN(-ENOMEM); + } + + d = lu2lod_dev(obd->obd_lu_dev); + lod_getref(&d->lod_ost_descs); + lod_foreach_ost(d, i) { + tgt = OST_TGT(d, i); + LASSERT(tgt && tgt->ltd_tgt); + if (!tgt->ltd_active) + continue; + + rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key, + vallen, val, set); + if (rc2 != 0 && rc == 0) + rc = rc2; + } + lod_putref(d, &d->lod_ost_descs); + + lod_getref(&d->lod_mdt_descs); + lod_foreach_mdt(d, i) { + tgt = MDT_TGT(d, i); + LASSERT(tgt && tgt->ltd_tgt); + if (!tgt->ltd_active) + continue; + rc2 = obd_set_info_async(env, tgt->ltd_exp, keylen, key, + vallen, val, set); + if (rc2 != 0 && rc == 0) + rc = rc2; + } + lod_putref(d, &d->lod_mdt_descs); + + + if (no_set) { + rc2 = ptlrpc_set_wait(env, set); + if (rc2 == 0 && rc == 0) + rc = rc2; + ptlrpc_set_destroy(set); + } + RETURN(rc); +} + static struct obd_ops lod_obd_device_ops = { .o_owner = THIS_MODULE, .o_connect = lod_obd_connect, .o_disconnect = lod_obd_disconnect, .o_get_info = lod_obd_get_info, + .o_set_info_async = lod_obd_set_info_async, .o_pool_new = lod_pool_new, .o_pool_rem = 
lod_pool_remove, .o_pool_add = lod_pool_add, .o_pool_del = lod_pool_del, }; -static int __init lod_mod_init(void) +static int __init lod_init(void) { - struct obd_type *type; + struct obd_type *sym; int rc; rc = lu_kmem_init(lod_caches); @@ -1865,32 +2182,41 @@ static int __init lod_mod_init(void) return rc; } - /* create "lov" entry in procfs for compatibility purposes */ - type = class_search_type(LUSTRE_LOV_NAME); - if (type != NULL && type->typ_procroot != NULL) - return rc; - - type = class_search_type(LUSTRE_LOD_NAME); - type->typ_procsym = lprocfs_register("lov", proc_lustre_root, - NULL, NULL); - if (IS_ERR(type->typ_procsym)) { - CERROR("lod: can't create compat entry \"lov\": %d\n", - (int)PTR_ERR(type->typ_procsym)); - type->typ_procsym = NULL; + /* create "lov" entry for compatibility purposes */ + sym = class_add_symlinks(LUSTRE_LOV_NAME, true); + if (IS_ERR(sym)) { + rc = PTR_ERR(sym); + /* does real "lov" already exist ? */ + if (rc == -EEXIST) + rc = 0; } + return rc; } -static void __exit lod_mod_exit(void) +static void __exit lod_exit(void) { + struct obd_type *sym = class_search_type(LUSTRE_LOV_NAME); + + /* if this was never fully initialized by the lov layer + * then we are responsible for freeing this obd_type + */ + if (sym) { + /* final put if we manage this obd type */ + if (sym->typ_sym_filter) + kobject_put(&sym->typ_kobj); + /* put reference taken by class_search_type */ + kobject_put(&sym->typ_kobj); + } + class_unregister_type(LUSTRE_LOD_NAME); lu_kmem_fini(lod_caches); } -MODULE_AUTHOR("Intel Corporation. "); +MODULE_AUTHOR("OpenSFS, Inc. "); MODULE_DESCRIPTION("Lustre Logical Object Device ("LUSTRE_LOD_NAME")"); +MODULE_VERSION(LUSTRE_VERSION_STRING); MODULE_LICENSE("GPL"); -module_init(lod_mod_init); -module_exit(lod_mod_exit); - +module_init(lod_init); +module_exit(lod_exit);