From: Kit Westneat
Date: Mon, 11 Jul 2016 15:28:08 +0000 (-0400)
Subject: LU-5092 nodemap: save nodemaps to targets for caching
X-Git-Tag: 2.8.57~62
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=be7efb4ce06996444c08914305a73833a7123eeb;hp=279e4dd34e18de0f8ecda61b65d6e0f56f7c9fc8

LU-5092 nodemap: save nodemaps to targets for caching

Modify nodemap config storage to save config to targets as well as
MGSes. This allows targets to start with the last received nodemap
configuration even if the MGS is not available. The config is replaced
by the MGS' config the next time the MGS is available.

Signed-off-by: Kit Westneat
Change-Id: I1c5221815618fe0265908bfd900ba55f44d1021b
Reviewed-on: http://review.whamcloud.com/17503
Tested-by: Jenkins
Tested-by: Maloo
Reviewed-by: John L. Hammond
Reviewed-by: James Simmons
Reviewed-by: Oleg Drokin
---
Review notes (not part of the original commit):

 * Line breaks and indentation were reconstructed from a copy in which all
   whitespace had been collapsed; context-line whitespace may not match the
   tree byte for byte.
 * The #include targets in the mdd_device.c hunk were already missing in
   that copy and are left as found. The added include is presumably
   <lustre_nodemap.h>, which declares nodemap_fs_init()/nodemap_fs_fini();
   confirm against the tree.
 * nodemap_save_config_cache(): the "out" label used to sit in front of
   mutex_unlock(&active_config_lock), but the only jump to it happens
   before the mutex is taken. The label now follows the unlock.
 * nodemap_cache_find_create(): when do_index_try() failed with
   force_create already set, nm_obj was replaced by ERR_PTR(rc) without
   dropping its reference. The reference is now put on every failure path.
 * Doc comments were added to the new functions; the nodemap_storage.c hunk
   headers were recomputed for the added lines. The "index" blob ids below
   are those of the original commit.
 * Not changed, worth confirming: whether nm_config_file_register() releases
   config_obj itself when it fails inside nodemap_fs_init().

diff --git a/lustre/include/lustre_nodemap.h b/lustre/include/lustre_nodemap.h
index 6eb79ce..44527a0 100644
--- a/lustre/include/lustre_nodemap.h
+++ b/lustre/include/lustre_nodemap.h
@@ -192,6 +192,11 @@ void nodemap_config_set_active_mgc(struct nodemap_config *config);
 int nodemap_process_idx_pages(struct nodemap_config *config,
 			      union lu_page *lip,
 			      struct lu_nodemap **recent_nodemap);
+
+struct dt_device;
+int nodemap_fs_init(const struct lu_env *env, struct dt_device *dev,
+		    struct obd_device *obd, struct local_oid_storage *los);
+void nodemap_fs_fini(const struct lu_env *env, struct obd_device *obd);
 #else /* disable nodemap processing in MGC of non-servers */
 static inline int nodemap_process_idx_pages(void *config,
 					    union lu_page *lip,
diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c
index 6129e11..2728b20 100644
--- a/lustre/mdd/mdd_device.c
+++ b/lustre/mdd/mdd_device.c
@@ -51,6 +51,7 @@
 #include
 #include
 #include
+#include
 
 #include "mdd_internal.h"
 
@@ -883,6 +884,7 @@ static void mdd_device_shutdown(const struct lu_env *env, struct mdd_device *m,
 	mdd_changelog_fini(env, m);
 	orph_index_fini(env, m);
 	mdd_dot_lustre_cleanup(env, m);
+	nodemap_fs_fini(env, mdd2obd_dev(m));
 	if (m->mdd_los != NULL) {
 		local_oid_storage_fini(env, m->mdd_los);
 		m->mdd_los = NULL;
@@ -1056,15 +1058,24 @@ static int mdd_prepare(const struct lu_env *env,
 	if (rc != 0)
 		GOTO(out_changelog, rc);
 
+	rc = nodemap_fs_init(env, mdd->mdd_bottom, mdd2obd_dev(mdd),
+			     mdd->mdd_los);
+	if (rc != 0)
+		GOTO(out_hsm, rc);
+
 	rc = lfsck_register(env, mdd->mdd_bottom, mdd->mdd_child,
 			    mdd2obd_dev(mdd), mdd_lfsck_out_notify,
 			    mdd, true);
 	if (rc != 0) {
 		CERROR("%s: failed to initialize lfsck: rc = %d\n",
 		       mdd2obd_dev(mdd)->obd_name, rc);
-		GOTO(out_hsm, rc);
+		GOTO(out_nodemap, rc);
 	}
+
 	RETURN(0);
+
+out_nodemap:
+	nodemap_fs_fini(env, mdd2obd_dev(mdd));
 out_hsm:
 	mdd_hsm_actions_llog_fini(env, mdd);
 out_changelog:
diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c
index 05365a9..7e596a4 100644
--- a/lustre/ofd/ofd_dev.c
+++ b/lustre/ofd/ofd_dev.c
@@ -2802,6 +2802,7 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
 	struct ofd_thread_info *info = NULL;
 	struct obd_device *obd;
 	struct obd_statfs *osfs;
+	struct lu_fid fid;
 	int rc;
 
 	ENTRY;
@@ -2933,14 +2934,31 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
 	if (rc)
 		GOTO(err_fini_lut, rc);
 
-	rc = ofd_start_inconsistency_verification_thread(m);
+	fid.f_seq = FID_SEQ_LOCAL_NAME;
+	fid.f_oid = 1;
+	fid.f_ver = 0;
+	rc = local_oid_storage_init(env, m->ofd_osd, &fid,
+				    &m->ofd_los);
 	if (rc != 0)
 		GOTO(err_fini_fs, rc);
 
+	rc = nodemap_fs_init(env, m->ofd_osd, obd, m->ofd_los);
+	if (rc != 0)
+		GOTO(err_fini_los, rc);
+
+	rc = ofd_start_inconsistency_verification_thread(m);
+	if (rc != 0)
+		GOTO(err_fini_nm, rc);
+
 	tgt_adapt_sptlrpc_conf(&m->ofd_lut, 1);
 
 	RETURN(0);
 
+err_fini_nm:
+	nodemap_fs_fini(env, obd);
+err_fini_los:
+	local_oid_storage_fini(env, m->ofd_los);
+	m->ofd_los = NULL;
 err_fini_fs:
 	ofd_fs_cleanup(env, m);
 err_fini_lut:
@@ -2985,6 +3003,12 @@ static void ofd_fini(const struct lu_env *env, struct ofd_device *m)
 	ofd_stop_inconsistency_verification_thread(m);
 	lfsck_degister(env, m->ofd_osd);
 	ofd_fs_cleanup(env, m);
+	nodemap_fs_fini(env, obd);
+
+	if (m->ofd_los != NULL) {
+		local_oid_storage_fini(env, m->ofd_los);
+		m->ofd_los = NULL;
+	}
 
 	if (m->ofd_namespace != NULL) {
 		ldlm_namespace_free_post(m->ofd_namespace);
diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h
index 648c859..e2955d2 100644
--- a/lustre/ofd/ofd_internal.h
+++ b/lustre/ofd/ofd_internal.h
@@ -124,6 +124,7 @@ struct ofd_device {
 	/* last_rcvd file */
 	struct lu_target	 ofd_lut;
 	struct dt_object	*ofd_health_check_file;
+	struct local_oid_storage *ofd_los;
 
 	int			 ofd_subdir_count;
 	__u64			 ofd_inconsistency_self_detected;
diff --git a/lustre/ptlrpc/nodemap_handler.c b/lustre/ptlrpc/nodemap_handler.c
index 2086246..59b1734 100644
--- a/lustre/ptlrpc/nodemap_handler.c
+++ b/lustre/ptlrpc/nodemap_handler.c
@@ -1399,9 +1399,13 @@ void nodemap_config_dealloc(struct nodemap_config *config)
 }
 EXPORT_SYMBOL(nodemap_config_dealloc);
 
-static int nm_hash_list_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
-			   struct hlist_node *hnode,
-			   void *nodemap_list_head)
+/*
+ * callback for cfs_hash_for_each_safe used to convert a nodemap hash to a
+ * nodemap list, generally for locking purposes as a hash cb can't sleep.
+ */
+int nm_hash_list_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+		    struct hlist_node *hnode,
+		    void *nodemap_list_head)
 {
 	struct lu_nodemap *nodemap;
 
diff --git a/lustre/ptlrpc/nodemap_internal.h b/lustre/ptlrpc/nodemap_internal.h
index 4eec107..5d75fd7 100644
--- a/lustre/ptlrpc/nodemap_internal.h
+++ b/lustre/ptlrpc/nodemap_internal.h
@@ -171,6 +171,9 @@ struct rb_node *nm_rb_next_postorder(const struct rb_node *node);
 struct rb_node *nm_rb_first_postorder(const struct rb_root *root);
 void nodemap_getref(struct lu_nodemap *nodemap);
 void nodemap_putref(struct lu_nodemap *nodemap);
+int nm_hash_list_cb(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+		    struct hlist_node *hnode,
+		    void *nodemap_list_head);
 
 #define nm_rbtree_postorder_for_each_entry_safe(pos, n,			\
 						root, field)		\
diff --git a/lustre/ptlrpc/nodemap_storage.c b/lustre/ptlrpc/nodemap_storage.c
index 8586745..f53a841 100644
--- a/lustre/ptlrpc/nodemap_storage.c
+++ b/lustre/ptlrpc/nodemap_storage.c
@@ -153,6 +153,81 @@ static void nodemap_inc_version(const struct lu_env *env,
 	dt_version_set(env, nodemap_idx, ver + 1, th);
 }
 
+/**
+ * Look up the nodemap index under the root of \a dev, creating it through
+ * local_index_find_or_create() when needed.
+ *
+ * \param	env		execution environment
+ * \param	dev		device the index lives on
+ * \param	los		local OID storage used for the index object
+ * \param	force_create	unlink the existing index first, then recreate it
+ *
+ * \retval	index object on success
+ * \retval	ERR_PTR(-ve) on failure
+ */
+static struct dt_object *nodemap_cache_find_create(const struct lu_env *env,
+						   struct dt_device *dev,
+						   struct local_oid_storage *los,
+						   bool force_create)
+{
+	struct lu_fid root_fid;
+	struct dt_object *root_obj;
+	struct dt_object *nm_obj;
+	int rc = 0;
+
+	rc = dt_root_get(env, dev, &root_fid);
+	if (rc < 0)
+		GOTO(out, nm_obj = ERR_PTR(rc));
+
+	root_obj = dt_locate(env, dev, &root_fid);
+	if (unlikely(IS_ERR(root_obj)))
+		GOTO(out, nm_obj = root_obj);
+
+again:
+	/* if loading index fails the first time, try again with force_create */
+	if (force_create) {
+		CDEBUG(D_INFO, "removing old index, creating new one\n");
+		rc = local_object_unlink(env, dev, root_obj,
+					 LUSTRE_NODEMAP_NAME);
+		if (rc < 0) {
+			/* XXX not sure the best way to get obd name. */
+			CERROR("cannot destroy nodemap index: rc = %d\n",
+			       rc);
+			GOTO(out_root, nm_obj = ERR_PTR(rc));
+		}
+	}
+
+	nm_obj = local_index_find_or_create(env, los, root_obj,
+					    LUSTRE_NODEMAP_NAME,
+					    S_IFREG | S_IRUGO | S_IWUSR,
+					    &dt_nodemap_features);
+	if (IS_ERR(nm_obj))
+		GOTO(out_root, nm_obj);
+
+	if (nm_obj->do_index_ops == NULL) {
+		rc = nm_obj->do_ops->do_index_try(env, nm_obj,
+						  &dt_nodemap_features);
+		if (rc < 0) {
+			/* NOTE(review): drop nm_obj on every failure path */
+			lu_object_put(env, &nm_obj->do_lu);
+			if (force_create)
+				GOTO(out_root, nm_obj = ERR_PTR(rc));
+			/* even if loading from tgt fails, connecting to MGS
+			 * will rewrite the config
+			 */
+			CERROR("cannot load nodemap index from disk, creating "
+			       "new index: rc = %d\n", rc);
+			force_create = true;
+			goto again;
+		}
+	}
+
+out_root:
+	lu_object_put(env, &root_obj->do_lu);
+out:
+	return nm_obj;
+}
+
 static int nodemap_idx_insert(const struct lu_env *env,
 			      struct dt_object *idx,
 			      const struct nodemap_key *nk,
@@ -805,6 +880,151 @@ out:
 	RETURN(rc);
 }
 
+/**
+ * Step through active config and write to disk.
+ *
+ * \param	env	execution environment
+ * \param	ncf	config file whose index object is recreated and refilled
+ *
+ * \retval	0 on success
+ * \retval	-EIO if \a ncf has no local OID storage or no index object
+ * \retval	-ve if recreating the index or any insert failed
+ */
+int nodemap_save_config_cache(const struct lu_env *env,
+			      struct nm_config_file *ncf)
+{
+	struct dt_device *dev;
+	struct dt_object *o;
+	struct lu_nodemap *nodemap;
+	struct lu_nodemap *nm_tmp;
+	struct lu_nid_range *range;
+	struct lu_nid_range *range_temp;
+	struct lu_idmap *idmap;
+	struct lu_idmap *id_tmp;
+	struct rb_root root;
+	struct nodemap_key nk;
+	union nodemap_rec nr;
+	LIST_HEAD(nodemap_list_head);
+	int rc = 0, rc2;
+
+	ENTRY;
+
+	if (ncf->ncf_los == NULL || ncf->ncf_obj == NULL)
+		RETURN(-EIO);
+
+	dev = lu2dt_dev(ncf->ncf_obj->do_lu.lo_dev);
+
+	/* nodemap_cache_find_create will delete old conf file, so put here */
+	lu_object_put_nocache(env, &ncf->ncf_obj->do_lu);
+	ncf->ncf_obj = NULL;
+
+	/* force create a new index file to fill with active config */
+	o = nodemap_cache_find_create(env, dev, ncf->ncf_los, true);
+	if (IS_ERR(o))
+		GOTO(out, rc = PTR_ERR(o));
+
+	ncf->ncf_obj = o;
+
+	mutex_lock(&active_config_lock);
+
+	/* convert hash to list so we don't spin */
+	cfs_hash_for_each_safe(active_config->nmc_nodemap_hash,
+			       nm_hash_list_cb, &nodemap_list_head);
+
+	list_for_each_entry_safe(nodemap, nm_tmp, &nodemap_list_head, nm_list) {
+		nodemap_cluster_key_init(&nk, nodemap->nm_id);
+		nodemap_cluster_rec_init(&nr, nodemap);
+
+		rc2 = nodemap_idx_insert(env, o, &nk, &nr);
+		if (rc2 < 0) {
+			rc = rc2;
+			continue;
+		}
+
+		down_read(&active_config->nmc_range_tree_lock);
+		list_for_each_entry_safe(range, range_temp, &nodemap->nm_ranges,
+					 rn_list) {
+			lnet_nid_t nid[2] = {
+				range->rn_node.in_extent.start,
+				range->rn_node.in_extent.end
+			};
+			nodemap_range_key_init(&nk, nodemap->nm_id,
+					       range->rn_id);
+			nodemap_range_rec_init(&nr, nid);
+			rc2 = nodemap_idx_insert(env, o, &nk, &nr);
+			if (rc2 < 0)
+				rc = rc2;
+		}
+		up_read(&active_config->nmc_range_tree_lock);
+
+		/* we don't need to take nm_idmap_lock because active config
+		 * lock prevents changes from happening to nodemaps
+		 */
+		root = nodemap->nm_client_to_fs_uidmap;
+		nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
+							id_client_to_fs) {
+			nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_UID,
+					       idmap->id_client);
+			nodemap_idmap_rec_init(&nr, idmap->id_fs);
+			rc2 = nodemap_idx_insert(env, o, &nk, &nr);
+			if (rc2 < 0)
+				rc = rc2;
+		}
+
+		root = nodemap->nm_client_to_fs_gidmap;
+		nm_rbtree_postorder_for_each_entry_safe(idmap, id_tmp, &root,
+							id_client_to_fs) {
+			nodemap_idmap_key_init(&nk, nodemap->nm_id, NODEMAP_GID,
+					       idmap->id_client);
+			nodemap_idmap_rec_init(&nr, idmap->id_fs);
+			rc2 = nodemap_idx_insert(env, o, &nk, &nr);
+			if (rc2 < 0)
+				rc = rc2;
+		}
+	}
+	nodemap_global_key_init(&nk);
+	nodemap_global_rec_init(&nr, active_config->nmc_nodemap_is_active);
+	rc2 = nodemap_idx_insert(env, o, &nk, &nr);
+	if (rc2 < 0)
+		rc = rc2;
+
+	mutex_unlock(&active_config_lock);
+out:
+	/* NOTE(review): "out" sits after the unlock because the only jump
+	 * here happens before active_config_lock is taken
+	 */
+	RETURN(rc);
+}
+
+/**
+ * Rewrite the nodemap cache of every config file on ncf_list_head from the
+ * active config. Failures are only logged.
+ */
+static void nodemap_save_all_caches(void)
+{
+	struct nm_config_file *ncf;
+	struct lu_env env;
+	int rc = 0;
+
+	/* recreating nodemap cache requires fld_thread_key be in env */
+	rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD | LCT_MG_THREAD);
+	if (rc != 0) {
+		CWARN("cannot init env for nodemap config: rc = %d\n", rc);
+		return;
+	}
+
+	mutex_lock(&ncf_list_lock);
+	list_for_each_entry(ncf, &ncf_list_head, ncf_list) {
+		rc = nodemap_save_config_cache(&env, ncf);
+		if (rc < 0 && ncf->ncf_obj != NULL)
+			CWARN("%s: error writing to nodemap config: rc = %d\n",
+			      ncf->ncf_obj->do_lu.lo_dev->ld_obd->obd_name, rc);
+	}
+	mutex_unlock(&ncf_list_lock);
+
+	lu_env_fini(&env);
+}
+
 /* tracks if config still needs to be loaded, either from disk or network */
 static bool nodemap_config_loaded;
 static DEFINE_MUTEX(nodemap_config_loaded_lock);
@@ -820,6 +1040,7 @@ void nodemap_config_set_active_mgc(struct nodemap_config *config)
 	mutex_lock(&nodemap_config_loaded_lock);
 	nodemap_config_set_active(config);
 	nodemap_config_loaded = true;
+	nodemap_save_all_caches();
 	mutex_unlock(&nodemap_config_loaded_lock);
 }
 EXPORT_SYMBOL(nodemap_config_set_active_mgc);
@@ -843,6 +1064,7 @@ struct nm_config_file *nm_config_file_register(const struct lu_env *env,
 					       enum nm_config_file_type ncf_type)
 {
 	struct nm_config_file *ncf;
+	bool save_config = false;
 	int rc = 0;
 	ENTRY;
 
@@ -867,9 +1089,15 @@ struct nm_config_file *nm_config_file_register(const struct lu_env *env,
 	mutex_lock(&nodemap_config_loaded_lock);
 	if (ncf_type == NCFT_MGS || !nodemap_config_loaded)
 		rc = nodemap_load_entries(env, obj);
+	else
+		save_config = true;
 	nodemap_config_loaded = true;
 	mutex_unlock(&nodemap_config_loaded_lock);
 
+	/* sync on disk caches with loaded config in memory */
+	if (save_config)
+		rc = nodemap_save_config_cache(env, ncf);
+
 	if (rc < 0) {
 		if (ncf_type == NCFT_MGS) {
 			nodemap_mgs_ncf = NULL;
@@ -1086,3 +1314,69 @@ out:
 	return rc;
 }
 EXPORT_SYMBOL(nodemap_get_config_req);
+
+/**
+ * Find or create the nodemap index on a target device and register it as a
+ * target (NCFT_TGT) config file for \a obd.
+ *
+ * \param	env	execution environment
+ * \param	dev	device to look the index up on
+ * \param	obd	obd whose obt_nodemap_config_file is set on success
+ * \param	los	local OID storage used for the index object
+ *
+ * \retval	0 on success
+ * \retval	-ve on failure to find/create or register the index
+ */
+int nodemap_fs_init(const struct lu_env *env, struct dt_device *dev,
+		    struct obd_device *obd, struct local_oid_storage *los)
+{
+	struct dt_object *config_obj;
+	struct nm_config_file *nm_config_file;
+	int rc = 0;
+	ENTRY;
+
+	CDEBUG(D_INFO, "%s: finding nodemap index\n", obd->obd_name);
+	/* load or create the index file from disk (don't force create) */
+	config_obj = nodemap_cache_find_create(env, dev, los, false);
+	if (IS_ERR(config_obj))
+		GOTO(out, rc = PTR_ERR(config_obj));
+
+	CDEBUG(D_INFO, "%s: registering nodemap index\n", obd->obd_name);
+
+	nm_config_file = nm_config_file_register(env, config_obj, los,
+						 NCFT_TGT);
+	if (IS_ERR(nm_config_file)) {
+		CERROR("%s: error loading nodemap config file, file must be "
+		       "removed via ldiskfs: rc = %ld\n",
+		       obd->obd_name, PTR_ERR(nm_config_file));
+		GOTO(out, rc = PTR_ERR(nm_config_file));
+	}
+
+	obd->u.obt.obt_nodemap_config_file = nm_config_file;
+
+	/* save los in case object needs to be re-created */
+	nm_config_file->ncf_los = los;
+
+	EXIT;
+
+out:
+	return rc;
+}
+EXPORT_SYMBOL(nodemap_fs_init);
+
+/**
+ * Deregister the target nodemap config file of \a obd, if one was set up.
+ *
+ * \param	env	execution environment
+ * \param	obd	obd whose obt_nodemap_config_file is released and cleared
+ */
+void nodemap_fs_fini(const struct lu_env *env, struct obd_device *obd)
+{
+	if (obd->u.obt.obt_nodemap_config_file == NULL)
+		return;
+
+	nm_config_file_deregister(env, obd->u.obt.obt_nodemap_config_file,
+				  NCFT_TGT);
+	obd->u.obt.obt_nodemap_config_file = NULL;
+}
+EXPORT_SYMBOL(nodemap_fs_fini);