X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fobd_mount_server.c;h=ca68fb02556a6bb6434e51279e3bd5e133833f50;hb=ac5044566b97c7f6881bed817c2ed9752a0c6d63;hp=f0b169e4d08e7819044c35383e12ec1f7dccf322;hpb=166c5ba95cb2a4771317e030a3649e4480c8cbad;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index f0b169e..ca68fb0 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2013, Intel Corporation. + * Copyright (c) 2013, 2015, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -46,24 +46,28 @@ #define PRINT_CMD CDEBUG #define PRINT_MASK (D_SUPER | D_CONFIG) -#include -#include -#include +#include +#include +#include #include -#include -#include -#include -#include #ifdef HAVE_KERNEL_LOCKED #include #endif -#ifdef HAVE_SELINUX_IS_ENABLED -#include -#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include /*********** mount lookup *********/ -DEFINE_MUTEX(lustre_mount_info_lock); +static DEFINE_MUTEX(lustre_mount_info_lock); static struct list_head server_mount_info_list = LIST_HEAD_INIT(server_mount_info_list); @@ -240,8 +244,8 @@ static int server_start_mgs(struct super_block *sb) if (!rc) { rc = lustre_start_simple(LUSTRE_MGS_OBDNAME, LUSTRE_MGS_NAME, - LUSTRE_MGS_OBDNAME, 0, 0, - lsi->lsi_osd_obdname, 0); + LUSTRE_MGS_OBDNAME, NULL, NULL, + lsi->lsi_osd_obdname, NULL); /* server_deregister_mount() is not called previously, for lsi * and other stuff can't be freed cleanly when mgs calls * server_put_mount() in error handling case (see b=17758), @@ -263,8 +267,14 @@ static int server_stop_mgs(struct super_block *sb) { struct obd_device *obd; int rc; + struct lustre_mount_info *lmi; ENTRY; + /* Do not stop MGS if this device is not the running MGT */ + lmi = server_find_mount(LUSTRE_MGS_OBDNAME); + if (lmi != NULL && lmi->lmi_sb != sb) + RETURN(0); + CDEBUG(D_MOUNT, "Stop MGS service %s\n", LUSTRE_MGS_OBDNAME); /* There better be only one MGS */ @@ -282,7 +292,8 @@ static int server_stop_mgs(struct super_block *sb) /* Since there's only one mgc per node, we have to change it's fs to get access to the right disk. */ -static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb) +static int server_mgc_set_fs(const struct lu_env *env, + struct obd_device *mgc, struct super_block *sb) { struct lustre_sb_info *lsi = s2lsi(sb); int rc; @@ -291,7 +302,7 @@ static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb) CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev); /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */ - rc = obd_set_info_async(NULL, mgc->obd_self_export, + rc = obd_set_info_async(env, mgc->obd_self_export, sizeof(KEY_SET_FS), KEY_SET_FS, sizeof(*sb), sb, NULL); if (rc != 0) @@ -300,14 +311,15 @@ static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb) RETURN(rc); } -static int server_mgc_clear_fs(struct obd_device *mgc) +static int server_mgc_clear_fs(const struct lu_env *env, + struct obd_device *mgc) { int rc; ENTRY; CDEBUG(D_MOUNT, "Unassign mgc disk\n"); - rc = obd_set_info_async(NULL, mgc->obd_self_export, + rc = obd_set_info_async(env, mgc->obd_self_export, sizeof(KEY_CLEAR_FS), KEY_CLEAR_FS, 0, NULL, NULL); RETURN(rc); @@ -377,7 +389,18 @@ EXPORT_SYMBOL(tgt_name2lwp_name); static struct list_head lwp_register_list = LIST_HEAD_INIT(lwp_register_list); -DEFINE_MUTEX(lwp_register_list_lock); +static DEFINE_SPINLOCK(lwp_register_list_lock); + +static void lustre_put_lwp_item(struct lwp_register_item *lri) +{ + if (atomic_dec_and_test(&lri->lri_ref)) { + LASSERT(list_empty(&lri->lri_list)); + + if (*lri->lri_exp != NULL) + class_export_put(*lri->lri_exp); + OBD_FREE_PTR(lri); + } +} int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp, register_lwp_cb cb_func, void *cb_data) @@ -394,15 +417,12 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp, if (lri == NULL) RETURN(-ENOMEM); - mutex_lock(&lwp_register_list_lock); - lwp = class_name2obd(lwpname); if (lwp != NULL && lwp->obd_set_up == 1) { struct obd_uuid *uuid; OBD_ALLOC_PTR(uuid); if (uuid == NULL) { - mutex_unlock(&lwp_register_list_lock); OBD_FREE_PTR(lri); RETURN(-ENOMEM); } @@ -416,31 +436,40 @@ int lustre_register_lwp_item(const char *lwpname, struct obd_export **exp, lri->lri_cb_func = cb_func; lri->lri_cb_data = cb_data; INIT_LIST_HEAD(&lri->lri_list); + /* + * Initialize the lri_ref at 2, one will be released before + * current function returned via lustre_put_lwp_item(), the + * other will be released in lustre_deregister_lwp_item(). + */ + atomic_set(&lri->lri_ref, 2); + + spin_lock(&lwp_register_list_lock); list_add(&lri->lri_list, &lwp_register_list); + spin_unlock(&lwp_register_list_lock); if (*exp != NULL && cb_func != NULL) cb_func(cb_data); + lustre_put_lwp_item(lri); - mutex_unlock(&lwp_register_list_lock); RETURN(0); } EXPORT_SYMBOL(lustre_register_lwp_item); void lustre_deregister_lwp_item(struct obd_export **exp) { - struct lwp_register_item *lri, *tmp; + struct lwp_register_item *lri; - mutex_lock(&lwp_register_list_lock); - list_for_each_entry_safe(lri, tmp, &lwp_register_list, lri_list) { + spin_lock(&lwp_register_list_lock); + list_for_each_entry(lri, &lwp_register_list, lri_list) { if (exp == lri->lri_exp) { - if (*exp) - class_export_put(*exp); - list_del(&lri->lri_list); - OBD_FREE_PTR(lri); - break; + list_del_init(&lri->lri_list); + spin_unlock(&lwp_register_list_lock); + + lustre_put_lwp_item(lri); + return; } } - mutex_unlock(&lwp_register_list_lock); + spin_unlock(&lwp_register_list_lock); } EXPORT_SYMBOL(lustre_deregister_lwp_item); @@ -471,7 +500,7 @@ struct obd_export *lustre_find_lwp_by_index(const char *dev, __u32 idx) list_for_each_entry(lwp, &lsi->lsi_lwp_list, obd_lwp_list) { char *ptr = strstr(lwp->obd_name, lwp_name); - if (ptr != NULL) { + if (ptr != NULL && lwp->obd_lwp_export != NULL) { exp = class_export_get(lwp->obd_lwp_export); break; } @@ -485,23 +514,36 @@ err_lmi: } EXPORT_SYMBOL(lustre_find_lwp_by_index); -static void lustre_notify_lwp_list(struct obd_export *exp) +void lustre_notify_lwp_list(struct obd_export *exp) { - struct lwp_register_item *lri, *tmp; + struct lwp_register_item *lri; LASSERT(exp != NULL); - mutex_lock(&lwp_register_list_lock); - list_for_each_entry_safe(lri, tmp, &lwp_register_list, lri_list) { +again: + spin_lock(&lwp_register_list_lock); + list_for_each_entry(lri, &lwp_register_list, lri_list) { if (strcmp(exp->exp_obd->obd_name, lri->lri_name)) continue; if (*lri->lri_exp != NULL) continue; *lri->lri_exp = class_export_get(exp); + atomic_inc(&lri->lri_ref); + spin_unlock(&lwp_register_list_lock); + if (lri->lri_cb_func != NULL) lri->lri_cb_func(lri->lri_cb_data); + lustre_put_lwp_item(lri); + + /* Others may have changed the list after we unlock, we have + * to rescan the list from the beginning. Usually, the list + * 'lwp_register_list' is very short, and there is 'guard' + * lri::lri_exp that will prevent the callback to be done + * repeatedly. So rescanning the list has no problem. */ + goto again; } - mutex_unlock(&lwp_register_list_lock); + spin_unlock(&lwp_register_list_lock); } +EXPORT_SYMBOL(lustre_notify_lwp_list); static int lustre_lwp_connect(struct obd_device *lwp) { @@ -532,7 +574,8 @@ static int lustre_lwp_connect(struct obd_device *lwp) data->ocd_connect_flags |= OBD_CONNECT_MDS_MDS | OBD_CONNECT_FID | OBD_CONNECT_AT | OBD_CONNECT_LRU_RESIZE | OBD_CONNECT_FULL20 | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_LFSCK; + OBD_CONNECT_LIGHTWEIGHT | OBD_CONNECT_LFSCK | + OBD_CONNECT_BULK_MBITS; OBD_ALLOC_PTR(uuid); if (uuid == NULL) GOTO(out, rc = -ENOMEM); @@ -552,7 +595,6 @@ static int lustre_lwp_connect(struct obd_device *lwp) if (unlikely(lwp->obd_lwp_export != NULL)) class_export_put(lwp->obd_lwp_export); lwp->obd_lwp_export = class_export_get(exp); - lustre_notify_lwp_list(exp); } GOTO(out, rc); @@ -608,7 +650,7 @@ static int lustre_lwp_setup(struct lustre_cfg *lcfg, struct lustre_sb_info *lsi, sprintf(lwpuuid, "%s_UUID", lwpname); rc = lustre_start_simple(lwpname, LUSTRE_LWP_NAME, lwpuuid, lustre_cfg_string(lcfg, 1), - 0, 0, 0); + NULL, NULL, NULL); if (rc) { CERROR("%s: setup up failed: rc %d\n", lwpname, rc); GOTO(out, rc); @@ -876,7 +918,7 @@ static int lustre_disconnect_lwp(struct super_block *sb) /* end log first */ cfg->cfg_instance = sb; rc = lustre_end_log(sb, logname, cfg); - if (rc != 0) + if (rc != 0 && rc != -ENOENT) GOTO(out, rc); lsi->lsi_lwp_started = 0; @@ -985,8 +1027,8 @@ static int lustre_start_lwp(struct super_block *sb) cfg->cfg_callback = client_lwp_config_process; cfg->cfg_instance = sb; rc = lustre_process_log(sb, logname, cfg); - if (rc == 0) - lsi->lsi_lwp_started = 1; + /* need to remove config llog from mgc */ + lsi->lsi_lwp_started = 1; GOTO(out, rc); @@ -998,7 +1040,7 @@ out: return rc; } -DEFINE_MUTEX(server_start_lock); +static DEFINE_MUTEX(server_start_lock); /* Stop MDS/OSS if nobody is using them */ static int server_stop_servers(int lsiflags) @@ -1046,7 +1088,6 @@ int server_mti_print(const char *title, struct mgs_target_info *mti) mti->mti_config_ver, mti->mti_flags); return 0; } -EXPORT_SYMBOL(server_mti_print); /* Generate data for registration */ static int server_lsi2mti(struct lustre_sb_info *lsi, @@ -1090,6 +1131,13 @@ static int server_lsi2mti(struct lustre_sb_info *lsi, } } + if (mti->mti_nid_count == 0) { + CERROR("Failed to get NID for server %s, please check whether " + "the target is specifed with improper --servicenode or " + "--network options.\n", mti->mti_svname); + RETURN(-EINVAL); + } + mti->mti_lustre_ver = LUSTRE_VERSION_CODE; mti->mti_config_ver = 0; @@ -1163,8 +1211,8 @@ static int server_register_target(struct lustre_sb_info *lsi) "rc = %d. Is the MGS running?\n", lsi->lsi_svname, rc); } else { - CERROR("%s: error registering with the MGS: rc = %d " - "(not fatal)\n", lsi->lsi_svname, rc); + CDEBUG(D_HA, "%s: error registering with the MGS: " + "rc = %d (not fatal)\n", lsi->lsi_svname, rc); /* reset the error code for non-fatal error. */ rc = 0; } @@ -1229,7 +1277,7 @@ static int server_start_targets(struct super_block *sb) struct obd_device *obd; struct lustre_sb_info *lsi = s2lsi(sb); struct config_llog_instance cfg; - struct lu_env env; + struct lu_env mgc_env; struct lu_device *dev; int rc; ENTRY; @@ -1244,7 +1292,7 @@ static int server_start_targets(struct super_block *sb) rc = lustre_start_simple(LUSTRE_MDS_OBDNAME, LUSTRE_MDS_NAME, LUSTRE_MDS_OBDNAME"_uuid", - 0, 0, 0, 0); + NULL, NULL, NULL, NULL); if (rc) { mutex_unlock(&server_start_lock); CERROR("failed to start MDS: %d\n", rc); @@ -1263,7 +1311,7 @@ static int server_start_targets(struct super_block *sb) rc = lustre_start_simple(LUSTRE_OSS_OBDNAME, LUSTRE_OSS_NAME, LUSTRE_OSS_OBDNAME"_uuid", - 0, 0, 0, 0); + NULL, NULL, NULL, NULL); if (rc) { mutex_unlock(&server_start_lock); CERROR("failed to start OSS: %d\n", rc); @@ -1273,11 +1321,15 @@ static int server_start_targets(struct super_block *sb) mutex_unlock(&server_start_lock); } + rc = lu_env_init(&mgc_env, LCT_MG_THREAD); + if (rc != 0) + GOTO(out_stop_service, rc); + /* Set the mgc fs to our server disk. This allows the MGC to * read and write configs locally, in case it can't talk to the MGS. */ - rc = server_mgc_set_fs(lsi->lsi_mgc, sb); + rc = server_mgc_set_fs(&mgc_env, lsi->lsi_mgc, sb); if (rc) - GOTO(out_stop_service, rc); + GOTO(out_env, rc); /* Register with MGS */ rc = server_register_target(lsi); @@ -1329,6 +1381,8 @@ static int server_start_targets(struct super_block *sb) /* log has been fully processed, let clients connect */ dev = obd->obd_lu_dev; if (dev && dev->ld_ops->ldo_prepare) { + struct lu_env env; + rc = lu_env_init(&env, dev->ld_type->ldt_ctx_tags); if (rc == 0) { struct lu_context session_ctx; @@ -1356,8 +1410,9 @@ static int server_start_targets(struct super_block *sb) out_mgc: /* Release the mgc fs for others to use */ - server_mgc_clear_fs(lsi->lsi_mgc); - + server_mgc_clear_fs(&mgc_env, lsi->lsi_mgc); +out_env: + lu_env_fini(&mgc_env); out_stop_service: if (rc != 0) server_stop_servers(lsi->lsi_flags); @@ -1367,6 +1422,8 @@ out_stop_service: static int lsi_prepare(struct lustre_sb_info *lsi) { + const char *osd_type; + const char *fstype; __u32 index; int rc; ENTRY; @@ -1380,28 +1437,26 @@ static int lsi_prepare(struct lustre_sb_info *lsi) RETURN(-EINVAL); } - if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(lsi->lsi_svname)) - RETURN(-ENAMETOOLONG); - - strcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile); - /* Determine osd type */ - if (lsi->lsi_lmd->lmd_osd_type != NULL) { - if (strlen(lsi->lsi_lmd->lmd_osd_type) >= - sizeof(lsi->lsi_osd_type)) - RETURN(-ENAMETOOLONG); - - strcpy(lsi->lsi_osd_type, lsi->lsi_lmd->lmd_osd_type); + if (lsi->lsi_lmd->lmd_osd_type == NULL) { + osd_type = LUSTRE_OSD_LDISKFS_NAME; + fstype = "ldiskfs"; } else { - strcpy(lsi->lsi_osd_type, LUSTRE_OSD_LDISKFS_NAME); + osd_type = lsi->lsi_lmd->lmd_osd_type; + fstype = lsi->lsi_lmd->lmd_osd_type; } + if (strlen(lsi->lsi_lmd->lmd_profile) >= sizeof(lsi->lsi_svname) || + strlen(osd_type) >= sizeof(lsi->lsi_osd_type) || + strlen(fstype) >= sizeof(lsi->lsi_fstype)) + RETURN(-ENAMETOOLONG); + + strlcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile, + sizeof(lsi->lsi_svname)); + strlcpy(lsi->lsi_osd_type, osd_type, sizeof(lsi->lsi_osd_type)); /* XXX: a temp. solution for components using ldiskfs * to be removed in one of the subsequent patches */ - if (!strcmp(lsi->lsi_lmd->lmd_osd_type, "osd-ldiskfs")) - strcpy(lsi->lsi_fstype, "ldiskfs"); - else - strcpy(lsi->lsi_fstype, lsi->lsi_lmd->lmd_osd_type); + strlcpy(lsi->lsi_fstype, fstype, sizeof(lsi->lsi_fstype)); /* Determine server type */ rc = server_name2index(lsi->lsi_svname, &index, NULL); @@ -1480,9 +1535,13 @@ static void server_put_super(struct super_block *sb) If there are any setup/cleanup errors, save the lov name for safety cleanup later. */ lprof = class_get_profile(lsi->lsi_svname); - if (lprof && lprof->lp_dt) { - OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1); - strcpy(extraname, lprof->lp_dt); + if (lprof != NULL) { + if (lprof->lp_dt != NULL) { + OBD_ALLOC(extraname, strlen(lprof->lp_dt) + 1); + strncpy(extraname, lprof->lp_dt, + strlen(lprof->lp_dt) + 1); + } + class_put_profile(lprof); } obd = class_name2obd(lsi->lsi_svname); @@ -1618,7 +1677,7 @@ static ssize_t lustre_listxattr(struct dentry *d_entry, char *name, return -EOPNOTSUPP; } -const struct inode_operations server_inode_operations = { +static const struct inode_operations server_inode_operations = { .setxattr = lustre_setxattr, .getxattr = lustre_getxattr, .listxattr = lustre_listxattr, @@ -1629,7 +1688,7 @@ const struct inode_operations server_inode_operations = { static int server_fill_super_common(struct super_block *sb) { - struct inode *root = 0; + struct inode *root = NULL; ENTRY; CDEBUG(D_MOUNT, "Server sb, dev=%d\n", (int)sb->s_dev); @@ -1668,7 +1727,6 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) struct dt_device_param p; char flagstr[16]; int rc; - bool already_started = 0; ENTRY; CDEBUG(D_MOUNT, @@ -1693,25 +1751,23 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) LASSERT(obd); } else { CDEBUG(D_MOUNT, "%s already started\n", lsi->lsi_osd_obdname); - already_started = 1; /* but continue setup to allow special case of MDT and internal - * MGT being started separately, that will be identified in - * caller server_fill_super(). - */ + * MGT being started separately. */ + if (!((IS_MGS(lsi) && (lsi->lsi_lmd->lmd_flags & + LMD_FLG_NOMGS)) || + (IS_MDT(lsi) && (lsi->lsi_lmd->lmd_flags & + LMD_FLG_NOSVC)))) + RETURN(-EALREADY); } rc = obd_connect(NULL, &lsi->lsi_osd_exp, obd, &obd->obd_uuid, NULL, NULL); - OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 10); - if (rc) { - if (!already_started) { - obd->obd_force = 1; - class_manual_cleanup(obd); - lsi->lsi_dt_dev = NULL; - } - GOTO(out, rc); + obd->obd_force = 1; + class_manual_cleanup(obd); + lsi->lsi_dt_dev = NULL; + RETURN(rc); } LASSERT(obd->obd_lu_dev); @@ -1725,7 +1781,7 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) dt_conf_get(NULL, lsi->lsi_dt_dev, &p); out: - RETURN(already_started ? -EALREADY : rc); + RETURN(rc); } /** Fill in the superblock info for a Lustre server. @@ -1739,15 +1795,16 @@ int server_fill_super(struct super_block *sb) int rc; ENTRY; + /* to simulate target mount race */ + OBD_RACE(OBD_FAIL_TGT_MOUNT_RACE); + rc = lsi_prepare(lsi); if (rc) RETURN(rc); /* Start low level OSD */ rc = osd_start(lsi, sb->s_flags); - /* Handle separate nosvc and nomgs case */ - if (rc && ((rc != -EALREADY) || !(lsi->lsi_lmd->lmd_flags & - (LMD_FLG_NOSVC|LMD_FLG_NOMGS)))) { + if (rc) { CERROR("Unable to start osd on %s: %d\n", lsi->lsi_lmd->lmd_dev, rc); lustre_put_lsi(sb); @@ -1870,4 +1927,3 @@ void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd) obd->obd_recovery_time_hard = hard; obd->obd_recovery_ir_factor = factor; } -EXPORT_SYMBOL(server_calc_timeout);