X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmgc%2Fmgc_request.c;h=412e7e11cdf4092ae167d5c431e0413a067771e6;hb=9d9b3fa84a5fafe7ea0906b5cdae3be119a62b25;hp=e3859319d0e59757be01103dc4a4f7c3c49d3570;hpb=6869932b552ac705f411de3362f01bd50c1f6f7d;p=fs%2Flustre-release.git diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index e385931..412e7e1 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -107,7 +107,7 @@ int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id) /********************** config llog list **********************/ static CFS_LIST_HEAD(config_llog_list); -static spinlock_t config_list_lock = SPIN_LOCK_UNLOCKED; +static cfs_spinlock_t config_list_lock = CFS_SPIN_LOCK_UNLOCKED; /* Take a reference to a config log */ static int config_log_get(struct config_llog_data *cld) @@ -115,9 +115,9 @@ static int config_log_get(struct config_llog_data *cld) ENTRY; if (cld->cld_stopping) RETURN(1); - atomic_inc(&cld->cld_refcount); + cfs_atomic_inc(&cld->cld_refcount); CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, - atomic_read(&cld->cld_refcount)); + cfs_atomic_read(&cld->cld_refcount)); RETURN(0); } @@ -126,28 +126,42 @@ static int config_log_get(struct config_llog_data *cld) static void config_log_put(struct config_llog_data *cld) { ENTRY; + CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname, - atomic_read(&cld->cld_refcount)); - if (atomic_dec_and_test(&cld->cld_refcount)) { + cfs_atomic_read(&cld->cld_refcount)); + LASSERT(cfs_atomic_read(&cld->cld_refcount) > 0); + + /* spinlock to make sure no item with 0 refcount in the list */ + cfs_spin_lock(&config_list_lock); + if (unlikely(cfs_atomic_dec_and_test(&cld->cld_refcount))) { + cfs_list_del(&cld->cld_list_chain); + cfs_spin_unlock(&config_list_lock); + CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname); + + if (cld->cld_sptlrpc) + config_log_put(cld->cld_sptlrpc); + if (cld->cld_is_sptlrpc) + sptlrpc_conf_log_stop(cld->cld_logname); + class_export_put(cld->cld_mgcexp); - spin_lock(&config_list_lock); - list_del(&cld->cld_list_chain); - spin_unlock(&config_list_lock); OBD_FREE(cld->cld_logname, strlen(cld->cld_logname) + 1); if (cld->cld_cfg.cfg_instance != NULL) OBD_FREE(cld->cld_cfg.cfg_instance, strlen(cld->cld_cfg.cfg_instance) + 1); OBD_FREE(cld, sizeof(*cld)); + } else { + cfs_spin_unlock(&config_list_lock); } + EXIT; } /* Find a config log by name */ -static struct config_llog_data *config_log_find(char *logname, - struct config_llog_instance *cfg) +static +struct config_llog_data *config_log_find(char *logname, + struct config_llog_instance *cfg) { - struct list_head *tmp; struct config_llog_data *cld; char *logid = logname; int match_instance = 0; @@ -162,9 +176,8 @@ static struct config_llog_data *config_log_find(char *logname, RETURN(ERR_PTR(-EINVAL)); } - spin_lock(&config_list_lock); - list_for_each(tmp, &config_llog_list) { - cld = list_entry(tmp, struct config_llog_data, cld_list_chain); + cfs_spin_lock(&config_list_lock); + cfs_list_for_each_entry(cld, &config_llog_list, cld_list_chain) { if (match_instance && cld->cld_cfg.cfg_instance && strcmp(logid, cld->cld_cfg.cfg_instance) == 0) goto out_found; @@ -172,81 +185,176 @@ static struct config_llog_data *config_log_find(char *logname, strcmp(logid, cld->cld_logname) == 0) goto out_found; } - spin_unlock(&config_list_lock); + cfs_spin_unlock(&config_list_lock); CDEBUG(D_CONFIG, "can't get log %s\n", logid); RETURN(ERR_PTR(-ENOENT)); out_found: - atomic_inc(&cld->cld_refcount); - spin_unlock(&config_list_lock); + cfs_atomic_inc(&cld->cld_refcount); + cfs_spin_unlock(&config_list_lock); + LASSERT(cld->cld_stopping == 0 || cld->cld_is_sptlrpc == 0); RETURN(cld); } -/* Add this log to our list of active logs. - We have one active log per "mount" - client instance or servername. - Each instance may be at a different point in the log. */ -static int config_log_add(char *logname, struct config_llog_instance *cfg, - struct super_block *sb) +static +struct config_llog_data *do_config_log_add(struct obd_device *obd, + char *logname, + unsigned int is_sptlrpc, + struct config_llog_instance *cfg, + struct super_block *sb) { struct config_llog_data *cld; - struct lustre_sb_info *lsi = s2lsi(sb); - int rc; + int rc; ENTRY; - CDEBUG(D_MGC, "adding config log %s:%s\n", logname, cfg->cfg_instance); + CDEBUG(D_MGC, "do adding config log %s:%s\n", logname, + cfg ? cfg->cfg_instance : "NULL"); OBD_ALLOC(cld, sizeof(*cld)); if (!cld) - RETURN(-ENOMEM); + RETURN(ERR_PTR(-ENOMEM)); OBD_ALLOC(cld->cld_logname, strlen(logname) + 1); if (!cld->cld_logname) { OBD_FREE(cld, sizeof(*cld)); - RETURN(-ENOMEM); + RETURN(ERR_PTR(-ENOMEM)); } strcpy(cld->cld_logname, logname); - cld->cld_cfg = *cfg; + if (cfg) + cld->cld_cfg = *cfg; cld->cld_cfg.cfg_last_idx = 0; cld->cld_cfg.cfg_flags = 0; cld->cld_cfg.cfg_sb = sb; - atomic_set(&cld->cld_refcount, 1); + cld->cld_is_sptlrpc = is_sptlrpc; + cfs_atomic_set(&cld->cld_refcount, 1); /* Keep the mgc around until we are done */ - cld->cld_mgcexp = class_export_get(lsi->lsi_mgc->obd_self_export); + cld->cld_mgcexp = class_export_get(obd->obd_self_export); - if (cfg->cfg_instance != NULL) { + if (cfg && cfg->cfg_instance != NULL) { OBD_ALLOC(cld->cld_cfg.cfg_instance, strlen(cfg->cfg_instance) + 1); strcpy(cld->cld_cfg.cfg_instance, cfg->cfg_instance); } + + if (is_sptlrpc) { + sptlrpc_conf_log_start(logname); + cld->cld_cfg.cfg_obdname = obd->obd_name; + } + rc = mgc_logname2resid(logname, &cld->cld_resid); - spin_lock(&config_list_lock); - list_add(&cld->cld_list_chain, &config_llog_list); - spin_unlock(&config_list_lock); - + + cfs_spin_lock(&config_list_lock); + cfs_list_add(&cld->cld_list_chain, &config_llog_list); + cfs_spin_unlock(&config_list_lock); + if (rc) { config_log_put(cld); - RETURN(rc); + RETURN(ERR_PTR(rc)); } - RETURN(rc); + if (is_sptlrpc) { + rc = mgc_process_log(obd, cld); + if (rc) + CERROR("failed processing sptlrpc log: %d\n", rc); + } + + RETURN(cld); } +/** + * Add this log to our list of active logs. + * We have one active log per "mount" - client instance or servername. + * Each instance may be at a different point in the log. + */ +static int config_log_add(struct obd_device *obd, char *logname, + struct config_llog_instance *cfg, + struct super_block *sb) +{ + struct config_llog_data *cld, *sptlrpc_cld; + char seclogname[20]; + char *ptr; + ENTRY; + + CDEBUG(D_MGC, "adding config log %s:%s\n", logname, cfg->cfg_instance); + + /* + * for each regular log, the depended sptlrpc log name is + * -sptlrpc. multiple regular logs may share one sptlrpc log. + */ + ptr = strrchr(logname, '-'); + if (ptr == NULL || ptr - logname > 8) { + CERROR("logname %s is too long\n", logname); + RETURN(-EINVAL); + } + + memcpy(seclogname, logname, ptr - logname); + strcpy(seclogname + (ptr - logname), "-sptlrpc"); + + sptlrpc_cld = config_log_find(seclogname, NULL); + if (IS_ERR(sptlrpc_cld)) { + sptlrpc_cld = do_config_log_add(obd, seclogname, 1, NULL, NULL); + if (IS_ERR(sptlrpc_cld)) { + CERROR("can't create sptlrpc log: %s\n", seclogname); + RETURN(PTR_ERR(sptlrpc_cld)); + } + } + + cld = do_config_log_add(obd, logname, 0, cfg, sb); + if (IS_ERR(cld)) { + CERROR("can't create log: %s\n", logname); + config_log_put(sptlrpc_cld); + RETURN(PTR_ERR(cld)); + } + + cld->cld_sptlrpc = sptlrpc_cld; + + RETURN(0); +} + +CFS_DECLARE_MUTEX(llog_process_lock); + /* Stop watching for updates on this log. */ static int config_log_end(char *logname, struct config_llog_instance *cfg) { - struct config_llog_data *cld; + struct config_llog_data *cld, *cld_sptlrpc = NULL; int rc = 0; ENTRY; cld = config_log_find(logname, cfg); if (IS_ERR(cld)) RETURN(PTR_ERR(cld)); - /* drop the ref from the find */ - config_log_put(cld); + + cfs_down(&llog_process_lock); + /* + * if cld_stopping is set, it means we didn't start the log thus + * not owning the start ref. this can happen after previous umount: + * the cld still hanging there waiting for lock cancel, and we + * remount again but failed in the middle and call log_end without + * calling start_log. + */ + if (unlikely(cld->cld_stopping)) { + cfs_up(&llog_process_lock); + /* drop the ref from the find */ + config_log_put(cld); + RETURN(rc); + } cld->cld_stopping = 1; + cfs_up(&llog_process_lock); + + cfs_spin_lock(&config_list_lock); + cld_sptlrpc = cld->cld_sptlrpc; + cld->cld_sptlrpc = NULL; + cfs_spin_unlock(&config_list_lock); + + if (cld_sptlrpc) + config_log_put(cld_sptlrpc); + + /* drop the ref from the find */ + config_log_put(cld); /* drop the start ref */ config_log_put(cld); + CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? logname : "client", rc); RETURN(rc); @@ -260,67 +368,118 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) static int rq_state = 0; static cfs_waitq_t rq_waitq; -static int mgc_process_log(struct obd_device *mgc, - struct config_llog_data *cld); static int mgc_requeue_add(struct config_llog_data *cld, int later); +static void do_requeue(struct config_llog_data *cld) +{ + LASSERT(cfs_atomic_read(&cld->cld_refcount) > 0); + + /* Do not run mgc_process_log on a disconnected export or an + export which is being disconnected. Take the client + semaphore to make the check non-racy. */ + cfs_down_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem); + if (cld->cld_mgcexp->exp_obd->u.cli.cl_conn_count != 0) { + CDEBUG(D_MGC, "updating log %s\n", cld->cld_logname); + mgc_process_log(cld->cld_mgcexp->exp_obd, cld); + } else { + CDEBUG(D_MGC, "disconnecting, won't update log %s\n", + cld->cld_logname); + } + cfs_up_read(&cld->cld_mgcexp->exp_obd->u.cli.cl_sem); + + /* Whether we enqueued again or not in mgc_process_log, we're done + * with the ref from the old enqueue */ + config_log_put(cld); +} + static int mgc_requeue_thread(void *data) { struct l_wait_info lwi_now, lwi_later; - struct config_llog_data *cld, *n; + struct config_llog_data *cld, *cld_next, *cld_prev; char name[] = "ll_cfg_requeue"; int rc = 0; ENTRY; - ptlrpc_daemonize(name); - + cfs_daemonize(name); + CDEBUG(D_MGC, "Starting requeue thread\n"); - lwi_later = LWI_TIMEOUT(60 * HZ, NULL, NULL); + lwi_later = LWI_TIMEOUT(60 * CFS_HZ, NULL, NULL); l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP), &lwi_later); /* Keep trying failed locks periodically */ - spin_lock(&config_list_lock); + cfs_spin_lock(&config_list_lock); while (rq_state & (RQ_NOW | RQ_LATER)) { /* Any new or requeued lostlocks will change the state */ - rq_state &= ~(RQ_NOW | RQ_LATER); - spin_unlock(&config_list_lock); + rq_state &= ~(RQ_NOW | RQ_LATER); + cfs_spin_unlock(&config_list_lock); - /* Always wait a few seconds to allow the server who + /* Always wait a few seconds to allow the server who caused the lock revocation to finish its setup, plus some random so everyone doesn't try to reconnect at once. */ - lwi_now = LWI_TIMEOUT(3 * HZ + (ll_rand() & 0xff) * (HZ / 100), + lwi_now = LWI_TIMEOUT(3 * CFS_HZ + (ll_rand() & 0xff) * \ + (CFS_HZ / 100), NULL, NULL); l_wait_event(rq_waitq, rq_state & RQ_STOP, &lwi_now); - - spin_lock(&config_list_lock); - list_for_each_entry_safe(cld, n, &config_llog_list, - cld_list_chain) { - spin_unlock(&config_list_lock); + + /* + * iterate & processing through the list. for each cld, process + * its depending sptlrpc cld firstly (if any) and then itself. + * + * it's guaranteed any item in the list must have + * reference > 0; and if cld_lostlock is set, at + * least one reference is taken by the previous enqueue. + * + * Note: releasing a cld might lead to itself and its depended + * sptlrpc cld be unlinked from the list. to safely iterate + * we need to take a reference on next cld before processing. + */ + cld_prev = NULL; + + cfs_spin_lock(&config_list_lock); + cfs_list_for_each_entry_safe(cld, cld_next, &config_llog_list, + cld_list_chain) { + if (cld->cld_list_chain.next != &config_llog_list) + cfs_atomic_inc(&cld_next->cld_refcount); + if (cld->cld_lostlock) { - CDEBUG(D_MGC, "updating log %s\n", - cld->cld_logname); + if (cld->cld_sptlrpc && + cld->cld_sptlrpc->cld_lostlock) { + cld->cld_sptlrpc->cld_lostlock = 0; + + cfs_spin_unlock(&config_list_lock); + do_requeue(cld->cld_sptlrpc); + cfs_spin_lock(&config_list_lock); + LASSERT(cld->cld_lostlock); + } + cld->cld_lostlock = 0; - rc = mgc_process_log(cld->cld_mgcexp->exp_obd, - cld); - /* Whether we enqueued again or not in - mgc_process_log, we're done with the ref - from the old enqueue */ - config_log_put(cld); + + cfs_spin_unlock(&config_list_lock); + do_requeue(cld); + cfs_spin_lock(&config_list_lock); + } + + + if (cld_prev) { + cfs_spin_unlock(&config_list_lock); + config_log_put(cld_prev); + cfs_spin_lock(&config_list_lock); } - spin_lock(&config_list_lock); + + cld_prev = cld_next; } - spin_unlock(&config_list_lock); - + cfs_spin_unlock(&config_list_lock); + /* Wait a bit to see if anyone else needs a requeue */ l_wait_event(rq_waitq, rq_state & (RQ_NOW | RQ_STOP), &lwi_later); - spin_lock(&config_list_lock); + cfs_spin_lock(&config_list_lock); } /* spinlock and while guarantee RQ_NOW and RQ_LATER are not set */ rq_state &= ~RQ_RUNNING; - spin_unlock(&config_list_lock); - + cfs_spin_unlock(&config_list_lock); + CDEBUG(D_MGC, "Ending requeue thread\n"); RETURN(rc); } @@ -331,24 +490,27 @@ static int mgc_requeue_add(struct config_llog_data *cld, int later) { int rc = 0; - CDEBUG(D_INFO, "log %s: requeue (l=%d r=%d sp=%d st=%x)\n", - cld->cld_logname, later, atomic_read(&cld->cld_refcount), + CDEBUG(D_INFO, "log %s: requeue (l=%d r=%d sp=%d st=%x)\n", + cld->cld_logname, later, cfs_atomic_read(&cld->cld_refcount), cld->cld_stopping, rq_state); + LASSERT(cfs_atomic_read(&cld->cld_refcount) > 0); /* Hold lock for rq_state */ - spin_lock(&config_list_lock); - cld->cld_lostlock = 1; + cfs_spin_lock(&config_list_lock); if (cld->cld_stopping || (rq_state & RQ_STOP)) { - spin_unlock(&config_list_lock); + cld->cld_lostlock = 0; + cfs_spin_unlock(&config_list_lock); config_log_put(cld); RETURN(0); } + cld->cld_lostlock = 1; + if (!(rq_state & RQ_RUNNING)) { LASSERT(rq_state == 0); rq_state = RQ_RUNNING | (later ? RQ_LATER : RQ_NOW); - spin_unlock(&config_list_lock); + cfs_spin_unlock(&config_list_lock); rc = cfs_kernel_thread(mgc_requeue_thread, 0, CLONE_VM | CLONE_FILES); if (rc < 0) { @@ -362,7 +524,7 @@ static int mgc_requeue_add(struct config_llog_data *cld, int later) } } else { rq_state |= later ? RQ_LATER : RQ_NOW; - spin_unlock(&config_list_lock); + cfs_spin_unlock(&config_list_lock); cfs_waitq_signal(&rq_waitq); } @@ -386,13 +548,13 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, LASSERT(lsi->lsi_srv_mnt == mnt); /* The mgc fs exclusion sem. Only one fs can be setup at a time. */ - down(&cli->cl_mgc_sem); + cfs_down(&cli->cl_mgc_sem); - cleanup_group_info(); + cfs_cleanup_group_info(); obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd)); if (IS_ERR(obd->obd_fsops)) { - up(&cli->cl_mgc_sem); + cfs_up(&cli->cl_mgc_sem); CERROR("No fstype %s rc=%ld\n", MT_STR(lsi->lsi_ldd), PTR_ERR(obd->obd_fsops)); RETURN(PTR_ERR(obd->obd_fsops)); @@ -407,7 +569,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, obd->obd_lvfs_ctxt.fs = get_ds(); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - dentry = lookup_one_len(MOUNT_CONFIGS_DIR, current->fs->pwd, + dentry = lookup_one_len(MOUNT_CONFIGS_DIR, cfs_fs_pwd(current->fs), strlen(MOUNT_CONFIGS_DIR)); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (IS_ERR(dentry)) { @@ -420,7 +582,7 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, /* We take an obd ref to insure that we can't get to mgc_cleanup without calling mgc_fs_cleanup first. */ - class_incref(obd); + class_incref(obd, "mgc_fs", obd); label = fsfilt_get_label(obd, mnt->mnt_sb); if (label) @@ -433,7 +595,7 @@ err_ops: fsfilt_put_ops(obd->obd_fsops); obd->obd_fsops = NULL; cli->cl_mgc_vfsmnt = NULL; - up(&cli->cl_mgc_sem); + cfs_up(&cli->cl_mgc_sem); RETURN(err); } @@ -451,19 +613,19 @@ static int mgc_fs_cleanup(struct obd_device *obd) l_dput(cli->cl_mgc_configs_dir); cli->cl_mgc_configs_dir = NULL; pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - class_decref(obd); + class_decref(obd, "mgc_fs", obd); } cli->cl_mgc_vfsmnt = NULL; if (obd->obd_fsops) fsfilt_put_ops(obd->obd_fsops); - up(&cli->cl_mgc_sem); + cfs_up(&cli->cl_mgc_sem); RETURN(rc); } -static atomic_t mgc_count = ATOMIC_INIT(0); +static cfs_atomic_t mgc_count = CFS_ATOMIC_INIT(0); static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) { int rc = 0; @@ -473,12 +635,12 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) case OBD_CLEANUP_EARLY: break; case OBD_CLEANUP_EXPORTS: - if (atomic_dec_and_test(&mgc_count)) { - /* Kick the requeue waitq - cld's should all be + if (cfs_atomic_dec_and_test(&mgc_count)) { + /* Kick the requeue waitq - cld's should all be stopping */ - spin_lock(&config_list_lock); + cfs_spin_lock(&config_list_lock); rq_state |= RQ_STOP; - spin_unlock(&config_list_lock); + cfs_spin_unlock(&config_list_lock); cfs_waitq_signal(&rq_waitq); } rc = obd_llog_finish(obd, 0); @@ -522,7 +684,7 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) if (rc) GOTO(err_decref, rc); - rc = obd_llog_init(obd, &obd->obd_olg, obd, 0, NULL, NULL); + rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_cleanup, rc); @@ -530,14 +692,15 @@ static int mgc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) lprocfs_mgc_init_vars(&lvars); lprocfs_obd_setup(obd, lvars.obd_vars); + sptlrpc_lprocfs_cliobd_attach(obd); - spin_lock(&config_list_lock); - atomic_inc(&mgc_count); - if (atomic_read(&mgc_count) == 1) { + cfs_spin_lock(&config_list_lock); + cfs_atomic_inc(&mgc_count); + if (cfs_atomic_read(&mgc_count) == 1) { rq_state &= ~RQ_STOP; cfs_waitq_init(&rq_waitq); } - spin_unlock(&config_list_lock); + cfs_spin_unlock(&config_list_lock); RETURN(rc); @@ -578,9 +741,9 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, } /* Are we done with this log? */ if (cld->cld_stopping) { - CDEBUG(D_MGC, "log %s: stopping, won't requeue\n", + CDEBUG(D_MGC, "log %s: stopping, won't requeue\n", cld->cld_logname); - config_log_put(cld); + config_log_put(cld); break; } /* Make sure not to re-enqueue when the mgc is stopping @@ -589,11 +752,12 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, !lock->l_conn_export->exp_obd->u.cli.cl_conn_count) { CDEBUG(D_MGC, "log %s: disconnecting, won't requeue\n", cld->cld_logname); - config_log_put(cld); + config_log_put(cld); break; } /* Did we fail to get the lock? */ - if (lock->l_req_mode != lock->l_granted_mode) { + if (lock->l_req_mode != lock->l_granted_mode && + !cld->cld_is_sptlrpc) { CDEBUG(D_MGC, "log %s: original grant failed, will " "requeue later\n", cld->cld_logname); /* Try to re-enqueue later */ @@ -623,26 +787,27 @@ static int mgc_set_mgs_param(struct obd_export *exp, { struct ptlrpc_request *req; struct mgs_send_param *req_msp, *rep_msp; - int size[] = { sizeof(struct ptlrpc_body), sizeof(*req_msp) }; - __u32 rep_size[] = { sizeof(struct ptlrpc_body), sizeof(*msp) }; int rc; ENTRY; - req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MGS_VERSION, - MGS_SET_INFO, 2, size, NULL); + req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), + &RQF_MGS_SET_INFO, LUSTRE_MGS_VERSION, + MGS_SET_INFO); if (!req) RETURN(-ENOMEM); - req_msp = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*req_msp)); - if (!req_msp) + req_msp = req_capsule_client_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); + if (!req_msp) { + ptlrpc_req_finished(req); RETURN(-ENOMEM); + } memcpy(req_msp, msp, sizeof(*req_msp)); - ptlrpc_req_set_repsize(req, 2, rep_size); + ptlrpc_request_set_replen(req); + rc = ptlrpc_queue_wait(req); if (!rc) { - rep_msp = lustre_swab_repbuf(req, REPLY_REC_OFF, - sizeof(*rep_msp), NULL); + rep_msp = req_capsule_server_get(&req->rq_pill, &RMF_MGS_SEND_PARAM); memcpy(msp, rep_msp, sizeof(*rep_msp)); } @@ -660,7 +825,7 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, { struct config_llog_data *cld = (struct config_llog_data *)data; struct ldlm_enqueue_info einfo = { type, mode, mgc_blocking_ast, - ldlm_completion_ast, NULL, data}; + ldlm_completion_ast, NULL, NULL, data}; int rc; ENTRY; @@ -675,10 +840,10 @@ static int mgc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, /* We need a callback for every lockholder, so don't try to ldlm_lock_match (see rev 1.1.2.11.2.47) */ - rc = ldlm_cli_enqueue(exp, NULL, &einfo, &cld->cld_resid, - NULL, flags, NULL, 0, NULL, lockh, 0); - /* A failed enqueue should still call the mgc_blocking_ast, - where it will be requeued if needed ("grant failed"). */ + rc = ldlm_cli_enqueue(exp, NULL, &einfo, &cld->cld_resid, NULL, flags, + NULL, 0, lockh, 0); + /* A failed enqueue should still call the mgc_blocking_ast, + where it will be requeued if needed ("grant failed"). */ RETURN(rc); } @@ -704,7 +869,7 @@ static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, int rc; ENTRY; - if (!try_module_get(THIS_MODULE)) { + if (!cfs_try_module_get(THIS_MODULE)) { CERROR("Can't get module. Is it alive?"); return -EINVAL; } @@ -741,7 +906,7 @@ static int mgc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, rc = -ENOTTY); } out: - module_put(THIS_MODULE); + cfs_module_put(THIS_MODULE); return rc; } @@ -763,10 +928,13 @@ static int mgc_target_register(struct obd_export *exp, RETURN(-ENOMEM); req_mti = req_capsule_client_get(&req->rq_pill, &RMF_MGS_TARGET_INFO); - memcpy(req_mti, mti, sizeof(*req_mti)); + if (!req_mti) { + ptlrpc_req_finished(req); + RETURN(-ENOMEM); + } + memcpy(req_mti, mti, sizeof(*req_mti)); ptlrpc_request_set_replen(req); - CDEBUG(D_MGC, "register %s\n", mti->mti_svname); rc = ptlrpc_queue_wait(req); @@ -794,9 +962,9 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, if (KEY_IS(KEY_INIT_RECOV)) { if (vallen != sizeof(int)) RETURN(-EINVAL); - spin_lock(&imp->imp_lock); + cfs_spin_lock(&imp->imp_lock); imp->imp_initial_recov = *(int *)val; - spin_unlock(&imp->imp_lock); + cfs_spin_unlock(&imp->imp_lock); CDEBUG(D_HA, "%s: set imp_initial_recov = %d\n", exp->exp_obd->obd_name, imp->imp_initial_recov); RETURN(0); @@ -807,12 +975,12 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, if (vallen != sizeof(int)) RETURN(-EINVAL); value = *(int *)val; - spin_lock(&imp->imp_lock); + cfs_spin_lock(&imp->imp_lock); imp->imp_initial_recov_bk = value > 0; /* Even after the initial connection, give up all comms if nobody answers the first time. */ imp->imp_recon_bk = 1; - spin_unlock(&imp->imp_lock); + cfs_spin_unlock(&imp->imp_lock); CDEBUG(D_MGC, "InitRecov %s %d/%d:d%d:i%d:r%d:or%d:%s\n", imp->imp_obd->obd_name, value, imp->imp_initial_recov, imp->imp_deactive, imp->imp_invalid, @@ -862,6 +1030,49 @@ int mgc_set_info_async(struct obd_export *exp, obd_count keylen, rc = mgc_set_mgs_param(exp, msp); RETURN(rc); } + if (KEY_IS(KEY_MGSSEC)) { + struct client_obd *cli = &exp->exp_obd->u.cli; + struct sptlrpc_flavor flvr; + + /* + * empty string means using current flavor, if which haven't + * been set yet, set it as null. + * + * if flavor has been set previously, check the asking flavor + * must match the existing one. + */ + if (vallen == 0) { + if (cli->cl_flvr_mgc.sf_rpc != SPTLRPC_FLVR_INVALID) + RETURN(0); + val = "null"; + vallen = 4; + } + + rc = sptlrpc_parse_flavor(val, &flvr); + if (rc) { + CERROR("invalid sptlrpc flavor %s to MGS\n", + (char *) val); + RETURN(rc); + } + + /* + * caller already hold a mutex + */ + if (cli->cl_flvr_mgc.sf_rpc == SPTLRPC_FLVR_INVALID) { + cli->cl_flvr_mgc = flvr; + } else if (memcmp(&cli->cl_flvr_mgc, &flvr, + sizeof(flvr)) != 0) { + char str[20]; + + sptlrpc_flavor2name(&cli->cl_flvr_mgc, + str, sizeof(str)); + LCONSOLE_ERROR("asking sptlrpc flavor %s to MGS but " + "currently %s is in use\n", + (char *) val, str); + rc = -EPERM; + } + RETURN(rc); + } RETURN(rc); } @@ -889,7 +1100,7 @@ static int mgc_import_event(struct obd_device *obd, case IMP_EVENT_ACTIVE: LCONSOLE_WARN("%s: Reactivating import\n", obd->obd_name); /* Clearing obd_no_recov allows us to continue pinging */ - obd->obd_no_recov = 0; + obd->obd_no_recov = 0; break; case IMP_EVENT_OCD: break; @@ -901,8 +1112,7 @@ static int mgc_import_event(struct obd_device *obd, } static int mgc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, - struct obd_device *tgt, int count, - struct llog_catid *logid, struct obd_uuid *uuid) + struct obd_device *tgt, int *index) { struct llog_ctxt *ctxt; int rc; @@ -919,8 +1129,18 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); + if (!ctxt) { + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + if (ctxt) + llog_cleanup(ctxt); + RETURN(-ENODEV); + } llog_initiator_connect(ctxt); llog_ctxt_put(ctxt); + } else { + ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + if (ctxt) + llog_cleanup(ctxt); } RETURN(rc); @@ -1003,7 +1223,7 @@ static int mgc_copy_llog(struct obd_device *obd, struct llog_ctxt *rctxt, upon successful completion. */ OBD_ALLOC(temp_log, strlen(logname) + 1); - if (!temp_log) + if (!temp_log) RETURN(-ENOMEM); sprintf(temp_log, "%sT", logname); @@ -1012,7 +1232,7 @@ static int mgc_copy_llog(struct obd_device *obd, struct llog_ctxt *rctxt, if (rc) GOTO(out, rc); rc = llog_init_handle(local_llh, LLOG_F_IS_PLAIN, NULL); - if (rc) + if (rc) GOTO(out, rc); rc = llog_destroy(local_llh); llog_free_handle(local_llh); @@ -1069,32 +1289,36 @@ out: RETURN(rc); } -DECLARE_MUTEX(llog_process_lock); - /* Get a config log from the MGS and process it. This func is called for both clients and servers. */ -static int mgc_process_log(struct obd_device *mgc, - struct config_llog_data *cld) +int mgc_process_log(struct obd_device *mgc, + struct config_llog_data *cld) { struct llog_ctxt *ctxt, *lctxt; struct lustre_handle lockh; struct client_obd *cli = &mgc->u.cli; struct lvfs_run_ctxt saved; - struct lustre_sb_info *lsi; + struct lustre_sb_info *lsi = NULL; int rc = 0, rcl, flags = 0, must_pop = 0; ENTRY; - if (!cld || !cld->cld_cfg.cfg_sb) { - /* This should never happen */ - CERROR("Missing cld, aborting log update\n"); - RETURN(-EINVAL); - } - if (cld->cld_stopping) + LASSERT(cld); + + /* I don't want multiple processes running process_log at once -- + sounds like badness. It actually might be fine, as long as + we're not trying to update from the same log + simultaneously (in which case we should use a per-log sem.) */ + cfs_down(&llog_process_lock); + + if (cld->cld_stopping) { + cfs_up(&llog_process_lock); RETURN(0); + } OBD_FAIL_TIMEOUT(OBD_FAIL_MGC_PAUSE_PROCESS_LOG, 20); - lsi = s2lsi(cld->cld_cfg.cfg_sb); + if (cld->cld_cfg.cfg_sb) + lsi = s2lsi(cld->cld_cfg.cfg_sb); CDEBUG(D_MGC, "Process log %s:%s from %d\n", cld->cld_logname, cld->cld_cfg.cfg_instance, cld->cld_cfg.cfg_last_idx + 1); @@ -1102,15 +1326,10 @@ static int mgc_process_log(struct obd_device *mgc, ctxt = llog_get_context(mgc, LLOG_CONFIG_REPL_CTXT); if (!ctxt) { CERROR("missing llog context\n"); + cfs_up(&llog_process_lock); RETURN(-EINVAL); } - /* I don't want mutliple processes running process_log at once -- - sounds like badness. It actually might be fine, as long as - we're not trying to update from the same log - simultaneously (in which case we should use a per-log sem.) */ - down(&llog_process_lock); - /* Get the cfg lock on the llog */ rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, NULL, LDLM_PLAIN, NULL, LCK_CR, &flags, NULL, NULL, NULL, @@ -1120,6 +1339,13 @@ static int mgc_process_log(struct obd_device *mgc, lctxt = llog_get_context(mgc, LLOG_CONFIG_ORIG_CTXT); + /* + * local copy of sptlrpc log is controlled elsewhere, don't try to + * read it up here. + */ + if (rcl && cld->cld_is_sptlrpc) + goto out_pop; + /* Copy the setup log locally if we can. Don't mess around if we're running an MGS though (logs are already local). */ if (lctxt && lsi && (lsi->lsi_flags & LSI_SERVER) && @@ -1138,7 +1364,7 @@ static int mgc_process_log(struct obd_device *mgc, GOTO(out_pop, rc = -ENOTCONN); } CDEBUG(D_MGC, "Failed to get MGS log %s, using local " - "copy for now, will try to update later.\n", + "copy for now, will try to update later.\n", cld->cld_logname); } /* Now, whether we copied or not, start using the local llog. @@ -1148,6 +1374,9 @@ static int mgc_process_log(struct obd_device *mgc, ctxt = lctxt; } + if (cld->cld_is_sptlrpc) + sptlrpc_conf_log_update_begin(cld->cld_logname); + /* logname and instance info should be the same, so use our copy of the instance for the update. The cfg_last_idx will be updated here. */ @@ -1159,6 +1388,19 @@ out_pop: if (must_pop) pop_ctxt(&saved, &mgc->obd_lvfs_ctxt, NULL); + /* + * update settings on existing OBDs. doing it inside + * of llog_process_lock so no device is attaching/detaching + * in parallel. + * the logname must be -sptlrpc + */ + if (cld->cld_is_sptlrpc && rcl == 0) { + sptlrpc_conf_log_update_end(cld->cld_logname); + class_notify_sptlrpc_conf(cld->cld_logname, + strlen(cld->cld_logname) - + strlen("-sptlrpc")); + } + /* Now drop the lock so MGS can revoke it */ if (!rcl) { rcl = mgc_cancel(mgc->u.cli.cl_mgc_mgsexp, NULL, @@ -1170,7 +1412,7 @@ out_pop: CDEBUG(D_MGC, "%s: configuration from log '%s' %sed (%d).\n", mgc->obd_name, cld->cld_logname, rc ? "fail" : "succeed", rc); - up(&llog_process_lock); + cfs_up(&llog_process_lock); RETURN(rc); } @@ -1203,6 +1445,10 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf) CERROR("lov_del_obd unimplemented\n"); rc = -ENOSYS; break; + case LCFG_SPTLRPC_CONF: { + rc = sptlrpc_process_config(lcfg); + break; + } case LCFG_LOG_START: { struct config_llog_data *cld; struct config_llog_instance *cfg; @@ -1215,7 +1461,7 @@ static int mgc_process_config(struct obd_device *obd, obd_count len, void *buf) cfg->cfg_last_idx); /* We're only called through here on the initial mount */ - rc = config_log_add(logname, cfg, sb); + rc = config_log_add(obd, logname, cfg, sb); if (rc) break; cld = config_log_find(logname, cfg);