From 7cd07db7e58e8ad82732a4cd9f08786608504224 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Wed, 10 Aug 2011 18:19:15 -0700 Subject: [PATCH] ORNL-13: NonIR client support This change adds support for NonIR clients. NonIR clients are clients that do not understand the imperative recovery protocol, which means they are not notified when a target restarts. To support NonIR clients, the MGS has to record how many NonIR clients each file system has, and track those clients. This way, if there are NonIR clients for a specific file system, the MGS tells the restarting target to disable imperative recovery; otherwise, these `old' clients would easily be evicted. Signed-off-by: Jinshan Xiong Change-Id: I3725c66b74d702aa213644ee9a6f89d59b8a8083 Reviewed-on: http://review.whamcloud.com/1218 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_export.h | 6 ++ lustre/mgs/mgs_handler.c | 42 ++++++++++++- lustre/mgs/mgs_internal.h | 27 ++++++++ lustre/mgs/mgs_nids.c | 136 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 208 insertions(+), 3 deletions(-) diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index f8ad0c3..2e0d95e 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -116,6 +116,11 @@ struct filter_export_data { __u32 fed_group; }; +struct mgs_export_data { + cfs_list_t med_clients; /* mgc fs client via this exp */ + cfs_spinlock_t med_lock; /* protect med_clients */ +}; + /** * per-NID statistics structure. 
* It tracks access patterns to this export on a per-client-NID basis @@ -254,6 +259,7 @@ struct obd_export { struct mdt_export_data eu_mdt_data; struct filter_export_data eu_filter_data; struct ec_export_data eu_ec_data; + struct mgs_export_data eu_mgs_data; } u; }; diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 5719b5d..d811f4a 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -129,11 +129,12 @@ static int mgs_disconnect(struct obd_export *exp) LASSERT(exp); + mgs_fsc_cleanup(exp); + class_export_get(exp); mgs_counter_incr(exp, LPROC_MGS_DISCONNECT); rc = server_disconnect_export(exp); - class_export_put(exp); RETURN(rc); } @@ -714,6 +715,37 @@ static int mgs_handle_exception(struct ptlrpc_request *req) RETURN(0); } +/* Attach the fs named by the llog name (the text before its first '-') to the request's export; returns -EINVAL for a missing or malformed name, otherwise the result of mgs_fsc_attach(). */ +static int mgs_handle_fslog_hack(struct ptlrpc_request *req) +{ + char *logname, *ptr; + char fsname[16]; + int rc; + + /* XXX: We suppose that llog at mgs is only used for + * fetching file system log */ + logname = req_capsule_client_get(&req->rq_pill, &RMF_NAME); + if (logname == NULL) { + CERROR("No logname, is llog on MGS used for something else?\n"); + return -EINVAL; + } + + /* test ptr before subtracting: arithmetic on a NULL pointer is undefined */ + ptr = strchr(logname, '-'); + if (ptr == NULL || (size_t)(ptr - logname) >= sizeof(fsname)) { + CERROR("Invalid logname received: %s\n", logname); + return -EINVAL; + } + rc = (int)(ptr - logname); + memcpy(fsname, logname, rc); /* length is known and bounded above; the terminator is added next */ + fsname[rc] = 0; + + rc = mgs_fsc_attach(req->rq_export, fsname); + if (rc < 0 && rc != -EEXIST) + CERROR("add fs client %s returns %d\n", fsname, rc); + return rc; +} + /* TODO: handle requests in a similar way as MDT: see mdt_handle_common() */ int mgs_handle(struct ptlrpc_request *req) { @@ -815,6 +847,8 @@ int mgs_handle(struct ptlrpc_request *req) DEBUG_REQ(D_MGS, req, "llog_init"); req_capsule_set(&req->rq_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); rc = llog_origin_handle_create(req); + if (rc == 0) + (void)mgs_handle_fslog_hack(req); break; case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: DEBUG_REQ(D_MGS, req, "llog next 
block"); @@ -855,6 +889,12 @@ out: static inline int mgs_init_export(struct obd_export *exp) { + struct mgs_export_data *data = &exp->u.eu_mgs_data; + + /* init mgs_export_data for fsc */ + cfs_spin_lock_init(&data->med_lock); + CFS_INIT_LIST_HEAD(&data->med_clients); + cfs_spin_lock(&exp->exp_lock); exp->exp_connecting = 1; cfs_spin_unlock(&exp->exp_lock); diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 51ae0b1..5c66613 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -76,6 +76,27 @@ enum { */ struct fs_db; +/** + * maintain fs client nodes of mgs: one mgs_fsc links one export to one fsdb. + */ +struct mgs_fsc { + struct fs_db *mfc_fsdb; /* the file system this client is attached to */ + /** + * Where the fs client comes from. + */ + struct obd_export *mfc_export; + /** + * list of fs clients from the same export, + * protected by mgs_export_data->med_lock + */ + cfs_list_t mfc_export_list; + /** + * list of fs clients in the same fsdb, protected by fsdb->fsdb_sem + */ + cfs_list_t mfc_fsdb_list; + unsigned mfc_ir_capable:1; /* set when the export's connect flags include OBD_CONNECT_IMP_RECOV */ +}; + struct mgs_nidtbl { struct fs_db *mn_fsdb; struct file *mn_version_file; @@ -121,6 +142,9 @@ struct fs_db { struct sptlrpc_rule_set fsdb_srpc_gen; struct mgs_tgt_srpc_conf *fsdb_srpc_tgt; + /* list of fs clients, mgs_fsc. 
protected by fsdb_sem */ + cfs_list_t fsdb_clients; + int fsdb_nonir_clients; /* number of attached mgs_fsc entries with mfc_ir_capable clear */ int fsdb_ir_state; /* Target NIDs Table */ @@ -181,6 +205,9 @@ int lprocfs_wr_ir_timeout(struct file *file, const char *buffer, unsigned long count, void *data); int lprocfs_rd_ir_timeout(char *page, char **start, off_t off, int count, int *eof, void *data); +void mgs_fsc_cleanup(struct obd_export *exp); +void mgs_fsc_cleanup_by_fsdb(struct fs_db *fsdb); +int mgs_fsc_attach(struct obd_export *exp, char *fsname); /* mgs_fs.c */ int mgs_export_stats_init(struct obd_device *obd, struct obd_export *exp, diff --git a/lustre/mgs/mgs_nids.c b/lustre/mgs/mgs_nids.c index 359e321..587a9d0 100644 --- a/lustre/mgs/mgs_nids.c +++ b/lustre/mgs/mgs_nids.c @@ -452,6 +452,8 @@ int mgs_ir_init_fs(struct obd_device *obd, struct fs_db *fsdb) if (cfs_time_before(cfs_time_current_sec(), mgs->mgs_start_time + ir_timeout)) fsdb->fsdb_ir_state = IR_STARTUP; + fsdb->fsdb_nonir_clients = 0; + CFS_INIT_LIST_HEAD(&fsdb->fsdb_clients); /* start notify thread */ fsdb->fsdb_obd = obd; @@ -473,8 +475,12 @@ void mgs_ir_fini_fs(struct obd_device *obd, struct fs_db *fsdb) if (cfs_test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) return; + mgs_fsc_cleanup_by_fsdb(fsdb); /* NOTE(review): that helper is documented as requiring fsdb->fsdb_sem; no lock acquisition is visible in this hunk - confirm the caller holds it */ + mgs_nidtbl_fini_fs(fsdb); + LASSERT(cfs_list_empty(&fsdb->fsdb_clients)); + fsdb->fsdb_notify_stop = 1; cfs_waitq_signal(&fsdb->fsdb_notify_waitq); cfs_wait_for_completion(&fsdb->fsdb_notify_comp); @@ -489,6 +495,8 @@ static inline void ir_state_graduate(struct fs_db *fsdb) if (cfs_time_before(mgs->mgs_start_time + ir_timeout, cfs_time_current_sec())) { fsdb->fsdb_ir_state = IR_FULL; + if (fsdb->fsdb_nonir_clients) + fsdb->fsdb_ir_state = IR_PARTIAL; /* non-IR clients attached: settle on IR_PARTIAL instead of IR_FULL */ } } } @@ -684,6 +692,8 @@ static int lprocfs_ir_set_state(struct fs_db *fsdb, const char *buf) CDEBUG(D_MGS, "change fsr state of %s from %s to %s\n", fsdb->fsdb_name, strings[fsdb->fsdb_ir_state], strings[state]); cfs_down(&fsdb->fsdb_sem); + if (state == IR_FULL && fsdb->fsdb_nonir_clients) + state = IR_PARTIAL; 
fsdb->fsdb_ir_state = state; cfs_up(&fsdb->fsdb_sem); @@ -784,8 +794,10 @@ int lprocfs_rd_ir_state(struct seq_file *seq, void *data) ir_state_graduate(fsdb); seq_printf(seq, - "\tstate: %s, nidtbl version: %lld\n", - ir_strings[fsdb->fsdb_ir_state], tbl->mn_version); + "\tstate: %s, nonir clients: %d\n" + "\tnidtbl version: %lld\n", + ir_strings[fsdb->fsdb_ir_state], fsdb->fsdb_nonir_clients, + tbl->mn_version); seq_printf(seq, "\tnotify total/max/count: %u/%u/%u\n", fsdb->fsdb_notify_total, fsdb->fsdb_notify_max, fsdb->fsdb_notify_count); @@ -805,3 +817,123 @@ int lprocfs_wr_ir_timeout(struct file *file, const char *buffer, return lprocfs_wr_uint(file, buffer, count, &ir_timeout); } +/* --------------- Handle non IR support clients --------------- */ +/* attach a lustre file system to an export: link a new mgs_fsc for fsname onto both the export's and the fsdb's client lists. Returns 0 when a new entry is linked, -EEXIST when exp is already attached to fsname, -ENOMEM when the mgs_fsc cannot be allocated, or the nonzero result of mgs_find_or_make_fsdb(). */ +int mgs_fsc_attach(struct obd_export *exp, char *fsname) +{ + struct mgs_export_data *data = &exp->u.eu_mgs_data; + struct obd_device *obd = exp->exp_obd; + struct fs_db *fsdb; + struct mgs_fsc *fsc = NULL; + struct mgs_fsc *new_fsc = NULL; + bool found = false; + int rc; + ENTRY; + + rc = mgs_find_or_make_fsdb(obd, fsname, &fsdb); + if (rc) + RETURN(rc); + + /* allocate a new fsc in case we need it in spinlock. */ + OBD_ALLOC_PTR(new_fsc); + if (new_fsc == NULL) + RETURN(-ENOMEM); + + CFS_INIT_LIST_HEAD(&new_fsc->mfc_export_list); + CFS_INIT_LIST_HEAD(&new_fsc->mfc_fsdb_list); + new_fsc->mfc_fsdb = fsdb; + new_fsc->mfc_export = class_export_get(exp); + new_fsc->mfc_ir_capable = + !!(exp->exp_connect_flags & OBD_CONNECT_IMP_RECOV); + + rc = -EEXIST; /* assume a duplicate until the list walk below finds none */ + cfs_down(&fsdb->fsdb_sem); + + /* tend to find it in export list because this list is shorter. */ + cfs_spin_lock(&data->med_lock); + cfs_list_for_each_entry(fsc, &data->med_clients, mfc_export_list) { + if (strcmp(fsname, fsc->mfc_fsdb->fsdb_name) == 0) { + found = true; + break; + } + } + if (!found) { + fsc = new_fsc; + new_fsc = NULL; + + /* add it into export list. 
*/ + cfs_list_add(&fsc->mfc_export_list, &data->med_clients); + + /* add into fsdb list. */ + cfs_list_add(&fsc->mfc_fsdb_list, &fsdb->fsdb_clients); + if (!fsc->mfc_ir_capable) { /* a non-IR client is counted and downgrades IR_FULL to IR_PARTIAL */ + ++fsdb->fsdb_nonir_clients; + if (fsdb->fsdb_ir_state == IR_FULL) + fsdb->fsdb_ir_state = IR_PARTIAL; + } + rc = 0; + } + cfs_spin_unlock(&data->med_lock); + cfs_up(&fsdb->fsdb_sem); + + if (new_fsc) { /* duplicate found: release the unused preallocation and its export reference */ + class_export_put(new_fsc->mfc_export); + OBD_FREE_PTR(new_fsc); + } + RETURN(rc); +} + +void mgs_fsc_cleanup(struct obd_export *exp) /* detach and free every mgs_fsc attached to exp */ +{ + struct mgs_export_data *data = &exp->u.eu_mgs_data; + struct mgs_fsc *fsc, *tmp; + CFS_LIST_HEAD(head); + + cfs_spin_lock(&data->med_lock); + cfs_list_splice_init(&data->med_clients, &head); /* move all entries to a private head; the loop below walks it after med_lock is dropped */ + cfs_spin_unlock(&data->med_lock); + + cfs_list_for_each_entry_safe(fsc, tmp, &head, mfc_export_list) { + struct fs_db *fsdb = fsc->mfc_fsdb; + + LASSERT(fsc->mfc_export == exp); + + cfs_down(&fsdb->fsdb_sem); + cfs_list_del_init(&fsc->mfc_fsdb_list); + if (fsc->mfc_ir_capable == 0) { + --fsdb->fsdb_nonir_clients; + LASSERT(fsdb->fsdb_ir_state != IR_FULL); + if (fsdb->fsdb_nonir_clients == 0 && + fsdb->fsdb_ir_state == IR_PARTIAL) + fsdb->fsdb_ir_state = IR_FULL; /* last non-IR client gone: promote IR_PARTIAL back to IR_FULL */ + } + cfs_up(&fsdb->fsdb_sem); + cfs_list_del_init(&fsc->mfc_export_list); + class_export_put(fsc->mfc_export); + OBD_FREE_PTR(fsc); + } +} + +/* detach and free every mgs_fsc on fsdb and reset its non-IR accounting; must be called with fsdb->fsdb_sem held. NOTE(review): mgs_fsc_cleanup() walks its spliced private list without med_lock, so if it can run concurrently with this function an entry it is still visiting could be freed here - confirm callers exclude that. */ +void mgs_fsc_cleanup_by_fsdb(struct fs_db *fsdb) +{ + struct mgs_fsc *fsc, *tmp; + + cfs_list_for_each_entry_safe(fsc, tmp, &fsdb->fsdb_clients, + mfc_fsdb_list) { + struct mgs_export_data *data = &fsc->mfc_export->u.eu_mgs_data; + + LASSERT(fsdb == fsc->mfc_fsdb); + cfs_list_del_init(&fsc->mfc_fsdb_list); + + cfs_spin_lock(&data->med_lock); + cfs_list_del_init(&fsc->mfc_export_list); + cfs_spin_unlock(&data->med_lock); + class_export_put(fsc->mfc_export); + OBD_FREE_PTR(fsc); + } + + fsdb->fsdb_nonir_clients = 0; + if (fsdb->fsdb_ir_state == IR_PARTIAL) + fsdb->fsdb_ir_state = IR_FULL; +} -- 1.8.3.1