From 6499b303e97e52efab54f7a89aa74c9e93e0dc72 Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 29 Mar 2005 18:51:56 +0000 Subject: [PATCH] b=3262 r=adilger,green Store backup servers in the config logs on the mds, and try them round-robin style when recovering ported from b_cray, originally by eric mei. --- lustre/ChangeLog | 2 + lustre/include/linux/lustre_cfg.h | 4 +- lustre/include/linux/lustre_import.h | 11 +++ lustre/include/linux/lustre_net.h | 4 + lustre/include/linux/obd.h | 3 + lustre/include/linux/obd_class.h | 29 +++++++ lustre/ldlm/ldlm_lib.c | 157 ++++++++++++++++++++++++++++++++--- lustre/ldlm/ldlm_lockd.c | 2 + lustre/mdc/mdc_request.c | 2 + lustre/obdclass/genops.c | 11 +++ lustre/obdclass/lprocfs_status.c | 2 + lustre/obdclass/obd_config.c | 68 +++++++++++++++ lustre/osc/osc_request.c | 4 + lustre/ptlrpc/import.c | 109 +++++++++++++++++------- lustre/ptlrpc/service.c | 2 + lustre/tests/cfg/insanity-adev.sh | 2 +- lustre/tests/cfg/insanity-local.sh | 2 +- lustre/tests/cfg/insanity-ltest.sh | 2 +- lustre/tests/cfg/insanity-mdev.sh | 2 +- lustre/tests/cfg/mdev.sh | 2 +- lustre/utils/lconf | 55 +++++++++++- lustre/utils/lctl.c | 4 + lustre/utils/lustre_cfg.c | 71 ++++++++++++++++ lustre/utils/obdctl.h | 2 + 24 files changed, 503 insertions(+), 49 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 1121eb6..021079f 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -15,6 +15,8 @@ - lconf should create multiple TCP connections from a client (5201) - init scripts are now turned off by default; run chkconfig --on lustre and chkconfig --on lustrefs to use them + - upcalls are no longer needed for clients to recover to failover + servers (3262) 2005-03-22 Cluster File Systems, Inc. 
* version 1.4.1 diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/linux/lustre_cfg.h index 20c28f6..d1459da 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/linux/lustre_cfg.h @@ -43,7 +43,9 @@ enum lcfg_command_type { LCFG_MOUNTOPT = 0x00cf007, LCFG_DEL_MOUNTOPT = 0x00cf008, LCFG_SET_TIMEOUT = 0x00cf009, - LCFG_SET_UPCALL = 0x00cf010, + LCFG_SET_UPCALL = 0x00cf00a, + LCFG_ADD_CONN = 0x00cf00b, + LCFG_DEL_CONN = 0x00cf00c, }; struct lustre_cfg_bufs { diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index d5e2b2c..52ac2f9 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -40,6 +40,13 @@ enum obd_import_event { IMP_EVENT_ACTIVE = 0x808004, }; +struct obd_import_conn { + struct list_head oic_item; + struct ptlrpc_connection *oic_conn; + struct obd_uuid oic_uuid; + unsigned long oic_last_attempt; /* in jiffies */ +}; + struct obd_import { struct portals_handle imp_handle; atomic_t imp_refcount; @@ -70,6 +77,10 @@ struct obd_import { struct lustre_handle imp_remote_handle; unsigned long imp_next_ping; /* jiffies */ + /* all available obd_import_conn linked here */ + struct list_head imp_conn_list; + struct obd_import_conn *imp_conn_current; + /* Protects flags, level, generation, conn_cnt, *_list */ spinlock_t imp_lock; diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index d67e511..55946cb 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -742,6 +742,10 @@ int client_obd_cleanup(struct obd_device * obddev); int client_connect_import(struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *); int client_disconnect_export(struct obd_export *exp); +int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority); +int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); +int 
import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); /* ptlrpc/pinger.c */ int ptlrpc_pinger_add_import(struct obd_import *imp); diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 673ce6d..aa79897 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -565,6 +565,9 @@ struct obd_ops { int (*o_precleanup)(struct obd_device *dev); int (*o_cleanup)(struct obd_device *dev); int (*o_postrecov)(struct obd_device *dev); + int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, + int priority); + int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, struct obd_uuid *cluuid, struct obd_connect_data *); int (*o_disconnect)(struct obd_export *exp); diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 488e08d..1e3dade 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -454,6 +454,35 @@ static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo, RETURN(rc); } +static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority) +{ + struct obd_device *obd = imp->imp_obd; + int rc; + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_OP(obd, add_conn, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, add_conn); + + rc = OBP(obd, add_conn)(imp, uuid, priority); + RETURN(rc); +} + +static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) +{ + struct obd_device *obd = imp->imp_obd; + int rc; + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_OP(obd, del_conn, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, del_conn); + + rc = OBP(obd, del_conn)(imp, uuid); + RETURN(rc); +} + static inline int obd_connect(struct lustre_handle *conn, struct obd_device *obd, struct obd_uuid *cluuid, struct obd_connect_data *data) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index e91833c..0d7cc6f 100644 --- 
a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -36,9 +36,147 @@ #include #include +/* @priority: if non-zero, move the selected to the list head + * @create: if zero, only search in existed connections + */ +static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority, int create) +{ + struct ptlrpc_connection *ptlrpc_conn; + struct obd_import_conn *imp_conn = NULL, *item; + int rc = 0; + ENTRY; + + if (!create && !priority) { + CDEBUG(D_HA, "Nothing to do\n"); + RETURN(-EINVAL); + } + + ptlrpc_conn = ptlrpc_uuid_to_connection(uuid); + if (!ptlrpc_conn) { + CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid); + RETURN (-ENOENT); + } + + if (create) { + OBD_ALLOC(imp_conn, sizeof(*imp_conn)); + if (!imp_conn) { + GOTO(out_put, rc = -ENOMEM); + } + } + + spin_lock(&imp->imp_lock); + list_for_each_entry(item, &imp->imp_conn_list, oic_item) { + if (obd_uuid_equals(uuid, &item->oic_uuid)) { + if (priority) { + list_del(&item->oic_item); + list_add(&item->oic_item, &imp->imp_conn_list); + item->oic_last_attempt = 0; + } + CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n", + imp, imp->imp_obd->obd_name, uuid->uuid, + (priority ? ", moved to head" : "")); + spin_unlock(&imp->imp_lock); + GOTO(out_free, rc = 0); + } + } + /* not found */ + if (create) { + imp_conn->oic_conn = ptlrpc_conn; + imp_conn->oic_uuid = *uuid; + imp_conn->oic_last_attempt = 0; + if (priority) + list_add(&imp_conn->oic_item, &imp->imp_conn_list); + else + list_add_tail(&imp_conn->oic_item, &imp->imp_conn_list); + CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n", + imp, imp->imp_obd->obd_name, uuid->uuid, + (priority ? 
"head" : "tail")); + } else { + spin_unlock(&imp->imp_lock); + GOTO(out_free, rc = -ENOENT); + + } + + spin_unlock(&imp->imp_lock); + RETURN(0); +out_free: + if (imp_conn) + OBD_FREE(imp_conn, sizeof(*imp_conn)); +out_put: + ptlrpc_put_connection(ptlrpc_conn); + RETURN(rc); +} + +int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid) +{ + return import_set_conn(imp, uuid, 1, 0); +} + +int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority) +{ + return import_set_conn(imp, uuid, priority, 1); +} + +int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) +{ + struct obd_import_conn *imp_conn; + struct obd_export *dlmexp; + int rc = -ENOENT; + ENTRY; + + spin_lock(&imp->imp_lock); + if (list_empty(&imp->imp_conn_list)) { + LASSERT(!imp->imp_conn_current); + LASSERT(!imp->imp_connection); + GOTO(out, rc); + } + + list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) { + if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid)) + continue; + LASSERT(imp_conn->oic_conn); + + /* is current conn? 
*/ + if (imp_conn == imp->imp_conn_current) { + LASSERT(imp_conn->oic_conn == imp->imp_connection); + + if (imp->imp_state != LUSTRE_IMP_CLOSED && + imp->imp_state != LUSTRE_IMP_DISCON) { + CERROR("can't remove current connection\n"); + GOTO(out, rc = -EBUSY); + } + + ptlrpc_put_connection(imp->imp_connection); + imp->imp_connection = NULL; + + dlmexp = class_conn2export(&imp->imp_dlm_handle); + if (dlmexp && dlmexp->exp_connection) { + LASSERT(dlmexp->exp_connection == + imp_conn->oic_conn); + ptlrpc_put_connection(dlmexp->exp_connection); + dlmexp->exp_connection = NULL; + } + } + + list_del(&imp_conn->oic_item); + ptlrpc_put_connection(imp_conn->oic_conn); + OBD_FREE(imp_conn, sizeof(*imp_conn)); + CDEBUG(D_HA, "imp %p@%s: remove connection %s\n", + imp, imp->imp_obd->obd_name, uuid->uuid); + rc = 0; + break; + } +out: + spin_unlock(&imp->imp_lock); + if (rc == -ENOENT) + CERROR("connection %s not found\n", uuid->uuid); + RETURN(rc); +} + int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) { - struct ptlrpc_connection *conn; struct lustre_cfg* lcfg = buf; struct client_obd *cli = &obddev->u.cli; struct obd_import *imp; @@ -133,19 +271,12 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) GOTO(err, rc); } - conn = ptlrpc_uuid_to_connection(&server_uuid); - if (conn == NULL) - GOTO(err_ldlm, rc = -ENOENT); - ptlrpc_init_client(rq_portal, rp_portal, name, &obddev->obd_ldlm_client); imp = class_new_import(); - if (imp == NULL) { - ptlrpc_put_connection(conn); + if (imp == NULL) GOTO(err_ldlm, rc = -ENOENT); - } - imp->imp_connection = conn; imp->imp_client = &obddev->obd_ldlm_client; imp->imp_obd = obddev; imp->imp_connect_op = connect_op; @@ -156,6 +287,12 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) LUSTRE_CFG_BUFLEN(lcfg, 1)); class_import_put(imp); + rc = client_import_add_conn(imp, &server_uuid, 1); + if (rc) { + CERROR("can't add initial connection\n"); + GOTO(err_import, rc); + } 
+ cli->cl_import = imp; /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */ cli->cl_max_mds_easize = sizeof(struct lov_mds_md); @@ -265,7 +402,6 @@ int client_connect_import(struct lustre_handle *dlm_handle, if (rc != 0) GOTO(out_ldlm, rc); - exp->exp_connection = ptlrpc_connection_addref(imp->imp_connection); if (data) memcpy(&imp->imp_connect_data, data, sizeof(*data)); rc = ptlrpc_connect_import(imp, NULL); @@ -273,6 +409,7 @@ int client_connect_import(struct lustre_handle *dlm_handle, LASSERT (imp->imp_state == LUSTRE_IMP_DISCON); GOTO(out_ldlm, rc); } + LASSERT(exp->exp_connection); ptlrpc_pinger_add_import(imp); EXIT; diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index eb4565d..f74881b 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1635,6 +1635,8 @@ EXPORT_SYMBOL(l_lock); EXPORT_SYMBOL(l_unlock); /* ldlm_lib.c */ +EXPORT_SYMBOL(client_import_add_conn); +EXPORT_SYMBOL(client_import_del_conn); EXPORT_SYMBOL(client_obd_setup); EXPORT_SYMBOL(client_obd_cleanup); EXPORT_SYMBOL(client_connect_import); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index f7cc298..4c21f5a 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1033,6 +1033,8 @@ struct obd_ops mdc_obd_ops = { .o_setup = mdc_setup, .o_precleanup = mdc_precleanup, .o_cleanup = mdc_cleanup, + .o_add_conn = client_import_add_conn, + .o_del_conn = client_import_del_conn, .o_connect = client_connect_import, .o_disconnect = client_disconnect_export, .o_iocontrol = mdc_iocontrol, diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index bea932d..d5d4334 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -543,6 +543,16 @@ void class_import_put(struct obd_import *import) ptlrpc_put_connection_superhack(import->imp_connection); + while (!list_empty(&import->imp_conn_list)) { + struct obd_import_conn *imp_conn; + + imp_conn = list_entry(import->imp_conn_list.next, + struct 
obd_import_conn, oic_item); + list_del(&imp_conn->oic_item); + ptlrpc_put_connection_superhack(imp_conn->oic_conn); + OBD_FREE(imp_conn, sizeof(*imp_conn)); + } + LASSERT(list_empty(&import->imp_handle.h_link)); OBD_FREE(import, sizeof(*import)); EXIT; @@ -569,6 +579,7 @@ struct obd_import *class_new_import(void) atomic_set(&imp->imp_refcount, 2); atomic_set(&imp->imp_inflight, 0); atomic_set(&imp->imp_replay_inflight, 0); + INIT_LIST_HEAD(&imp->imp_conn_list); INIT_LIST_HEAD(&imp->imp_handle.h_link); class_handle_hash(&imp->imp_handle, import_handle_addref); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 7b838e1..f871fff 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -622,6 +622,8 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, precleanup); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup); LPROCFS_OBD_OP_INIT(num_private_stats, stats, postrecov); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, add_conn); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, del_conn); LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect); LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect); LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 3105e22..f705364 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -379,6 +379,66 @@ void class_decref(struct obd_device *obd) } } +int class_add_conn(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct obd_import *imp; + struct obd_uuid uuid; + int rc; + ENTRY; + + if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 || + LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) { + CERROR("invalid conn_uuid\n"); + RETURN(-EINVAL); + } + if (strcmp(obd->obd_type->typ_name, "mdc") && + strcmp(obd->obd_type->typ_name, "osc")) { + CERROR("can't add connection on non-client dev\n"); 
+ RETURN(-EINVAL); + } + + imp = obd->u.cli.cl_import; + if (!imp) { + CERROR("try to add conn on immature client dev\n"); + RETURN(-EINVAL); + } + + obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1)); + rc = obd_add_conn(imp, &uuid, lcfg->lcfg_num); + + RETURN(rc); +} + +int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) +{ + struct obd_import *imp; + struct obd_uuid uuid; + int rc; + ENTRY; + + if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1 || + LUSTRE_CFG_BUFLEN(lcfg, 1) > sizeof(struct obd_uuid)) { + CERROR("invalid conn_uuid\n"); + RETURN(-EINVAL); + } + if (strcmp(obd->obd_type->typ_name, "mdc") && + strcmp(obd->obd_type->typ_name, "osc")) { + CERROR("can't del connection on non-client dev\n"); + RETURN(-EINVAL); + } + + imp = obd->u.cli.cl_import; + if (!imp) { + CERROR("try to del conn on immature client dev\n"); + RETURN(-EINVAL); + } + + obd_str2uuid(&uuid, lustre_cfg_string(lcfg, 1)); + rc = obd_del_conn(imp, &uuid); + + RETURN(rc); +} + LIST_HEAD(lustre_profile_list); struct lustre_profile *class_get_profile(char * prof) @@ -545,6 +605,14 @@ int class_process_config(struct lustre_cfg *lcfg) err = class_cleanup(obd, lcfg); GOTO(out, err = 0); } + case LCFG_ADD_CONN: { + err = class_add_conn(obd, lcfg); + GOTO(out, err = 0); + } + case LCFG_DEL_CONN: { + err = class_del_conn(obd, lcfg); + GOTO(out, err = 0); + } default: { CERROR("Unknown command: %d\n", lcfg->lcfg_command); GOTO(out, err = -EINVAL); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 28f28db..5fd0062d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3100,6 +3100,8 @@ struct obd_ops osc_obd_ops = { .o_owner = THIS_MODULE, .o_setup = osc_setup, .o_cleanup = osc_cleanup, + .o_add_conn = client_import_add_conn, + .o_del_conn = client_import_del_conn, .o_connect = client_connect_import, .o_disconnect = osc_disconnect, .o_statfs = osc_statfs, @@ -3138,6 +3140,8 @@ struct obd_ops osc_obd_ops = { struct obd_ops sanosc_obd_ops = { .o_owner = 
THIS_MODULE, .o_cleanup = client_obd_cleanup, + .o_add_conn = client_import_add_conn, + .o_del_conn = client_import_del_conn, .o_connect = client_connect_import, .o_disconnect = client_disconnect_export, .o_statfs = osc_statfs, diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index c54cba7..0629196 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -243,6 +243,76 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation) EXIT; } +#define ATTEMPT_TOO_SOON(last) \ + ((last) && ((long)(jiffies - (last)) <= (long)(obd_timeout * 2 * HZ))) + +static int import_select_connection(struct obd_import *imp) +{ + struct obd_import_conn *imp_conn, *tmp; + struct obd_export *dlmexp; + int found = 0; + ENTRY; + + spin_lock(&imp->imp_lock); + + if (list_empty(&imp->imp_conn_list)) { + CERROR("%s: no connections available\n", + imp->imp_obd->obd_name); + spin_unlock(&imp->imp_lock); + RETURN(-EINVAL); + } + + list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) { + if (!imp_conn->oic_last_attempt || + time_after(jiffies, imp_conn->oic_last_attempt + + obd_timeout * 2 * HZ)) { + found = 1; + break; + } + } + + /* if not found, simply choose the current one */ + if (!found) { + CWARN("%s: continuing with current connection\n", + imp->imp_obd->obd_name); + LASSERT(imp->imp_conn_current); + imp_conn = imp->imp_conn_current; + } + LASSERT(imp_conn->oic_conn); + + imp_conn->oic_last_attempt = jiffies; + + /* move the items ahead of the selected one to list tail */ + while (1) { + tmp= list_entry(imp->imp_conn_list.next, + struct obd_import_conn, oic_item); + if (tmp == imp_conn) + break; + list_del(&tmp->oic_item); + list_add_tail(&tmp->oic_item, &imp->imp_conn_list); + } + + /* switch connection, don't mind if it's same as the current one */ + if (imp->imp_connection) + ptlrpc_put_connection(imp->imp_connection); + imp->imp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); + + dlmexp = class_conn2export(&imp->imp_dlm_handle); + 
LASSERT(dlmexp != NULL); + if (dlmexp->exp_connection) + ptlrpc_put_connection(dlmexp->exp_connection); /* drop the export's old ref, not the import's new one */ + dlmexp->exp_connection = ptlrpc_connection_addref(imp_conn->oic_conn); + class_export_put(dlmexp); + + imp->imp_conn_current = imp_conn; + CWARN("%s: Using connection %s\n", + imp->imp_obd->obd_name, + imp_conn->oic_uuid.uuid); + spin_unlock(&imp->imp_lock); + + RETURN(0); +} + int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) { struct obd_device *obd = imp->imp_obd; @@ -290,40 +360,18 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) spin_unlock_irqrestore(&imp->imp_lock, flags); if (new_uuid) { - struct ptlrpc_connection *conn; struct obd_uuid uuid; - struct obd_export *dlmexp; obd_str2uuid(&uuid, new_uuid); - - conn = ptlrpc_uuid_to_connection(&uuid); - if (!conn) - GOTO(out, rc = -ENOENT); - - CDEBUG(D_HA, "switching import %s/%s from %s to %s\n", - imp->imp_target_uuid.uuid, imp->imp_obd->obd_name, - imp->imp_connection->c_remote_uuid.uuid, - conn->c_remote_uuid.uuid); - - /* Switch the import's connection and the DLM export's - * connection (which are almost certainly the same, but we - * keep distinct refs just to make things clearer. I think. */ - if (imp->imp_connection) - ptlrpc_put_connection(imp->imp_connection); - /* We hand off the ref from ptlrpc_get_connection. 
*/ - imp->imp_connection = conn; - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - - LASSERT(dlmexp != NULL); - - if (dlmexp->exp_connection) - ptlrpc_put_connection(dlmexp->exp_connection); - dlmexp->exp_connection = ptlrpc_connection_addref(conn); - class_export_put(dlmexp); - + rc = import_set_conn_priority(imp, &uuid); + if (rc) + GOTO(out, rc); } + rc = import_select_connection(imp); + if (rc) + GOTO(out, rc); + request = ptlrpc_prep_req(imp, imp->imp_connect_op, 4, size, tmp); if (!request) GOTO(out, rc = -ENOMEM); @@ -377,6 +425,9 @@ static int ptlrpc_connect_interpret(struct ptlrpc_request *request, if (rc) GOTO(out, rc); + LASSERT(imp->imp_conn_current); + imp->imp_conn_current->oic_last_attempt = 0; + msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); if (aa->pcaa_initial_connect) { diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 829c078..98cd63e 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -545,6 +545,8 @@ ptlrpc_server_handle_request (struct ptlrpc_service *svc) rc = svc->srv_handler(request); + LASSERT(request); + request->rq_phase = RQ_PHASE_COMPLETE; CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:ni:nid:opc " diff --git a/lustre/tests/cfg/insanity-adev.sh b/lustre/tests/cfg/insanity-adev.sh index 8da018d..756043a 100644 --- a/lustre/tests/cfg/insanity-adev.sh +++ b/lustre/tests/cfg/insanity-adev.sh @@ -16,7 +16,7 @@ TIMEOUT=${TIMEOUT:-30} PTLDEBUG=${PTLDEBUG:-0} SUBSYSTEM=${SUBSYSTEM:-0} MOUNT=${MOUNT:-"/mnt/lustre"} -UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} +#UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} MDSDEV=${MDSDEV:-/dev/sdc} MDSSIZE=${MDSSIZE:-50000} diff --git a/lustre/tests/cfg/insanity-local.sh b/lustre/tests/cfg/insanity-local.sh index f39bd8f..b12b068 100644 --- a/lustre/tests/cfg/insanity-local.sh +++ b/lustre/tests/cfg/insanity-local.sh @@ -14,7 +14,7 @@ PTLDEBUG=${PTLDEBUG:-0x3f0400} SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} 
MOUNT=${MOUNT:-"/mnt/lustre"} #CLIENT_UPCALL=${CLIENT_UPCALL:-`pwd`/client-upcall-mdev.sh} -UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} +#UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-10000} #50000000 diff --git a/lustre/tests/cfg/insanity-ltest.sh b/lustre/tests/cfg/insanity-ltest.sh index fe63e81..47a7b0c 100644 --- a/lustre/tests/cfg/insanity-ltest.sh +++ b/lustre/tests/cfg/insanity-ltest.sh @@ -37,7 +37,7 @@ TIMEOUT=${TIMEOUT:-30} PTLDEBUG=${PTLDEBUG:-0} SUBSYSTEM=${SUBSYSTEM:-0} MOUNT=${MOUNT:-${MOUNTPT}} -UPCALL=${CLIENT_UPCALL:-"${LUSTRE_TESTS}/replay-single-upcall.sh"} +#UPCALL=${CLIENT_UPCALL:-"${LUSTRE_TESTS}/replay-single-upcall.sh"} mdsdev1=${MDSDEV[1]:-$MDSDEVBASE} MDSDEV=${MDSDEV:-${mdsdev1}} diff --git a/lustre/tests/cfg/insanity-mdev.sh b/lustre/tests/cfg/insanity-mdev.sh index fa15cd2..05f038d 100644 --- a/lustre/tests/cfg/insanity-mdev.sh +++ b/lustre/tests/cfg/insanity-mdev.sh @@ -15,7 +15,7 @@ TIMEOUT=${TIMEOUT:-30} PTLDEBUG=${PTLDEBUG:-0x3f0400} SUBSYSTEM=${SUBSYSTEM:- 0xffb7e3ff} MOUNT=${MOUNT:-"/mnt/lustre"} -UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} +#UPCALL=${CLIENT_UPCALL:-`pwd`/replay-single-upcall.sh} MDSDEV=${MDSDEV:-/dev/sda1} MDSSIZE=${MDSSIZE:-50000} diff --git a/lustre/tests/cfg/mdev.sh b/lustre/tests/cfg/mdev.sh index 7d50f07..38031e1 100644 --- a/lustre/tests/cfg/mdev.sh +++ b/lustre/tests/cfg/mdev.sh @@ -21,7 +21,7 @@ OSTDEV=${OSTDEV:-/tmp/ost1-`hostname`} OSTSIZE=${OSTSIZE:-20000} FSTYPE=${FSTYPE:-ext3} TIMEOUT=${TIMEOUT:-10} -UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh} +#UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh} STRIPE_BYTES=${STRIPE_BYTES:-65536} STRIPES_PER_OBJ=${STRIPES_PER_OBJ:-0} diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 7f1939c..e40dd3f 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -606,6 +606,12 @@ class LCTLInterface: quit""" % (name, setup) self.run(cmds) + def add_conn(self, name, 
conn_uuid): + cmds = """ + cfg_device %s + add_conn %s + quit""" % (name, conn_uuid) + self.run(cmds) # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): @@ -1909,6 +1915,7 @@ class Client(Module): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.db = tgtdb + self.backup_targets = [] self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: @@ -1927,6 +1934,7 @@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) + self.lookup_backup_targets() mgmt_uuid = mgmt_uuid_for_fs(fs_name) if mgmt_uuid: self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) @@ -1946,6 +1954,20 @@ class Client(Module): def get_servers(self): return self._server_nets + def lookup_backup_targets(self): + """ Lookup alternative network information """ + prof_list = toplustreDB.get_refs('profile') + for prof_uuid in prof_list: + prof_db = toplustreDB.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = toplustreDB.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: + self.backup_targets.append(ref_uuid) + def prepare(self, ignore_connect_failure = 0): self.info(self.target_uuid) if is_prepared(self.name): @@ -1980,14 +2002,29 @@ class Client(Module): else: panic("Unable to create OSC for ", self.target_uuid) + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a backup server for:", tgt_dev_uuid) + srv_list = find_local_servers(this_nets) + if srv_list: + for srv in srv_list: + lctl.connect(srv) + break + else: + routes = find_route(this_nets); + if len(routes) == 0: + panic("no route to", tgt_dev_uuid) + for (srv, r) in routes: + lctl.add_route_host(r[0],
srv.nid_uuid, r[1], r[3]) + if srv: + lctl.add_conn(self.name, srv.nid_uuid); + + def cleanup(self): if is_prepared(self.name): Module.cleanup(self) try: - srv_list = find_local_servers(self.get_servers()) - for srv in srv_list: - lctl.disconnect(srv) - routes = find_route(self.get_servers()) for (srv, r) in routes: lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3]) @@ -1996,6 +2033,16 @@ class Client(Module): e.dump() cleanup_error(e.rc) + for tgt_dev_uuid in self.backup_targets: + this_net = get_ost_net(toplustreDB, tgt_dev_uuid) + srv_list = find_local_servers(self.get_servers()) + if srv_list: + for srv in srv_list: + lctl.disconnect(srv) + break + else: + for (srv, r) in find_route(this_net): + lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3]) class MDC(Client): def __init__(self, db, uuid, fs_name): diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 8b86745..30e13f1 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -231,6 +231,10 @@ command_t cmdlist[] = { "usage: set_timeout \n"}, {"set_lustre_upcall", jt_lcfg_set_lustre_upcall, 0, "usage: set_lustre_upcall \n"}, + {"add_conn", jt_lcfg_add_conn, 0, + "usage: add_conn [priority]\n"}, + {"del_conn", jt_lcfg_del_conn, 0, + "usage: del_conn \n"}, /* Llog operations */ {"llog_catlist", jt_llog_catlist, 0, diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index 149f181..cb0f987f 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -541,3 +541,74 @@ int jt_lcfg_set_lustre_upcall(int argc, char **argv) } return rc; } + +int jt_lcfg_add_conn(int argc, char **argv) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int priority; + int rc; + + if (argc == 2) + priority = 0; + else if (argc == 3) + priority = 1; + else + return CMD_HELP; + + if (lcfg_devname == NULL) { + fprintf(stderr, "%s: please use 'cfg_device name' to set the " + "device name for config commands.\n", + jt_cmdname(argv[0])); + return -EINVAL; + } + + 
lustre_cfg_bufs_reset(&bufs, lcfg_devname); + + lustre_cfg_bufs_set_string(&bufs, 1, argv[1]); + + lcfg = lustre_cfg_new(LCFG_ADD_CONN, &bufs); + lcfg->lcfg_num = priority; + + rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg); + lustre_cfg_free (lcfg); + if (rc < 0) { + fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), + strerror(rc = errno)); + } + + return rc; +} + +int jt_lcfg_del_conn(int argc, char **argv) +{ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int rc; + + if (argc != 2) + return CMD_HELP; + + if (lcfg_devname == NULL) { + fprintf(stderr, "%s: please use 'cfg_device name' to set the " + "device name for config commands.\n", + jt_cmdname(argv[0])); + return -EINVAL; + } + + lustre_cfg_bufs_reset(&bufs, lcfg_devname); + + /* connection uuid */ + lustre_cfg_bufs_set_string(&bufs, 1, argv[1]); + + lcfg = lustre_cfg_new(LCFG_DEL_CONN, &bufs); /* must be LCFG_DEL_CONN so class_process_config dispatches to class_del_conn */ + + rc = lcfg_ioctl(argv[0], OBD_DEV_ID, lcfg); + lustre_cfg_free(lcfg); + if (rc < 0) { + fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), + strerror(rc = errno)); + } + + return rc; +} diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index bec2e12..e1c28ba 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -85,6 +85,8 @@ int jt_lcfg_mount_option(int argc, char **argv); int jt_lcfg_del_mount_option(int argc, char **argv); int jt_lcfg_set_timeout(int argc, char **argv); int jt_lcfg_set_lustre_upcall(int argc, char **argv); +int jt_lcfg_add_conn(int argc, char **argv); +int jt_lcfg_del_conn(int argc, char **argv); int obd_add_uuid(char *uuid, ptl_nid_t nid, int nal); -- 1.8.3.1