From 87dff1d2fbfb77381b797c818e0d92d9dc26a5ec Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 26 Jul 2005 22:43:05 +0000 Subject: [PATCH] Branch b1_4 Description: Network or server problems during mount may cause partially mounted clients instead of returning an error. Details : lustre_process_log() will overwrite the error return code during mount error handling, returning success instead of an error. Also, an error during cleanup caused cleanup to stop instead of attempting to do the remaining cleanup. b=6302 --- lustre/ChangeLog | 10 +++++++ lustre/liblustre/llite_lib.c | 51 +++++++++++++++++--------------- lustre/llite/llite_lib.c | 70 +++++++++++++++++++++++--------------------- 3 files changed, 74 insertions(+), 57 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 7c25507..4bc434b 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -118,6 +118,7 @@ Details : Additional detailed health check information on MSD and OSTs is now provided through the procfs health_check value. Severity : minor +Frequency : occasional, depends on IO load Bugzilla : 4466 Description: Disk fragmentation on the OSTs could eventually cause slowdowns after numerous create/delete cycles @@ -126,6 +127,15 @@ Details : The ext3 inode allocation policy would not allocate new inodes being created. Instead we look for groups with free space if the parent directories are nearly full. +Severity : major +Bugzilla : 6302 +Frequency : rare +Description: Network or server problems during mount may cause partially + mounted clients instead of returning an error. +Details : The config llog parsing code may overwrite the error return + code during mount error handling, returning success instead + of an error. + ------------------------------------------------------------------------------ 2005-06-20 Cluster File Systems, Inc. diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index 816286c..b8b9997 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -119,43 +119,42 @@ int liblustre_process_log(struct config_llog_instance *cfg, lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); lcfg->lcfg_nid = nid; lcfg->lcfg_nal = nal; - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out, err); + if (rc < 0) + GOTO(out, rc); lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME); lustre_cfg_bufs_set_string(&bufs, 2, mdc_uuid.uuid); lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs); - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out_del_uuid, err); + if (rc < 0) + GOTO(out_del_uuid, rc); lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, mdsname); lustre_cfg_bufs_set_string(&bufs, 2, peer); lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out_detach, err); + if (rc < 0) + GOTO(out_detach, rc); obd = class_name2obd(name); if (obd == NULL) - GOTO(out_cleanup, err = -EINVAL); + GOTO(out_cleanup, rc = -EINVAL); /* Disable initial recovery on this import */ - err = obd_set_info(obd->obd_self_export, - strlen("initial_recov"), "initial_recov", - sizeof(allow_recov), &allow_recov); + rc = obd_set_info(obd->obd_self_export, + strlen("initial_recov"), "initial_recov", + sizeof(allow_recov), &allow_recov); - err = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /*connect_flags*/); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", - mdsname, err); - GOTO(out_cleanup, err); + rc = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /*connect_flags*/); + if (rc) { + CERROR("cannot connect to %s: rc = %d\n", mdsname, rc); + GOTO(out_cleanup, rc); } exp = class_conn2export(&mdc_conn); @@ -166,33 +165,37 @@ int liblustre_process_log(struct config_llog_instance *cfg, CERROR("class_config_parse_llog failed: rc = %d\n", rc); } + /* We don't so much care about errors in cleaning up the config llog + * connection, as we have already read the config by this point. */ err = obd_disconnect(exp); + if (err) + CERROR("obd_disconnect failed: rc = %d\n", err); out_cleanup: lustre_cfg_bufs_reset(&bufs, name); lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out, err); + if (err) + CERROR("mdc_cleanup failed: rc = %d\n", err); out_detach: lustre_cfg_bufs_reset(&bufs, name); lcfg = lustre_cfg_new(LCFG_DETACH, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out, err); + if (err) + CERROR("mdc_detach failed: rc = %d\n", err); out_del_uuid: lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, peer); lcfg = lustre_cfg_new(LCFG_DEL_UUID, &bufs); err = class_process_config(lcfg); + if (err) + CERROR("del MDC UUID failed: rc = %d\n", err); lustre_cfg_free(lcfg); out: - if (rc == 0) - rc = err; RETURN(rc); } diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 4ba568b..1add8bd 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -531,9 +531,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID); pcfg.pcfg_nal = lmd->lmd_nal; pcfg.pcfg_nid = lmd->lmd_local_nid; - err = libcfs_nal_cmd(&pcfg); - if (err <0) - GOTO(out, err); + rc = libcfs_nal_cmd(&pcfg); + if (rc < 0) + GOTO(out, rc); } if (lmd->lmd_nal == SOCKNAL || @@ -547,9 +547,9 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, LASSERT(pcfg.pcfg_nid); pcfg.pcfg_id = lmd->lmd_server_ipaddr; pcfg.pcfg_misc = lmd->lmd_port; - err = libcfs_nal_cmd(&pcfg); - if (err <0) - GOTO(out, err); + rc = libcfs_nal_cmd(&pcfg); + if (rc < 0) + GOTO(out, rc); } lustre_cfg_bufs_reset(&bufs, name); @@ -559,46 +559,46 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, lcfg->lcfg_nal = lmd->lmd_nal; lcfg->lcfg_nid = lmd->lmd_server_nid; LASSERT(lcfg->lcfg_nal); - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out_del_conn, err); + if (rc < 0) + GOTO(out_del_conn, rc); lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, LUSTRE_MDC_NAME); lustre_cfg_bufs_set_string(&bufs, 2, mdc_uuid.uuid); lcfg = lustre_cfg_new(LCFG_ATTACH, &bufs); - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out_del_uuid, err); + if (rc < 0) + GOTO(out_del_uuid, rc); lustre_cfg_bufs_reset(&bufs, name); lustre_cfg_bufs_set_string(&bufs, 1, lmd->lmd_mds); lustre_cfg_bufs_set_string(&bufs, 2, peer); lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); - err = class_process_config(lcfg); + rc = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out_detach, err); + if (rc < 0) + GOTO(out_detach, rc); obd = class_name2obd(name); if (obd == NULL) - GOTO(out_cleanup, err = -EINVAL); + GOTO(out_cleanup, rc = -EINVAL); /* Disable initial recovery on this import */ - err = obd_set_info(obd->obd_self_export, - strlen("initial_recov"), "initial_recov", - sizeof(allow_recov), &allow_recov); - if (err) - GOTO(out_cleanup, err); + rc = obd_set_info(obd->obd_self_export, + strlen("initial_recov"), "initial_recov", + sizeof(allow_recov), &allow_recov); + if (rc) + GOTO(out_cleanup, rc); - err = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /* ocd */); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, err); - GOTO(out_cleanup, err); + rc = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /* ocd */); + if (rc) { + CERROR("cannot connect to %s: rc = %d\n", lmd->lmd_mds, rc); + GOTO(out_cleanup, rc); } exp = class_conn2export(&mdc_conn); @@ -627,23 +627,27 @@ int lustre_process_log(struct lustre_mount_data *lmd, char * profile, break; } + /* We don't so much care about errors in cleaning up the config llog + * connection, as we have already read the config by this point. */ err = obd_disconnect(exp); + if (err) + CERROR("obd_disconnect failed: rc = %d\n", err); out_cleanup: lustre_cfg_bufs_reset(&bufs, name); lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out, err); + if (err) + CERROR("mdc_cleanup failed: rc = %d\n", err); out_detach: lustre_cfg_bufs_reset(&bufs, name); lcfg = lustre_cfg_new(LCFG_DETACH, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); - if (err < 0) - GOTO(out, err); + if (err) + CERROR("mdc_detach failed: rc = %d\n", err); out_del_uuid: lustre_cfg_bufs_reset(&bufs, name); @@ -651,6 +655,8 @@ out_del_uuid: lcfg = lustre_cfg_new(LCFG_DEL_UUID, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); + if (err) + CERROR("del MDC UUID failed: rc = %d\n", err); out_del_conn: if (lmd->lmd_nal == SOCKNAL || @@ -663,12 +669,10 @@ out_del_conn: pcfg.pcfg_nid = lmd->lmd_server_nid; pcfg.pcfg_flags = 1; /* single_share */ err = libcfs_nal_cmd(&pcfg); - if (err <0) - GOTO(out, err); + if (err) + CERROR("del MDS peer failed: rc = %d\n", err); } out: - if (rc == 0) - rc = err; RETURN(rc); } -- 1.8.3.1