From: alex Date: Sun, 6 Jun 2004 09:06:52 +0000 (+0000) Subject: - import tracks when connection procedure started. if connection error happens X-Git-Tag: v1_7_100~2255 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=0d1f2618b4127d8bb413835b7c2e99b9fc057273;p=fs%2Flustre-release.git - import tracks when connection procedure started. if connection error happens after HZ then ptlrpc_connect_interpret() starts reconnection immediately. this speeds recovery up a bit - pinger pings inter-mds connections more often than usual ones - pinger finds the soonest event and waits for it, not for obd_timeout * 2 - mds_getattr_name() need not check whether the mds is chosen correctly if attrs are requested by fid --- diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index d2af141..751e349 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -75,6 +75,7 @@ struct obd_import { struct obd_uuid imp_target_uuid; /* XXX -> lustre_name */ struct lustre_handle imp_remote_handle; unsigned long imp_next_ping; + unsigned long imp_connect_start; /* Protects flags, level, generation, conn_cnt, *_list */ spinlock_t imp_lock; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index f552198..b6cc997 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -948,13 +948,13 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, cleanup_phase = 2; /* dchild, dparent, locks */ -fill_inode: - /* let's make sure this name should leave on this mds node */ rc = mds_check_mds_num(obd, dparent->d_inode, name, namesize); if (rc) GOTO(cleanup, rc); +fill_inode: + if (!DENTRY_VALID(dchild)) { intent_set_disposition(rep, DISP_LOOKUP_NEG); /* in the intent case, the policy clears this error: diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index e78176b..9ef1d1d 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -203,10 +203,7 @@ struct ptlrpc_request 
*ptlrpc_prep_req(struct obd_import *imp, int opcode, RETURN(NULL); } - if (imp->imp_server_timeout) - request->rq_timeout = obd_timeout / 2; - else - request->rq_timeout = obd_timeout; + request->rq_timeout = obd_timeout; request->rq_send_state = LUSTRE_IMP_FULL; request->rq_type = PTL_RPC_MSG_REQUEST; request->rq_import = class_import_get(imp); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index aa4a6a9..99b2264 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -327,6 +327,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) ptlrpcd_add_req(request); rc = 0; + imp->imp_connect_start = jiffies; out: if (rc != 0) { IMPORT_SET_STATE(imp, LUSTRE_IMP_DISCON); @@ -443,13 +444,13 @@ finish: if (aa->pcaa_initial_connect && !imp->imp_initial_recov) { ptlrpc_deactivate_import(imp); } - /*if (rc == -ETIMEDOUT) { + if (rc == -ETIMEDOUT && (jiffies - imp->imp_connect_start) > HZ) { CDEBUG(D_ERROR, "recovery of %s on %s failed (timeout)\n", imp->imp_target_uuid.uuid, (char *)imp->imp_connection->c_remote_uuid.uuid); ptlrpc_connect_import(imp, NULL); RETURN(0); - }*/ + } CDEBUG(D_ERROR, "recovery of %s on %s failed (%d)\n", imp->imp_target_uuid.uuid, (char *)imp->imp_connection->c_remote_uuid.uuid, rc); diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 05d3fff..2659d7e 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -65,6 +65,14 @@ int ptlrpc_ping(struct obd_import *imp) } #ifdef __KERNEL__ +int ptlrpc_next_ping(struct obd_import *imp) +{ + if (imp->imp_server_timeout) + return jiffies + (obd_timeout / 4 * HZ); + else + return jiffies + (obd_timeout / 2 * HZ); +} + static int ptlrpc_pinger_main(void *arg) { struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; @@ -98,6 +106,7 @@ static int ptlrpc_pinger_main(void *arg) NULL, NULL); struct list_head *iter; + time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct 
obd_import *imp = @@ -118,34 +127,37 @@ static int ptlrpc_pinger_main(void *arg) if (level == LUSTRE_IMP_DISCON) { /* wait at least a timeout before trying recovery again. */ - imp->imp_next_ping = jiffies + - (obd_timeout * HZ); + imp->imp_next_ping = + ptlrpc_next_ping(imp); ptlrpc_initiate_recovery(imp); - } - else if (level != LUSTRE_IMP_FULL || - imp->imp_obd->obd_no_recov) { + } else if (level != LUSTRE_IMP_FULL || + imp->imp_obd->obd_no_recov) { CDEBUG(D_HA, "not pinging %s (in recovery " " or recovery disabled: %s)\n", imp->imp_target_uuid.uuid, ptlrpc_import_state_name(level)); - } - else if (imp->imp_pingable || force) { + } else if (imp->imp_pingable || force) { ptlrpc_ping(imp); } - } else { - if (imp->imp_pingable) - CDEBUG(D_HA, "don't need to ping %s " - "(%lu > %lu)\n", - imp->imp_target_uuid.uuid, - imp->imp_next_ping, this_ping); + } else if (imp->imp_pingable) { + CDEBUG(D_HA, "don't need to ping %s " + "(%lu > %lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_next_ping, this_ping); } + CDEBUG(D_OTHER, "%s: pingable %d, next_ping %lu(%lu)\n", + imp->imp_target_uuid.uuid, + imp->imp_pingable, imp->imp_next_ping, jiffies); + if (imp->imp_pingable && imp->imp_next_ping && + imp->imp_next_ping - jiffies < time_to_next_ping && + imp->imp_next_ping > jiffies) + time_to_next_ping = imp->imp_next_ping - jiffies; } up(&pinger_sem); /* Wait until the next ping time, or until we're stopped. */ - time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping, this_ping + (obd_timeout * HZ)); if (time_to_next_ping > 0) {