X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fldlm%2Fldlm_lib.c;h=9b5c4254d416ff5b604221bd92ae497ebe92f5b0;hp=59bc60873d7a54d38d97ac1d1786245abd1929a7;hb=3f65f631c74ddf648ae8522f887da9e4edbf4c7d;hpb=82e794e268bede7092e64e3f93d50a9bdf9936bd diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 59bc608..9b5c425 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -23,7 +23,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2010, 2016, Intel Corporation. + * Copyright (c) 2010, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,6 +39,7 @@ #define DEBUG_SUBSYSTEM S_LDLM +#include #include #include #include @@ -390,9 +391,15 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) spin_lock_init(&cli->cl_lru_list_lock); atomic_long_set(&cli->cl_unstable_count, 0); INIT_LIST_HEAD(&cli->cl_shrink_list); + INIT_LIST_HEAD(&cli->cl_grant_chain); + + INIT_LIST_HEAD(&cli->cl_flight_waiters); + cli->cl_rpcs_in_flight = 0; init_waitqueue_head(&cli->cl_destroy_waitq); atomic_set(&cli->cl_destroy_in_flight, 0); + + cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; #ifdef ENABLE_CHECKSUM /* Turn on checksumming by default. */ cli->cl_checksum = 1; @@ -401,7 +408,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) * Set cl_chksum* to CRC32 for now to avoid returning screwed info * through procfs. */ - cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; + cli->cl_cksum_type = cli->cl_supp_cksum_types; #endif atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); @@ -409,6 +416,8 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) * from OFD after connecting. */ cli->cl_max_pages_per_rpc = PTLRPC_MAX_BRW_PAGES; + cli->cl_max_short_io_bytes = OBD_MAX_SHORT_IO_BYTES; + /* set cl_chunkbits default value to PAGE_SHIFT, * it will be updated at OSC connection time. */ cli->cl_chunkbits = PAGE_SHIFT; @@ -426,7 +435,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_MAX; else cli->cl_max_rpcs_in_flight = OBD_MAX_RIF_DEFAULT; - } + } spin_lock_init(&cli->cl_mod_rpcs_lock); spin_lock_init(&cli->cl_mod_rpcs_hist.oh_lock); @@ -742,12 +751,12 @@ static int target_handle_reconnect(struct lustre_handle *conn, { struct obd_device *target; struct lustre_handle *hdl; - cfs_time_t now; - cfs_time_t deadline; - int timeout; + time64_t deadline; + time64_t timeout; + time64_t now; int rc = 0; - ENTRY; + ENTRY; hdl = &exp->exp_imp_reverse->imp_remote_handle; if (!exp->exp_connection || !lustre_handle_is_used(hdl)) { conn->cookie = exp->exp_handle.h_cookie; @@ -781,16 +790,17 @@ static int target_handle_reconnect(struct lustre_handle *conn, GOTO(out_already, rc); } - now = cfs_time_current(); - deadline = target->obd_recovery_timer.expires; - if (cfs_time_before(now, deadline)) { - struct target_distribute_txn_data *tdtd = - class_exp2tgt(exp)->lut_tdtd; + now = ktime_get_seconds(); + deadline = jiffies_to_msecs(target->obd_recovery_timer.expires) / + MSEC_PER_SEC; + if (now < deadline) { + struct target_distribute_txn_data *tdtd; int size = 0; int count = 0; char *buf = NULL; - timeout = cfs_duration_sec(cfs_time_sub(deadline, now)); + timeout = deadline - now; + tdtd = class_exp2tgt(exp)->lut_tdtd; if (tdtd && tdtd->tdtd_show_update_logs_retrievers) buf = tdtd->tdtd_show_update_logs_retrievers( tdtd->tdtd_show_retrievers_cbdata, @@ -798,7 +808,7 @@ static int target_handle_reconnect(struct lustre_handle *conn, if (count > 0) LCONSOLE_WARN("%s: Recovery already passed deadline " - "%d:%.02d. It is due to DNE recovery " + "%lld:%.02lld. It is due to DNE recovery " "failed/stuck on the %d MDT(s):%s. " "Please wait until all MDTs recovered " "or abort the recovery by force.\n", @@ -807,7 +817,7 @@ static int target_handle_reconnect(struct lustre_handle *conn, buf ? buf : "unknown (not enough RAM)"); else LCONSOLE_WARN("%s: Recovery already passed deadline " - "%d:%.02d. If you do not want to wait " + "%lld:%.02lld. If you do not want to wait " "more, please abort the recovery by " "force.\n", target->obd_name, timeout / 60, timeout % 60); @@ -815,9 +825,9 @@ static int target_handle_reconnect(struct lustre_handle *conn, if (buf != NULL) OBD_FREE(buf, size); } else { - timeout = cfs_duration_sec(cfs_time_sub(now, deadline)); + timeout = now - deadline; LCONSOLE_WARN("%s: Recovery already passed deadline" - " %d:%.02d, It is most likely due to DNE" + " %lld:%.02lld, It is most likely due to DNE" " recovery is failed or stuck, please wait a" " few more minutes or abort the recovery.\n", target->obd_name, timeout / 60, timeout % 60); @@ -1074,7 +1084,8 @@ int target_handle_connect(struct ptlrpc_request *req) * * Via check OBD_CONNECT_FID, we can distinguish whether * the OBD_CONNECT_MDS_MDS/OBD_CONNECT_MNE_SWAB is from - * MGC or MDT. */ + * MGC or MDT, since MGC does not use OBD_CONNECT_FID. + */ if (!lw_client && (data->ocd_connect_flags & OBD_CONNECT_MDS_MDS) && (data->ocd_connect_flags & OBD_CONNECT_FID) && @@ -1150,6 +1161,7 @@ int target_handle_connect(struct ptlrpc_request *req) * cause namespace inconsistency */ spin_lock(&export->exp_lock); export->exp_connecting = 1; + export->exp_conn_cnt = 0; spin_unlock(&export->exp_lock); conn.cookie = export->exp_handle.h_cookie; rc = EALREADY; @@ -1191,18 +1203,19 @@ no_export: target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), atomic_read(&export->exp_refcount)); - GOTO(out, rc = -EBUSY); - } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1) { - if (!strstr(cluuid.uuid, "mdt")) - LCONSOLE_WARN("%s: Rejecting reconnect from the " - "known client %s (at %s) because it " - "is indicating it is a new client", - target->obd_name, cluuid.uuid, - libcfs_nid2str(req->rq_peer.nid)); - GOTO(out, rc = -EALREADY); - } else { - OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout); - } + GOTO(out, rc = -EBUSY); + } else if (lustre_msg_get_conn_cnt(req->rq_reqmsg) == 1 && + rc != EALREADY) { + if (!strstr(cluuid.uuid, "mdt")) + LCONSOLE_WARN("%s: Rejecting reconnect from the " + "known client %s (at %s) because it " + "is indicating it is a new client", + target->obd_name, cluuid.uuid, + libcfs_nid2str(req->rq_peer.nid)); + GOTO(out, rc = -EALREADY); + } else { + OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_RECONNECT, 2 * obd_timeout); + } if (rc < 0) { GOTO(out, rc); @@ -1238,27 +1251,36 @@ no_export: /* allow "new" MDT to be connected during recovery, since we * need retrieve recovery update records from it */ if (target->obd_recovering && !lw_client && !mds_mds_conn) { - cfs_time_t t; - int c; /* connected */ - int i; /* in progress */ - int k; /* known */ - int s; /* stale/evicted */ + time64_t now; + time64_t t; + char *msg; + int c; /* connected */ + int i; /* in progress */ + int k; /* known */ + int s; /* stale/evicted */ c = atomic_read(&target->obd_connected_clients); i = atomic_read(&target->obd_lock_replay_clients); k = target->obd_max_recoverable_clients; s = target->obd_stale_clients; - t = target->obd_recovery_timer.expires; - t = cfs_time_sub(t, cfs_time_current()); - t = cfs_duration_sec(t); + t = jiffies_to_msecs(target->obd_recovery_timer.expires); + t /= MSEC_PER_SEC; + now = ktime_get_seconds(); + if (now > t) { + t = now - t; + msg = "already passed deadline"; + } else { + t -= now; + msg = "to recover in"; + } + LCONSOLE_WARN("%s: Denying connection for new client %s" "(at %s), waiting for %d known clients " "(%d recovered, %d in progress, and %d " - "evicted) to recover in %d:%.02d\n", + "evicted) %s %lld:%.02lld\n", target->obd_name, cluuid.uuid, libcfs_nid2str(req->rq_peer.nid), k, - c - i, i, s, (int)t / 60, - (int)t % 60); + c - i, i, s, msg, t / 60, t % 60); rc = -EBUSY; } else { dont_check_exports: @@ -1572,12 +1594,13 @@ static void target_finish_recovery(struct lu_target *lut) obd->obd_recovery_end = ktime_get_real_seconds(); /* When recovery finished, cleanup orphans on MDS and OST. */ - if (OBT(obd) && OBP(obd, postrecov)) { - int rc = OBP(obd, postrecov)(obd); - if (rc < 0) - LCONSOLE_WARN("%s: Post recovery failed, rc %d\n", - obd->obd_name, rc); - } + if (obd->obd_type && OBP(obd, postrecov)) { + int rc = OBP(obd, postrecov)(obd); + + if (rc < 0) + LCONSOLE_WARN("%s: Post recovery failed, rc %d\n", + obd->obd_name, rc); + } EXIT; } @@ -1697,7 +1720,7 @@ static void target_start_recovery_timer(struct obd_device *obd) } mod_timer(&obd->obd_recovery_timer, - cfs_time_shift(obd->obd_recovery_timeout)); + jiffies + cfs_time_seconds(obd->obd_recovery_timeout)); obd->obd_recovery_start = ktime_get_real_seconds(); spin_unlock(&obd->obd_dev_lock); @@ -1716,7 +1739,7 @@ static void target_start_recovery_timer(struct obd_device *obd) * if @extend is true, extend recovery window to have @drt remaining at least; * otherwise, make sure the recovery timeout value is not less than @drt. */ -static void extend_recovery_timer(struct obd_device *obd, int drt, +static void extend_recovery_timer(struct obd_device *obd, time64_t drt, bool extend) { time64_t now; @@ -1752,7 +1775,7 @@ static void extend_recovery_timer(struct obd_device *obd, int drt, obd->obd_recovery_timeout = to; end = obd->obd_recovery_start + to; mod_timer(&obd->obd_recovery_timer, - cfs_time_shift(end - now)); + jiffies + cfs_time_seconds(end - now)); } spin_unlock(&obd->obd_dev_lock); @@ -1776,7 +1799,7 @@ check_and_start_recovery_timer(struct obd_device *obd, struct ptlrpc_request *req, int new_client) { - int service_time = lustre_msg_get_service_time(req->rq_reqmsg); + time64_t service_time = lustre_msg_get_service_time(req->rq_reqmsg); struct obd_device_target *obt = &obd->u.obt; if (!new_client && service_time) @@ -1788,7 +1811,8 @@ check_and_start_recovery_timer(struct obd_device *obd, target_start_recovery_timer(obd); /* Convert the service time to RPC timeout, - * and reuse service_time to limit stack usage. */ + * and reuse service_time to limit stack usage. + */ service_time = at_est2timeout(service_time); if (OBD_FAIL_CHECK(OBD_FAIL_TGT_SLUGGISH_NET) && @@ -1960,7 +1984,18 @@ static int target_recovery_overseer(struct lu_target *lut, { struct obd_device *obd = lut->lut_obd; struct target_distribute_txn_data *tdtd; + time64_t last = 0; + time64_t now; repeat: + if (obd->obd_recovering && obd->obd_recovery_start == 0) { + now = ktime_get_seconds(); + if (now - last > 600) { + LCONSOLE_INFO("%s: in recovery but waiting for " + "the first client to connect\n", + obd->obd_name); + last = now; + } + } if (obd->obd_recovery_start != 0 && ktime_get_real_seconds() >= (obd->obd_recovery_start + obd->obd_recovery_time_hard)) { __u64 next_update_transno = 0; @@ -2131,7 +2166,7 @@ static void handle_recovery_req(struct ptlrpc_thread *thread, /* don't reset timer for final stage */ if (!exp_finished(req->rq_export)) { - int to = obd_timeout; + time64_t to = obd_timeout; /** * Add request timeout to the recovery time so next request from @@ -2585,9 +2620,9 @@ void target_recovery_fini(struct obd_device *obd) } EXPORT_SYMBOL(target_recovery_fini); -static void target_recovery_expired(unsigned long castmeharder) +static void target_recovery_expired(cfs_timer_cb_arg_t data) { - struct obd_device *obd = (struct obd_device *)castmeharder; + struct obd_device *obd = cfs_from_timer(obd, data, obd_recovery_timer); CDEBUG(D_HA, "%s: recovery timed out; %d clients are still in recovery" " after %llus (%d clients connected)\n", obd->obd_name, atomic_read(&obd->obd_lock_replay_clients), @@ -2619,8 +2654,8 @@ void target_recovery_init(struct lu_target *lut, svc_handler_t handler) obd->obd_recovery_start = 0; obd->obd_recovery_end = 0; - setup_timer(&obd->obd_recovery_timer, target_recovery_expired, - (unsigned long)obd); + cfs_timer_setup(&obd->obd_recovery_timer, target_recovery_expired, + (unsigned long)obd, 0); target_start_recovery_thread(lut, handler); } EXPORT_SYMBOL(target_recovery_init); @@ -2676,6 +2711,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req, target_process_req_flags(obd, req); if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) { + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) { + if (cfs_fail_val == 1) { + cfs_race_state = 1; + cfs_fail_val = 0; + wake_up(&cfs_race_waitq); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + } + } + /* client declares he's ready to complete recovery * so, we put the request on th final queue */ target_request_copy_get(req); @@ -3123,10 +3169,10 @@ static inline const char *bulk2type(struct ptlrpc_request *req) int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, struct l_wait_info *lwi) { - struct ptlrpc_request *req = desc->bd_req; - time_t start = cfs_time_current_sec(); - time_t deadline; - int rc = 0; + struct ptlrpc_request *req = desc->bd_req; + time64_t start = ktime_get_real_seconds(); + time64_t deadline; + int rc = 0; ENTRY; @@ -3173,12 +3219,13 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, deadline = req->rq_deadline; do { - long timeoutl = deadline - cfs_time_current_sec(); - cfs_duration_t timeout = timeoutl <= 0 ? - CFS_TICK : cfs_time_seconds(timeoutl); - time_t rq_deadline; + time64_t timeoutl = deadline - ktime_get_real_seconds(); + long timeout_jiffies = timeoutl <= 0 ? + 1 : cfs_time_seconds(timeoutl); + time64_t rq_deadline; - *lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1), + *lwi = LWI_TIMEOUT_INTERVAL(timeout_jiffies, + cfs_time_seconds(1), target_bulk_timeout, desc); rc = l_wait_event(desc->bd_waitq, !ptlrpc_server_bulk_active(desc) || @@ -3188,17 +3235,17 @@ int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); /* Wait again if we changed rq_deadline. */ - rq_deadline = ACCESS_ONCE(req->rq_deadline); + rq_deadline = READ_ONCE(req->rq_deadline); deadline = start + bulk_timeout; if (deadline > rq_deadline) deadline = rq_deadline; - } while ((rc == -ETIMEDOUT) && - (deadline > cfs_time_current_sec())); + } while (rc == -ETIMEDOUT && + deadline > ktime_get_real_seconds()); if (rc == -ETIMEDOUT) { - DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %ld%+lds", + DEBUG_REQ(D_ERROR, req, "timeout on bulk %s after %lld%+llds", bulk2type(req), deadline - start, - cfs_time_current_sec() - deadline); + ktime_get_real_seconds() - deadline); ptlrpc_abort_bulk(desc); } else if (exp->exp_failed) { DEBUG_REQ(D_ERROR, req, "Eviction on bulk %s",