X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Ftarget%2Ftgt_lastrcvd.c;h=061f088f8a91967b9888b14c84c028384bf57995;hb=300858ccfcd00b52663de45e0bb472012242f342;hp=b8692987325095dbf52bdbcdde1bab7ffc99dc28;hpb=4c90aef2f0712d8da720f6a66cd09b88df7d0573;p=fs%2Flustre-release.git diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index b869298..061f088 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -40,6 +40,8 @@ #include "tgt_internal.h" +/** version recovery epoch */ +#define LR_EPOCH_BITS 32 /* Allocate a bitmap for a chunk of reply data slots */ static int tgt_bitmap_chunk_alloc(struct lu_target *lut, int chunk) @@ -146,6 +148,13 @@ static int tgt_clear_reply_slot(struct lu_target *lut, int idx) int chunk; int b; + if (lut->lut_obd->obd_stopping) + /* + * in case of failover keep the bit set in order to + * avoid overwriting slots in reply_data which might + * be required by resent rpcs + */ + return 0; chunk = idx / LUT_REPLY_SLOTS_PER_CHUNK; b = idx % LUT_REPLY_SLOTS_PER_CHUNK; @@ -322,7 +331,7 @@ static void tgt_free_reply_data(struct lu_target *lut, list_del(&trd->trd_list); ted->ted_reply_cnt--; - if (lut != NULL) + if (lut != NULL && trd->trd_index != TRD_INDEX_MEMORY) tgt_clear_reply_slot(lut, trd->trd_index); OBD_FREE_PTR(trd); } @@ -386,6 +395,8 @@ int tgt_client_alloc(struct obd_export *exp) spin_lock_init(&exp->exp_target_data.ted_nodemap_lock); INIT_LIST_HEAD(&exp->exp_target_data.ted_nodemap_member); + spin_lock_init(&exp->exp_target_data.ted_fmd_lock); + INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list); OBD_ALLOC_PTR(exp->exp_target_data.ted_lcd); if (exp->exp_target_data.ted_lcd == NULL) @@ -409,6 +420,8 @@ void tgt_client_free(struct obd_export *exp) LASSERT(exp != exp->exp_obd->obd_self_export); + tgt_fmd_cleanup(exp); + /* free reply data */ mutex_lock(&ted->ted_lcd_lock); list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { @@ -447,6 +460,19 @@ void tgt_client_free(struct obd_export *exp) } EXPORT_SYMBOL(tgt_client_free); +static inline void tgt_check_lcd(const char *obd_name, int index, + struct lsd_client_data *lcd) +{ + size_t uuid_size = sizeof(lcd->lcd_uuid); + + if (strnlen((char*)lcd->lcd_uuid, uuid_size) == uuid_size) { + lcd->lcd_uuid[uuid_size - 1] = '\0'; + + LCONSOLE_ERROR("the client UUID (%s) on %s for exports stored in last_rcvd(index = %d) is bad!\n", + lcd->lcd_uuid, obd_name, index); + } +} + static int tgt_client_data_read(const struct lu_env *env, struct lu_target *tgt, struct lsd_client_data *lcd, loff_t *off, int index) @@ -457,7 +483,7 @@ static int tgt_client_data_read(const struct lu_env *env, struct lu_target *tgt, tti_buf_lcd(tti); rc = dt_record_read(env, tgt->lut_last_rcvd, &tti->tti_buf, off); if (rc == 0) { - check_lcd(tgt->lut_obd->obd_name, index, &tti->tti_lcd); + tgt_check_lcd(tgt->lut_obd->obd_name, index, &tti->tti_lcd); lcd_le_to_cpu(&tti->tti_lcd, lcd); lcd->lcd_last_result = ptlrpc_status_ntoh(lcd->lcd_last_result); lcd->lcd_last_close_result = @@ -503,7 +529,7 @@ static void tgt_cb_new_client(struct lu_env *env, struct thandle *th, { struct tgt_new_client_callback *ccb; - ccb = container_of0(cb, struct tgt_new_client_callback, lncc_cb); + ccb = container_of(cb, struct tgt_new_client_callback, lncc_cb); LASSERT(ccb->lncc_exp->exp_obd); @@ -771,7 +797,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt) struct lu_env env; struct ptlrpc_request *req; __u32 start_epoch; - struct list_head client_list; + LIST_HEAD(client_list); int rc; if (tgt->lut_obd->obd_stopping) @@ -785,12 +811,11 @@ void tgt_boot_epoch_update(struct lu_target *tgt) } spin_lock(&tgt->lut_translock); - start_epoch = lr_epoch(tgt->lut_last_transno) + 1; + start_epoch = (tgt->lut_last_transno >> LR_EPOCH_BITS) + 1; tgt->lut_last_transno = (__u64)start_epoch << LR_EPOCH_BITS; tgt->lut_lsd.lsd_start_epoch = start_epoch; spin_unlock(&tgt->lut_translock); - INIT_LIST_HEAD(&client_list); /** * The recovery is not yet finished and final queue can still be updated * with resend requests. Move final list to separate one for processing @@ -818,7 +843,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt) * - there is no client to recover or the recovery was aborted */ if (!strncmp(tgt->lut_obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) && - (tgt->lut_obd->obd_max_recoverable_clients == 0 || + (atomic_read(&tgt->lut_obd->obd_max_recoverable_clients) == 0 || tgt->lut_obd->obd_abort_recovery)) tgt->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS; @@ -842,7 +867,7 @@ static void tgt_cb_last_committed(struct lu_env *env, struct thandle *th, { struct tgt_last_committed_callback *ccb; - ccb = container_of0(cb, struct tgt_last_committed_callback, llcc_cb); + ccb = container_of(cb, struct tgt_last_committed_callback, llcc_cb); LASSERT(ccb->llcc_exp); LASSERT(ccb->llcc_tgt != NULL); @@ -919,6 +944,26 @@ static int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *tgt, return rc ? rc : exp->exp_need_sync; } +static int tgt_is_local_client(const struct lu_env *env, + struct obd_export *exp) +{ + struct lu_target *tgt = class_exp2tgt(exp); + struct tgt_session_info *tsi = tgt_ses_info(env); + struct ptlrpc_request *req = tgt_ses_req(tsi); + + if (exp_connect_flags(exp) & OBD_CONNECT_MDS || + exp_connect_flags(exp) & OBD_CONNECT_MDS_MDS) + return 0; + if (tgt->lut_local_recovery) + return 0; + if (!req) + return 0; + if (!LNetIsPeerLocal(req->rq_peer.nid)) + return 0; + + return 1; +} + /** * Add new client to the last_rcvd upon new connection. * @@ -940,6 +985,13 @@ int tgt_client_new(const struct lu_env *env, struct obd_export *exp) if (exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) RETURN(0); + if (tgt_is_local_client(env, exp)) { + LCONSOLE_WARN("%s: local client %s w/o recovery\n", + exp->exp_obd->obd_name, ted->ted_lcd->lcd_uuid); + exp->exp_no_recovery = 1; + RETURN(0); + } + /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so * there's no need for extra complication here */ @@ -1067,7 +1119,8 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) /* XXX if lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ if (!strcmp((char *)ted->ted_lcd->lcd_uuid, (char *)tgt->lut_obd->obd_uuid.uuid) || - exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT) + exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT || + exp->exp_no_recovery) RETURN(0); /* Slot may be not yet assigned, use case is race between Client @@ -1119,12 +1172,35 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) } EXPORT_SYMBOL(tgt_client_del); -int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, +static void tgt_clean_by_tag(struct obd_export *exp, __u64 xid, __u16 tag) +{ + struct tg_export_data *ted = &exp->exp_target_data; + struct lu_target *lut = class_exp2tgt(exp); + struct tg_reply_data *trd, *tmp; + + if (tag == 0) + return; + + list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { + if (trd->trd_tag != tag) + continue; + + LASSERT(ergo(tgt_is_increasing_xid_client(exp), + trd->trd_reply.lrd_xid <= xid)); + + ted->ted_release_tag++; + tgt_release_reply_data(lut, ted, trd); + } +} + +static int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, struct tg_export_data *ted, struct tg_reply_data *trd, + struct ptlrpc_request *req, struct thandle *th, bool update_lrd_file) { struct lsd_reply_data *lrd; int i; + int rc; lrd = &trd->trd_reply; /* update export last transno */ @@ -1133,30 +1209,51 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, ted->ted_lcd->lcd_last_transno = lrd->lrd_transno; mutex_unlock(&ted->ted_lcd_lock); - /* find a empty slot */ - i = tgt_find_free_reply_slot(tgt); - if (unlikely(i < 0)) { - CERROR("%s: couldn't find a slot for reply data: " - "rc = %d\n", tgt_name(tgt), i); - RETURN(i); - } - trd->trd_index = i; + if (tgt != NULL) { + /* find a empty slot */ + i = tgt_find_free_reply_slot(tgt); + if (unlikely(i < 0)) { + CERROR("%s: couldn't find a slot for reply data: " + "rc = %d\n", tgt_name(tgt), i); + RETURN(i); + } + trd->trd_index = i; - if (update_lrd_file) { - loff_t off; - int rc; + if (update_lrd_file) { + loff_t off; - /* write reply data to disk */ - off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i; - rc = tgt_reply_data_write(env, tgt, lrd, off, th); - if (unlikely(rc != 0)) { - CERROR("%s: can't update %s file: rc = %d\n", - tgt_name(tgt), REPLY_DATA, rc); - RETURN(rc); + /* write reply data to disk */ + off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i; + rc = tgt_reply_data_write(env, tgt, lrd, off, th); + if (unlikely(rc != 0)) { + CERROR("%s: can't update %s file: rc = %d\n", + tgt_name(tgt), REPLY_DATA, rc); + GOTO(free_slot, rc); + } } + } else { + trd->trd_index = TRD_INDEX_MEMORY; } + /* add reply data to target export's reply list */ mutex_lock(&ted->ted_lcd_lock); + if (req != NULL) { + int exclude = tgt_is_increasing_xid_client(req->rq_export) ? + MSG_REPLAY : MSG_REPLAY|MSG_RESENT; + + if (req->rq_obsolete) { + CDEBUG(D_INFO, + "drop reply data update for obsolete req xid=%llu," + "transno=%llu, tag=%hu\n", req->rq_xid, + lrd->lrd_transno, trd->trd_tag); + mutex_unlock(&ted->ted_lcd_lock); + GOTO(free_slot, rc = -EBADR); + } + + if (!(lustre_msg_get_flags(req->rq_reqmsg) & exclude)) + tgt_clean_by_tag(req->rq_export, req->rq_xid, + trd->trd_tag); + } list_add(&trd->trd_list, &ted->ted_reply_list); ted->ted_reply_cnt++; if (ted->ted_reply_cnt > ted->ted_reply_max) @@ -1166,10 +1263,76 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, CDEBUG(D_TRACE, "add reply %p: xid %llu, transno %llu, " "tag %hu, client gen %u, slot idx %d\n", trd, lrd->lrd_xid, lrd->lrd_transno, - trd->trd_tag, lrd->lrd_client_gen, i); + trd->trd_tag, lrd->lrd_client_gen, trd->trd_index); + RETURN(0); + +free_slot: + if (tgt != NULL) + tgt_clear_reply_slot(tgt, trd->trd_index); + return rc; +} + +int tgt_mk_reply_data(const struct lu_env *env, + struct lu_target *tgt, + struct tg_export_data *ted, + struct ptlrpc_request *req, + __u64 opdata, + struct thandle *th, + bool write_update, + __u64 transno) +{ + struct tg_reply_data *trd; + struct lsd_reply_data *lrd; + __u64 *pre_versions = NULL; + int rc; + + OBD_ALLOC_PTR(trd); + if (unlikely(trd == NULL)) + RETURN(-ENOMEM); + + /* fill reply data information */ + lrd = &trd->trd_reply; + lrd->lrd_transno = transno; + if (req != NULL) { + lrd->lrd_xid = req->rq_xid; + trd->trd_tag = lustre_msg_get_tag(req->rq_reqmsg); + lrd->lrd_client_gen = ted->ted_lcd->lcd_generation; + if (write_update) { + pre_versions = lustre_msg_get_versions(req->rq_repmsg); + lrd->lrd_result = th->th_result; + } + } else { + struct tgt_session_info *tsi; + + LASSERT(env != NULL); + tsi = tgt_ses_info(env); + LASSERT(tsi->tsi_xid != 0); + + lrd->lrd_xid = tsi->tsi_xid; + lrd->lrd_result = tsi->tsi_result; + lrd->lrd_client_gen = tsi->tsi_client_gen; + } + + lrd->lrd_data = opdata; + if (pre_versions) { + trd->trd_pre_versions[0] = pre_versions[0]; + trd->trd_pre_versions[1] = pre_versions[1]; + trd->trd_pre_versions[2] = pre_versions[2]; + trd->trd_pre_versions[3] = pre_versions[3]; + } + + rc = tgt_add_reply_data(env, tgt, ted, trd, req, + th, write_update); + if (rc < 0) { + OBD_FREE_PTR(trd); + if (rc == -EBADR) + rc = 0; + } + return rc; + } -EXPORT_SYMBOL(tgt_add_reply_data); +EXPORT_SYMBOL(tgt_mk_reply_data); /* * last_rcvd & last_committed update callbacks @@ -1180,11 +1343,11 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, { struct tgt_thread_info *tti = tgt_th_info(env); struct tgt_session_info *tsi = tgt_ses_info(env); - struct obd_export *exp = tsi->tsi_exp; - struct tg_export_data *ted; - __u64 *transno_p; - int rc = 0; - bool lw_client; + struct obd_export *exp = tsi->tsi_exp; + struct tg_export_data *ted; + __u64 *transno_p; + bool nolcd = false; + int rc = 0; ENTRY; @@ -1192,11 +1355,15 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, LASSERT(exp != NULL); ted = &exp->exp_target_data; - lw_client = exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT; - if (ted->ted_lr_idx < 0 && !lw_client) - /* ofd connect may cause transaction before export has - * last_rcvd slot */ - RETURN(0); + /* Some clients don't support recovery, and they don't have last_rcvd + * client data: + * 1. lightweight clients. + * 2. local clients on MDS which doesn't enable "localrecov". + * 3. OFD connect may cause transaction before export has last_rcvd + * slot. + */ + if (ted->ted_lr_idx < 0) + nolcd = true; if (req != NULL) tti->tti_transno = lustre_msg_get_transno(req->rq_reqmsg); @@ -1237,14 +1404,13 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, /* if can't add callback, do sync write */ th->th_sync |= !!tgt_last_commit_cb_add(th, tgt, exp, tti->tti_transno); - if (lw_client) { - /* All operations performed by LW clients are synchronous and - * we store the committed transno in the last_rcvd header */ + if (nolcd) { + /* store transno in the last_rcvd header */ spin_lock(&tgt->lut_translock); if (tti->tti_transno > tgt->lut_lsd.lsd_last_transno) { tgt->lut_lsd.lsd_last_transno = tti->tti_transno; spin_unlock(&tgt->lut_translock); - /* Although lightweight (LW) connections have no slot + /* Although current connection doesn't have slot * in the last_rcvd, we still want to maintain * the in-memory lsd_client_data structure in order to * properly handle reply reconstruction. */ @@ -1260,47 +1426,8 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, /* Target that supports multiple reply data */ if (tgt_is_multimodrpcs_client(exp)) { - struct tg_reply_data *trd; - struct lsd_reply_data *lrd; - __u64 *pre_versions; - bool write_update; - - OBD_ALLOC_PTR(trd); - if (unlikely(trd == NULL)) - RETURN(-ENOMEM); - - /* fill reply data information */ - lrd = &trd->trd_reply; - lrd->lrd_transno = tti->tti_transno; - if (req != NULL) { - lrd->lrd_xid = req->rq_xid; - trd->trd_tag = lustre_msg_get_tag(req->rq_reqmsg); - pre_versions = lustre_msg_get_versions(req->rq_repmsg); - lrd->lrd_result = th->th_result; - lrd->lrd_client_gen = ted->ted_lcd->lcd_generation; - write_update = true; - } else { - LASSERT(tsi->tsi_xid != 0); - lrd->lrd_xid = tsi->tsi_xid; - lrd->lrd_result = tsi->tsi_result; - lrd->lrd_client_gen = tsi->tsi_client_gen; - trd->trd_tag = 0; - pre_versions = NULL; - write_update = false; - } - - lrd->lrd_data = opdata; - if (pre_versions) { - trd->trd_pre_versions[0] = pre_versions[0]; - trd->trd_pre_versions[1] = pre_versions[1]; - trd->trd_pre_versions[2] = pre_versions[2]; - trd->trd_pre_versions[3] = pre_versions[3]; - } - - rc = tgt_add_reply_data(env, tgt, ted, trd, th, write_update); - if (rc < 0) - OBD_FREE_PTR(trd); - return rc; + return tgt_mk_reply_data(env, tgt, ted, req, opdata, th, + !!(req != NULL), tti->tti_transno); } /* Enough for update replay, let's return */ @@ -1356,7 +1483,7 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, } } - if (!lw_client) { + if (!nolcd) { tti->tti_off = ted->ted_lr_off; if (CFS_FAIL_CHECK(OBD_FAIL_TGT_RCVD_EIO)) rc = -EIO; @@ -1436,8 +1563,8 @@ static int tgt_clients_data_init(const struct lu_env *env, if (tgt->lut_bottom->dd_rdonly) RETURN(0); - CLASSERT(offsetof(struct lsd_client_data, lcd_padding) + - sizeof(lcd->lcd_padding) == LR_CLIENT_SIZE); + BUILD_BUG_ON(offsetof(struct lsd_client_data, lcd_padding) + + sizeof(lcd->lcd_padding) != LR_CLIENT_SIZE); OBD_ALLOC_PTR(lcd); if (lcd == NULL) @@ -1502,7 +1629,7 @@ static int tgt_clients_data_init(const struct lu_env *env, exp->exp_connecting = 0; exp->exp_in_recovery = 0; spin_unlock(&exp->exp_lock); - obd->obd_max_recoverable_clients++; + atomic_inc(&obd->obd_max_recoverable_clients); if (tgt->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS && @@ -1591,8 +1718,8 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt) last_rcvd_size = (unsigned long)tti->tti_attr.la_size; /* ensure padding in the struct is the correct size */ - CLASSERT(offsetof(struct lr_server_data, lsd_padding) + - sizeof(lsd->lsd_padding) == LR_SERVER_SIZE); + BUILD_BUG_ON(offsetof(struct lr_server_data, lsd_padding) + + sizeof(lsd->lsd_padding) != LR_SERVER_SIZE); rc = server_name2index(tgt_name(tgt), &index, NULL); if (rc < 0) { @@ -1655,10 +1782,9 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt) } if (lsd->lsd_osd_index != index) { - LCONSOLE_ERROR_MSG(0x157, "%s: index %d in last rcvd " - "is different with the index %d in" - "config log, It might be disk" - "corruption!\n", tgt_name(tgt), + LCONSOLE_ERROR_MSG(0x157, + "%s: index %d in last rcvd is different with the index %d in config log, It might be disk corruption!\n", + tgt_name(tgt), lsd->lsd_osd_index, index); RETURN(-EINVAL); } @@ -1874,7 +2000,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) unsigned long reply_data_size; int rc; struct lsd_reply_header *lrh = NULL; - struct lsd_client_data *lcd = NULL; struct tg_reply_data *trd = NULL; int idx; loff_t off; @@ -1923,10 +2048,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) if (hash == NULL) GOTO(out, rc = -ENODEV); - OBD_ALLOC_PTR(lcd); - if (lcd == NULL) - GOTO(out, rc = -ENOMEM); - OBD_ALLOC_PTR(trd); if (trd == NULL) GOTO(out, rc = -ENOMEM); @@ -1978,6 +2099,13 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) /* update export last committed transation */ exp->exp_last_committed = max(exp->exp_last_committed, lrd->lrd_transno); + /* Update lcd_last_transno as well for check in + * tgt_release_reply_data() or the latest client + * transno can be lost. + */ + ted->ted_lcd->lcd_last_transno = + max(ted->ted_lcd->lcd_last_transno, + exp->exp_last_committed); mutex_unlock(&ted->ted_lcd_lock); class_export_put(exp); @@ -2009,8 +2137,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) out: if (hash != NULL) cfs_hash_putref(hash); - if (lcd != NULL) - OBD_FREE_PTR(lcd); if (trd != NULL) OBD_FREE_PTR(trd); if (lrh != NULL) @@ -2018,43 +2144,70 @@ out: return rc; } -struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted, - __u64 xid) +static int tgt_check_lookup_req(struct ptlrpc_request *req, int lookup, + struct tg_reply_data *trd) { - struct tg_reply_data *found = NULL; - struct tg_reply_data *reply; + struct tg_export_data *ted = &req->rq_export->exp_target_data; + struct lu_target *lut = class_exp2tgt(req->rq_export); + __u16 tag = lustre_msg_get_tag(req->rq_reqmsg); + int rc = 0; + struct tg_reply_data *reply; + bool check_increasing; + + if (tag == 0) + return 0; + + check_increasing = tgt_is_increasing_xid_client(req->rq_export) && + !(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY); + if (!lookup && !check_increasing) + return 0; - mutex_lock(&ted->ted_lcd_lock); list_for_each_entry(reply, &ted->ted_reply_list, trd_list) { - if (reply->trd_reply.lrd_xid == xid) { - found = reply; + if (lookup && reply->trd_reply.lrd_xid == req->rq_xid) { + rc = 1; + if (trd != NULL) + *trd = *reply; + break; + } else if (check_increasing && reply->trd_tag == tag && + reply->trd_reply.lrd_xid > req->rq_xid) { + rc = -EPROTO; + CERROR("%s: busy tag=%u req_xid=%llu, trd=%p: xid=%llu transno=%llu client_gen=%u slot_idx=%d: rc = %d\n", + tgt_name(lut), tag, req->rq_xid, trd, + reply->trd_reply.lrd_xid, + reply->trd_reply.lrd_transno, + reply->trd_reply.lrd_client_gen, + reply->trd_index, rc); break; } } - mutex_unlock(&ted->ted_lcd_lock); - return found; + + return rc; } -EXPORT_SYMBOL(tgt_lookup_reply_by_xid); /* Look for a reply data matching specified request @req * A copy is returned in @trd if the pointer is not NULL */ -bool tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd) +int tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd) { - struct tg_export_data *ted = &req->rq_export->exp_target_data; - struct tg_reply_data *reply; - bool found = false; - - reply = tgt_lookup_reply_by_xid(ted, req->rq_xid); - if (reply != NULL) { - found = true; - if (trd != NULL) - *trd = *reply; + struct tg_export_data *ted = &req->rq_export->exp_target_data; + int found = 0; + bool not_replay = !(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY); + + mutex_lock(&ted->ted_lcd_lock); + if (not_replay && req->rq_xid <= req->rq_export->exp_last_xid) { + /* A check for the last_xid is needed here in case there is + * no reply data is left in the list. It may happen if another + * RPC on another slot increased the last_xid between our + * process_req_last_xid & tgt_lookup_reply calls */ + found = -EPROTO; + } else { + found = tgt_check_lookup_req(req, 1, trd); } + mutex_unlock(&ted->ted_lcd_lock); - CDEBUG(D_TRACE, "%s: lookup reply xid %llu, found %d\n", - tgt_name(class_exp2tgt(req->rq_export)), req->rq_xid, - found ? 1 : 0); + CDEBUG(D_TRACE, "%s: lookup reply xid %llu, found %d last_xid %llu\n", + tgt_name(class_exp2tgt(req->rq_export)), req->rq_xid, found, + req->rq_export->exp_last_xid); return found; } @@ -2066,37 +2219,19 @@ int tgt_handle_received_xid(struct obd_export *exp, __u64 rcvd_xid) struct lu_target *lut = class_exp2tgt(exp); struct tg_reply_data *trd, *tmp; - mutex_lock(&ted->ted_lcd_lock); + list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { if (trd->trd_reply.lrd_xid > rcvd_xid) continue; ted->ted_release_xid++; tgt_release_reply_data(lut, ted, trd); } - mutex_unlock(&ted->ted_lcd_lock); return 0; } -int tgt_handle_tag(struct obd_export *exp, __u16 tag) +int tgt_handle_tag(struct ptlrpc_request *req) { - struct tg_export_data *ted = &exp->exp_target_data; - struct lu_target *lut = class_exp2tgt(exp); - struct tg_reply_data *trd, *tmp; - - if (tag == 0) - return 0; - - mutex_lock(&ted->ted_lcd_lock); - list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { - if (trd->trd_tag != tag) - continue; - ted->ted_release_tag++; - tgt_release_reply_data(lut, ted, trd); - break; - } - mutex_unlock(&ted->ted_lcd_lock); - - return 0; + return tgt_check_lookup_req(req, 0, NULL); }