X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Ftarget%2Ftgt_lastrcvd.c;h=b3a7e7159910b5a96e885afecdf89a143cf83520;hp=3a55026a8dcfa25a7b1e1e32d208a214604b0205;hb=0098396983e1075668414aa5298a4990e61ffbda;hpb=c60e949e3b9f7ff19e1a644210cc764ee150ad8b diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 3a55026..b3a7e71 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -23,7 +23,7 @@ * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2016, Intel Corporation. + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -148,6 +148,13 @@ static int tgt_clear_reply_slot(struct lu_target *lut, int idx) int chunk; int b; + if (lut->lut_obd->obd_stopping) + /* + * in case of failover keep the bit set in order to + * avoid overwriting slots in reply_data which might + * be required by resent rpcs + */ + return 0; chunk = idx / LUT_REPLY_SLOTS_PER_CHUNK; b = idx % LUT_REPLY_SLOTS_PER_CHUNK; @@ -324,7 +331,7 @@ static void tgt_free_reply_data(struct lu_target *lut, list_del(&trd->trd_list); ted->ted_reply_cnt--; - if (lut != NULL) + if (lut != NULL && trd->trd_index != TRD_INDEX_MEMORY) tgt_clear_reply_slot(lut, trd->trd_index); OBD_FREE_PTR(trd); } @@ -388,6 +395,8 @@ int tgt_client_alloc(struct obd_export *exp) spin_lock_init(&exp->exp_target_data.ted_nodemap_lock); INIT_LIST_HEAD(&exp->exp_target_data.ted_nodemap_member); + spin_lock_init(&exp->exp_target_data.ted_fmd_lock); + INIT_LIST_HEAD(&exp->exp_target_data.ted_fmd_list); OBD_ALLOC_PTR(exp->exp_target_data.ted_lcd); if (exp->exp_target_data.ted_lcd == NULL) @@ -411,6 +420,8 @@ void tgt_client_free(struct obd_export *exp) LASSERT(exp != exp->exp_obd->obd_self_export); + tgt_fmd_cleanup(exp); + /* free reply data */ mutex_lock(&ted->ted_lcd_lock); list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { @@ -786,7 +797,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt) struct lu_env env; struct ptlrpc_request *req; __u32 start_epoch; - struct list_head client_list; + LIST_HEAD(client_list); int rc; if (tgt->lut_obd->obd_stopping) @@ -805,7 +816,6 @@ void tgt_boot_epoch_update(struct lu_target *tgt) tgt->lut_lsd.lsd_start_epoch = start_epoch; spin_unlock(&tgt->lut_translock); - INIT_LIST_HEAD(&client_list); /** * The recovery is not yet finished and final queue can still be updated * with resend requests. Move final list to separate one for processing @@ -833,7 +843,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt) * - there is no client to recover or the recovery was aborted */ if (!strncmp(tgt->lut_obd->obd_type->typ_name, LUSTRE_MDT_NAME, 3) && - (tgt->lut_obd->obd_max_recoverable_clients == 0 || + (atomic_read(&tgt->lut_obd->obd_max_recoverable_clients) == 0 || tgt->lut_obd->obd_abort_recovery)) tgt->lut_lsd.lsd_feature_incompat &= ~OBD_INCOMPAT_MULTI_RPCS; @@ -1079,10 +1089,6 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) RETURN(-EINVAL); } - /* Do not erase record for recoverable client. */ - if (exp->exp_obd->obd_fail) - RETURN(0); - /* XXX if lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ if (!strcmp((char *)ted->ted_lcd->lcd_uuid, (char *)tgt->lut_obd->obd_uuid.uuid) || @@ -1109,6 +1115,9 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) LBUG(); } + /* Do not erase record for recoverable client. */ + if (exp->exp_flags & OBD_OPT_FAILOVER) + RETURN(0); if (OBD_FAIL_CHECK(OBD_FAIL_TGT_CLIENT_DEL)) RETURN(0); @@ -1135,8 +1144,30 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp) } EXPORT_SYMBOL(tgt_client_del); -int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, +static void tgt_clean_by_tag(struct obd_export *exp, __u64 xid, __u16 tag) +{ + struct tg_export_data *ted = &exp->exp_target_data; + struct lu_target *lut = class_exp2tgt(exp); + struct tg_reply_data *trd, *tmp; + + if (tag == 0) + return; + + list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { + if (trd->trd_tag != tag) + continue; + + LASSERT(ergo(tgt_is_increasing_xid_client(exp), + trd->trd_reply.lrd_xid <= xid)); + + ted->ted_release_tag++; + tgt_release_reply_data(lut, ted, trd); + } +} + +static int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, struct tg_export_data *ted, struct tg_reply_data *trd, + struct ptlrpc_request *req, struct thandle *th, bool update_lrd_file) { struct lsd_reply_data *lrd; @@ -1149,30 +1180,48 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, ted->ted_lcd->lcd_last_transno = lrd->lrd_transno; mutex_unlock(&ted->ted_lcd_lock); - /* find a empty slot */ - i = tgt_find_free_reply_slot(tgt); - if (unlikely(i < 0)) { - CERROR("%s: couldn't find a slot for reply data: " - "rc = %d\n", tgt_name(tgt), i); - RETURN(i); - } - trd->trd_index = i; + if (tgt != NULL) { + /* find a empty slot */ + i = tgt_find_free_reply_slot(tgt); + if (unlikely(i < 0)) { + CERROR("%s: couldn't find a slot for reply data: " + "rc = %d\n", tgt_name(tgt), i); + RETURN(i); + } + trd->trd_index = i; - if (update_lrd_file) { - loff_t off; - int rc; + if (update_lrd_file) { + loff_t off; + int rc; - /* write reply data to disk */ - off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i; - rc = tgt_reply_data_write(env, tgt, lrd, off, th); - if (unlikely(rc != 0)) { - CERROR("%s: can't update %s file: rc = %d\n", - tgt_name(tgt), REPLY_DATA, rc); - RETURN(rc); + /* write reply data to disk */ + off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i; + rc = tgt_reply_data_write(env, tgt, lrd, off, th); + if (unlikely(rc != 0)) { + CERROR("%s: can't update %s file: rc = %d\n", + tgt_name(tgt), REPLY_DATA, rc); + RETURN(rc); + } } + } else { + trd->trd_index = TRD_INDEX_MEMORY; } + /* add reply data to target export's reply list */ mutex_lock(&ted->ted_lcd_lock); + if (req != NULL) { + int exclude = tgt_is_increasing_xid_client(req->rq_export) ? + MSG_REPLAY : MSG_REPLAY|MSG_RESENT; + + if (req->rq_obsolete) { + mutex_unlock(&ted->ted_lcd_lock); + RETURN(-EALREADY); + } + + if (!(lustre_msg_get_flags(req->rq_reqmsg) & exclude)) + tgt_clean_by_tag(req->rq_export, req->rq_xid, + trd->trd_tag); + } list_add(&trd->trd_list, &ted->ted_reply_list); ted->ted_reply_cnt++; if (ted->ted_reply_cnt > ted->ted_reply_max) @@ -1182,10 +1231,68 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt, CDEBUG(D_TRACE, "add reply %p: xid %llu, transno %llu, " "tag %hu, client gen %u, slot idx %d\n", trd, lrd->lrd_xid, lrd->lrd_transno, - trd->trd_tag, lrd->lrd_client_gen, i); + trd->trd_tag, lrd->lrd_client_gen, trd->trd_index); + RETURN(0); } -EXPORT_SYMBOL(tgt_add_reply_data); + +int tgt_mk_reply_data(const struct lu_env *env, + struct lu_target *tgt, + struct tg_export_data *ted, + struct ptlrpc_request *req, + __u64 opdata, + struct thandle *th, + bool write_update, + __u64 transno) +{ + struct tg_reply_data *trd; + struct lsd_reply_data *lrd; + __u64 *pre_versions = NULL; + int rc; + + OBD_ALLOC_PTR(trd); + if (unlikely(trd == NULL)) + RETURN(-ENOMEM); + + /* fill reply data information */ + lrd = &trd->trd_reply; + lrd->lrd_transno = transno; + if (req != NULL) { + lrd->lrd_xid = req->rq_xid; + trd->trd_tag = lustre_msg_get_tag(req->rq_reqmsg); + lrd->lrd_client_gen = ted->ted_lcd->lcd_generation; + if (write_update) { + pre_versions = lustre_msg_get_versions(req->rq_repmsg); + lrd->lrd_result = th->th_result; + } + } else { + struct tgt_session_info *tsi; + + LASSERT(env != NULL); + tsi = tgt_ses_info(env); + LASSERT(tsi->tsi_xid != 0); + + lrd->lrd_xid = tsi->tsi_xid; + lrd->lrd_result = tsi->tsi_result; + lrd->lrd_client_gen = tsi->tsi_client_gen; + } + + lrd->lrd_data = opdata; + if (pre_versions) { + trd->trd_pre_versions[0] = pre_versions[0]; + trd->trd_pre_versions[1] = pre_versions[1]; + trd->trd_pre_versions[2] = pre_versions[2]; + trd->trd_pre_versions[3] = pre_versions[3]; + } + + rc = tgt_add_reply_data(env, tgt, ted, trd, req, + th, write_update); + if (rc < 0) + OBD_FREE_PTR(trd); + return rc; + +} +EXPORT_SYMBOL(tgt_mk_reply_data); /* * last_rcvd & last_committed update callbacks @@ -1276,47 +1383,8 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, /* Target that supports multiple reply data */ if (tgt_is_multimodrpcs_client(exp)) { - struct tg_reply_data *trd; - struct lsd_reply_data *lrd; - __u64 *pre_versions; - bool write_update; - - OBD_ALLOC_PTR(trd); - if (unlikely(trd == NULL)) - RETURN(-ENOMEM); - - /* fill reply data information */ - lrd = &trd->trd_reply; - lrd->lrd_transno = tti->tti_transno; - if (req != NULL) { - lrd->lrd_xid = req->rq_xid; - trd->trd_tag = lustre_msg_get_tag(req->rq_reqmsg); - pre_versions = lustre_msg_get_versions(req->rq_repmsg); - lrd->lrd_result = th->th_result; - lrd->lrd_client_gen = ted->ted_lcd->lcd_generation; - write_update = true; - } else { - LASSERT(tsi->tsi_xid != 0); - lrd->lrd_xid = tsi->tsi_xid; - lrd->lrd_result = tsi->tsi_result; - lrd->lrd_client_gen = tsi->tsi_client_gen; - trd->trd_tag = 0; - pre_versions = NULL; - write_update = false; - } - - lrd->lrd_data = opdata; - if (pre_versions) { - trd->trd_pre_versions[0] = pre_versions[0]; - trd->trd_pre_versions[1] = pre_versions[1]; - trd->trd_pre_versions[2] = pre_versions[2]; - trd->trd_pre_versions[3] = pre_versions[3]; - } - - rc = tgt_add_reply_data(env, tgt, ted, trd, th, write_update); - if (rc < 0) - OBD_FREE_PTR(trd); - return rc; + return tgt_mk_reply_data(env, tgt, ted, req, opdata, th, + !!(req != NULL), tti->tti_transno); } /* Enough for update replay, let's return */ @@ -1452,8 +1520,8 @@ static int tgt_clients_data_init(const struct lu_env *env, if (tgt->lut_bottom->dd_rdonly) RETURN(0); - CLASSERT(offsetof(struct lsd_client_data, lcd_padding) + - sizeof(lcd->lcd_padding) == LR_CLIENT_SIZE); + BUILD_BUG_ON(offsetof(struct lsd_client_data, lcd_padding) + + sizeof(lcd->lcd_padding) != LR_CLIENT_SIZE); OBD_ALLOC_PTR(lcd); if (lcd == NULL) @@ -1518,7 +1586,7 @@ static int tgt_clients_data_init(const struct lu_env *env, exp->exp_connecting = 0; exp->exp_in_recovery = 0; spin_unlock(&exp->exp_lock); - obd->obd_max_recoverable_clients++; + atomic_inc(&obd->obd_max_recoverable_clients); if (tgt->lut_lsd.lsd_feature_incompat & OBD_INCOMPAT_MULTI_RPCS && @@ -1607,8 +1675,8 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt) last_rcvd_size = (unsigned long)tti->tti_attr.la_size; /* ensure padding in the struct is the correct size */ - CLASSERT(offsetof(struct lr_server_data, lsd_padding) + - sizeof(lsd->lsd_padding) == LR_SERVER_SIZE); + BUILD_BUG_ON(offsetof(struct lr_server_data, lsd_padding) + + sizeof(lsd->lsd_padding) != LR_SERVER_SIZE); rc = server_name2index(tgt_name(tgt), &index, NULL); if (rc < 0) { @@ -1671,10 +1739,9 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt) } if (lsd->lsd_osd_index != index) { - LCONSOLE_ERROR_MSG(0x157, "%s: index %d in last rcvd " - "is different with the index %d in" - "config log, It might be disk" - "corruption!\n", tgt_name(tgt), + LCONSOLE_ERROR_MSG(0x157, + "%s: index %d in last rcvd is different with the index %d in config log, It might be disk corruption!\n", + tgt_name(tgt), lsd->lsd_osd_index, index); RETURN(-EINVAL); } @@ -1890,7 +1957,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) unsigned long reply_data_size; int rc; struct lsd_reply_header *lrh = NULL; - struct lsd_client_data *lcd = NULL; struct tg_reply_data *trd = NULL; int idx; loff_t off; @@ -1939,10 +2005,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) if (hash == NULL) GOTO(out, rc = -ENODEV); - OBD_ALLOC_PTR(lcd); - if (lcd == NULL) - GOTO(out, rc = -ENOMEM); - OBD_ALLOC_PTR(trd); if (trd == NULL) GOTO(out, rc = -ENOMEM); @@ -1994,6 +2056,13 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) /* update export last committed transation */ exp->exp_last_committed = max(exp->exp_last_committed, lrd->lrd_transno); + /* Update lcd_last_transno as well for check in + * tgt_release_reply_data() or the latest client + * transno can be lost. + */ + ted->ted_lcd->lcd_last_transno = + max(ted->ted_lcd->lcd_last_transno, + exp->exp_last_committed); mutex_unlock(&ted->ted_lcd_lock); class_export_put(exp); @@ -2025,8 +2094,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) out: if (hash != NULL) cfs_hash_putref(hash); - if (lcd != NULL) - OBD_FREE_PTR(lcd); if (trd != NULL) OBD_FREE_PTR(trd); if (lrh != NULL) @@ -2034,43 +2101,70 @@ out: return rc; } -struct tg_reply_data *tgt_lookup_reply_by_xid(struct tg_export_data *ted, - __u64 xid) +static int tgt_check_lookup_req(struct ptlrpc_request *req, int lookup, + struct tg_reply_data *trd) { - struct tg_reply_data *found = NULL; - struct tg_reply_data *reply; + struct tg_export_data *ted = &req->rq_export->exp_target_data; + struct lu_target *lut = class_exp2tgt(req->rq_export); + __u16 tag = lustre_msg_get_tag(req->rq_reqmsg); + int rc = 0; + struct tg_reply_data *reply; + bool check_increasing; + + if (tag == 0) + return 0; + + check_increasing = tgt_is_increasing_xid_client(req->rq_export) && + !(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY); + if (!lookup && !check_increasing) + return 0; - mutex_lock(&ted->ted_lcd_lock); list_for_each_entry(reply, &ted->ted_reply_list, trd_list) { - if (reply->trd_reply.lrd_xid == xid) { - found = reply; + if (lookup && reply->trd_reply.lrd_xid == req->rq_xid) { + rc = 1; + if (trd != NULL) + *trd = *reply; + break; + } else if (check_increasing && reply->trd_tag == tag && + reply->trd_reply.lrd_xid > req->rq_xid) { + rc = -EPROTO; + CERROR("%s: busy tag=%u req_xid=%llu, trd=%p: xid=%llu transno=%llu client_gen=%u slot_idx=%d: rc = %d\n", + tgt_name(lut), tag, req->rq_xid, trd, + reply->trd_reply.lrd_xid, + reply->trd_reply.lrd_transno, + reply->trd_reply.lrd_client_gen, + reply->trd_index, rc); break; } } - mutex_unlock(&ted->ted_lcd_lock); - return found; + + return rc; } -EXPORT_SYMBOL(tgt_lookup_reply_by_xid); /* Look for a reply data matching specified request @req * A copy is returned in @trd if the pointer is not NULL */ -bool tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd) +int tgt_lookup_reply(struct ptlrpc_request *req, struct tg_reply_data *trd) { - struct tg_export_data *ted = &req->rq_export->exp_target_data; - struct tg_reply_data *reply; - bool found = false; - - reply = tgt_lookup_reply_by_xid(ted, req->rq_xid); - if (reply != NULL) { - found = true; - if (trd != NULL) - *trd = *reply; + struct tg_export_data *ted = &req->rq_export->exp_target_data; + int found = 0; + bool not_replay = !(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY); + + mutex_lock(&ted->ted_lcd_lock); + if (not_replay && req->rq_xid <= req->rq_export->exp_last_xid) { + /* A check for the last_xid is needed here in case there is + * no reply data is left in the list. It may happen if another + * RPC on another slot increased the last_xid between our + * process_req_last_xid & tgt_lookup_reply calls */ + found = -EPROTO; + } else { + found = tgt_check_lookup_req(req, 1, trd); } + mutex_unlock(&ted->ted_lcd_lock); - CDEBUG(D_TRACE, "%s: lookup reply xid %llu, found %d\n", - tgt_name(class_exp2tgt(req->rq_export)), req->rq_xid, - found ? 1 : 0); + CDEBUG(D_TRACE, "%s: lookup reply xid %llu, found %d last_xid %llu\n", + tgt_name(class_exp2tgt(req->rq_export)), req->rq_xid, found, + req->rq_export->exp_last_xid); return found; } @@ -2082,37 +2176,19 @@ int tgt_handle_received_xid(struct obd_export *exp, __u64 rcvd_xid) struct lu_target *lut = class_exp2tgt(exp); struct tg_reply_data *trd, *tmp; - mutex_lock(&ted->ted_lcd_lock); + list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { if (trd->trd_reply.lrd_xid > rcvd_xid) continue; ted->ted_release_xid++; tgt_release_reply_data(lut, ted, trd); } - mutex_unlock(&ted->ted_lcd_lock); return 0; } -int tgt_handle_tag(struct obd_export *exp, __u16 tag) +int tgt_handle_tag(struct ptlrpc_request *req) { - struct tg_export_data *ted = &exp->exp_target_data; - struct lu_target *lut = class_exp2tgt(exp); - struct tg_reply_data *trd, *tmp; - - if (tag == 0) - return 0; - - mutex_lock(&ted->ted_lcd_lock); - list_for_each_entry_safe(trd, tmp, &ted->ted_reply_list, trd_list) { - if (trd->trd_tag != tag) - continue; - ted->ted_release_tag++; - tgt_release_reply_data(lut, ted, trd); - break; - } - mutex_unlock(&ted->ted_lcd_lock); - - return 0; + return tgt_check_lookup_req(req, 0, NULL); }