Whamcloud - gitweb
LU-15880 quota: fix issues in reserving quota
[fs/lustre-release.git] / lustre / target / tgt_lastrcvd.c
index 99a39c3..387223c 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * Lustre Unified Target
  * These are common function to work with last_received file
@@ -331,7 +330,7 @@ static void tgt_free_reply_data(struct lu_target *lut,
 
        list_del(&trd->trd_list);
        ted->ted_reply_cnt--;
-       if (lut != NULL)
+       if (lut != NULL && trd->trd_index != TRD_INDEX_MEMORY)
                tgt_clear_reply_slot(lut, trd->trd_index);
        OBD_FREE_PTR(trd);
 }
@@ -529,7 +528,7 @@ static void tgt_cb_new_client(struct lu_env *env, struct thandle *th,
 {
        struct tgt_new_client_callback *ccb;
 
-       ccb = container_of0(cb, struct tgt_new_client_callback, lncc_cb);
+       ccb = container_of(cb, struct tgt_new_client_callback, lncc_cb);
 
        LASSERT(ccb->lncc_exp->exp_obd);
 
@@ -797,7 +796,7 @@ void tgt_boot_epoch_update(struct lu_target *tgt)
        struct lu_env            env;
        struct ptlrpc_request   *req;
        __u32                    start_epoch;
-       struct list_head         client_list;
+       LIST_HEAD(client_list);
        int                      rc;
 
        if (tgt->lut_obd->obd_stopping)
@@ -816,7 +815,6 @@ void tgt_boot_epoch_update(struct lu_target *tgt)
        tgt->lut_lsd.lsd_start_epoch = start_epoch;
        spin_unlock(&tgt->lut_translock);
 
-       INIT_LIST_HEAD(&client_list);
        /**
         * The recovery is not yet finished and final queue can still be updated
         * with resend requests. Move final list to separate one for processing
@@ -868,12 +866,23 @@ static void tgt_cb_last_committed(struct lu_env *env, struct thandle *th,
 {
        struct tgt_last_committed_callback *ccb;
 
-       ccb = container_of0(cb, struct tgt_last_committed_callback, llcc_cb);
+       ccb = container_of(cb, struct tgt_last_committed_callback, llcc_cb);
 
        LASSERT(ccb->llcc_exp);
        LASSERT(ccb->llcc_tgt != NULL);
        LASSERT(ccb->llcc_exp->exp_obd == ccb->llcc_tgt->lut_obd);
 
+       if (th->th_reserved_quota.lqi_space > 0) {
+               CDEBUG(D_QUOTA, "free quota %llu %llu\n",
+                      th->th_reserved_quota.lqi_id.qid_gid,
+                      th->th_reserved_quota.lqi_space);
+
+               /* env can be NULL for freeing reserved quota */
+               th->th_reserved_quota.lqi_space *= -1;
+               dt_reserve_or_free_quota(NULL, th->th_dev,
+                                        &th->th_reserved_quota);
+       }
+
        /* error hit, don't update last committed to provide chance to
         * replay data after fail */
        if (err != 0)
@@ -945,6 +954,28 @@ static int tgt_last_commit_cb_add(struct thandle *th, struct lu_target *tgt,
        return rc ? rc : exp->exp_need_sync;
 }
 
+static int tgt_is_local_client(const struct lu_env *env,
+                                     struct obd_export *exp)
+{
+       struct lu_target *target = class_exp2tgt(exp);
+       struct tgt_session_info *info = tgt_ses_info(env);
+       struct ptlrpc_request *request = tgt_ses_req(info);
+       struct lnet_nid peer_nid;
+
+       /* exports flagged as MDS or MDS-MDS connections never qualify */
+       if (exp_connect_flags(exp) & (OBD_CONNECT_MDS | OBD_CONNECT_MDS_MDS))
+               return 0;
+
+       /* neither do targets with lut_local_recovery set, nor a session
+        * that carries no request to take the peer NID from
+        */
+       if (target->lut_local_recovery || request == NULL)
+               return 0;
+
+       lnet_nid4_to_nid(request->rq_peer.nid, &peer_nid);
+       return LNetIsPeerLocal(&peer_nid) ? 1 : 0;
+}
+
 /**
  * Add new client to the last_rcvd upon new connection.
  *
@@ -966,6 +997,13 @@ int tgt_client_new(const struct lu_env *env, struct obd_export *exp)
        if (exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)
                RETURN(0);
 
+       if (tgt_is_local_client(env, exp)) {
+               LCONSOLE_WARN("%s: local client %s w/o recovery\n",
+                             exp->exp_obd->obd_name, ted->ted_lcd->lcd_uuid);
+               exp->exp_no_recovery = 1;
+               RETURN(0);
+       }
+
        /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
         * there's no need for extra complication here
         */
@@ -1093,7 +1131,8 @@ int tgt_client_del(const struct lu_env *env, struct obd_export *exp)
        /* XXX if lcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
        if (!strcmp((char *)ted->ted_lcd->lcd_uuid,
                    (char *)tgt->lut_obd->obd_uuid.uuid) ||
-           exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT)
+           exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT ||
+           exp->exp_no_recovery)
                RETURN(0);
 
        /* Slot may be not yet assigned, use case is race between Client
@@ -1166,13 +1205,14 @@ static void tgt_clean_by_tag(struct obd_export *exp, __u64 xid, __u16 tag)
        }
 }
 
-int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
+static int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
                       struct tg_export_data *ted, struct tg_reply_data *trd,
                       struct ptlrpc_request *req,
                       struct thandle *th, bool update_lrd_file)
 {
        struct lsd_reply_data   *lrd;
        int     i;
+       int     rc;
 
        lrd = &trd->trd_reply;
        /* update export last transno */
@@ -1181,28 +1221,32 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
                ted->ted_lcd->lcd_last_transno = lrd->lrd_transno;
        mutex_unlock(&ted->ted_lcd_lock);
 
-       /* find a empty slot */
-       i = tgt_find_free_reply_slot(tgt);
-       if (unlikely(i < 0)) {
-               CERROR("%s: couldn't find a slot for reply data: "
-                      "rc = %d\n", tgt_name(tgt), i);
-               RETURN(i);
-       }
-       trd->trd_index = i;
+       if (tgt != NULL) {
+               /* find a empty slot */
+               i = tgt_find_free_reply_slot(tgt);
+               if (unlikely(i < 0)) {
+                       CERROR("%s: couldn't find a slot for reply data: "
+                              "rc = %d\n", tgt_name(tgt), i);
+                       RETURN(i);
+               }
+               trd->trd_index = i;
 
-       if (update_lrd_file) {
-               loff_t  off;
-               int     rc;
+               if (update_lrd_file) {
+                       loff_t  off;
 
-               /* write reply data to disk */
-               off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i;
-               rc = tgt_reply_data_write(env, tgt, lrd, off, th);
-               if (unlikely(rc != 0)) {
-                       CERROR("%s: can't update %s file: rc = %d\n",
-                              tgt_name(tgt), REPLY_DATA, rc);
-                       RETURN(rc);
+                       /* write reply data to disk */
+                       off = sizeof(struct lsd_reply_header) + sizeof(*lrd) * i;
+                       rc = tgt_reply_data_write(env, tgt, lrd, off, th);
+                       if (unlikely(rc != 0)) {
+                               CERROR("%s: can't update %s file: rc = %d\n",
+                                      tgt_name(tgt), REPLY_DATA, rc);
+                               GOTO(free_slot, rc);
+                       }
                }
+       } else {
+               trd->trd_index = TRD_INDEX_MEMORY;
        }
+
        /* add reply data to target export's reply list */
        mutex_lock(&ted->ted_lcd_lock);
        if (req != NULL) {
@@ -1210,8 +1254,12 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
                              MSG_REPLAY : MSG_REPLAY|MSG_RESENT;
 
                if (req->rq_obsolete) {
+                       CDEBUG(D_INFO,
+                              "drop reply data update for obsolete req xid=%llu,"
+                              "transno=%llu, tag=%hu\n", req->rq_xid,
+                              lrd->lrd_transno, trd->trd_tag);
                        mutex_unlock(&ted->ted_lcd_lock);
-                       RETURN(-EALREADY);
+                       GOTO(free_slot, rc = -EBADR);
                }
 
                if (!(lustre_msg_get_flags(req->rq_reqmsg) & exclude))
@@ -1230,8 +1278,77 @@ int tgt_add_reply_data(const struct lu_env *env, struct lu_target *tgt,
               trd->trd_tag, lrd->lrd_client_gen, trd->trd_index);
 
        RETURN(0);
+
+free_slot:
+       if (tgt != NULL)
+               tgt_clear_reply_slot(tgt, trd->trd_index);
+       return rc;
 }
-EXPORT_SYMBOL(tgt_add_reply_data);
+
+int tgt_mk_reply_data(const struct lu_env *env,
+                     struct lu_target *tgt,
+                     struct tg_export_data *ted,
+                     struct ptlrpc_request *req,
+                     __u64 opdata,
+                     struct thandle *th,
+                     bool write_update,
+                     __u64 transno)
+{
+       struct tgt_session_info *ses = NULL;
+       struct tg_reply_data *reply;
+       struct lsd_reply_data *rec;
+       __u64 *versions = NULL;
+       int result;
+       int n;
+
+       OBD_ALLOC_PTR(reply);
+       if (unlikely(reply == NULL))
+               RETURN(-ENOMEM);
+
+       /* session info can only be looked up when an env is supplied */
+       if (env != NULL)
+               ses = tgt_ses_info(env);
+
+       rec = &reply->trd_reply;
+       rec->lrd_transno = transno;
+       rec->lrd_data = opdata;
+
+       if (req == NULL) {
+               /* no request: xid, result and generation come from the
+                * session, which therefore must exist
+                */
+               LASSERT(env != NULL);
+               LASSERT(ses->tsi_xid != 0);
+               rec->lrd_xid = ses->tsi_xid;
+               rec->lrd_result = ses->tsi_result;
+               rec->lrd_client_gen = ses->tsi_client_gen;
+       } else {
+               rec->lrd_xid = req->rq_xid;
+               reply->trd_tag = lustre_msg_get_tag(req->rq_reqmsg);
+               rec->lrd_client_gen = ted->ted_lcd->lcd_generation;
+               if (write_update) {
+                       rec->lrd_result = th->th_result;
+                       versions = lustre_msg_get_versions(req->rq_repmsg);
+               }
+       }
+
+       /* copy the four version slots, if any came from the reply message */
+       for (n = 0; versions != NULL && n < 4; n++)
+               reply->trd_pre_versions[n] = versions[n];
+
+       if (ses != NULL && ses->tsi_open_obj != NULL)
+               reply->trd_object = *lu_object_fid(&ses->tsi_open_obj->do_lu);
+
+       result = tgt_add_reply_data(env, tgt, ted, reply, req, th,
+                                   write_update);
+       if (result >= 0)
+               return result;
+
+       /* tgt_add_reply_data() failed: free it; -EBADR is turned into 0 */
+       OBD_FREE_PTR(reply);
+       return result == -EBADR ? 0 : result;
+}
+EXPORT_SYMBOL(tgt_mk_reply_data);
 
 /*
  * last_rcvd & last_committed update callbacks
@@ -1242,11 +1359,11 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt,
 {
        struct tgt_thread_info  *tti = tgt_th_info(env);
        struct tgt_session_info *tsi = tgt_ses_info(env);
-       struct obd_export       *exp = tsi->tsi_exp;
-       struct tg_export_data   *ted;
-       __u64                   *transno_p;
-       int                      rc = 0;
-       bool                     lw_client;
+       struct obd_export *exp = tsi->tsi_exp;
+       struct tg_export_data *ted;
+       __u64 *transno_p;
+       bool nolcd = false;
+       int rc = 0;
 
        ENTRY;
 
@@ -1254,11 +1371,15 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt,
        LASSERT(exp != NULL);
        ted = &exp->exp_target_data;
 
-       lw_client = exp_connect_flags(exp) & OBD_CONNECT_LIGHTWEIGHT;
-       if (ted->ted_lr_idx < 0 && !lw_client)
-               /* ofd connect may cause transaction before export has
-                * last_rcvd slot */
-               RETURN(0);
+       /* Some clients don't support recovery, and they don't have last_rcvd
+        * client data:
+        * 1. lightweight clients.
+        * 2. local clients on MDS which doesn't enable "localrecov".
+        * 3. OFD connect may cause transaction before export has last_rcvd
+        *    slot.
+        */
+       if (ted->ted_lr_idx < 0)
+               nolcd = true;
 
        if (req != NULL)
                tti->tti_transno = lustre_msg_get_transno(req->rq_reqmsg);
@@ -1299,14 +1420,13 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt,
        /* if can't add callback, do sync write */
        th->th_sync |= !!tgt_last_commit_cb_add(th, tgt, exp, tti->tti_transno);
 
-       if (lw_client) {
-               /* All operations performed by LW clients are synchronous and
-                * we store the committed transno in the last_rcvd header */
+       if (nolcd) {
+               /* store transno in the last_rcvd header */
                spin_lock(&tgt->lut_translock);
                if (tti->tti_transno > tgt->lut_lsd.lsd_last_transno) {
                        tgt->lut_lsd.lsd_last_transno = tti->tti_transno;
                        spin_unlock(&tgt->lut_translock);
-                       /* Although lightweight (LW) connections have no slot
+                       /* Although current connection doesn't have slot
                         * in the last_rcvd, we still want to maintain
                         * the in-memory lsd_client_data structure in order to
                         * properly handle reply reconstruction. */
@@ -1322,48 +1442,8 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt,
 
        /* Target that supports multiple reply data */
        if (tgt_is_multimodrpcs_client(exp)) {
-               struct tg_reply_data    *trd;
-               struct lsd_reply_data   *lrd;
-               __u64                   *pre_versions;
-               bool                    write_update;
-
-               OBD_ALLOC_PTR(trd);
-               if (unlikely(trd == NULL))
-                       RETURN(-ENOMEM);
-
-               /* fill reply data information */
-               lrd = &trd->trd_reply;
-               lrd->lrd_transno = tti->tti_transno;
-               if (req != NULL) {
-                       lrd->lrd_xid = req->rq_xid;
-                       trd->trd_tag = lustre_msg_get_tag(req->rq_reqmsg);
-                       pre_versions = lustre_msg_get_versions(req->rq_repmsg);
-                       lrd->lrd_result = th->th_result;
-                       lrd->lrd_client_gen = ted->ted_lcd->lcd_generation;
-                       write_update = true;
-               } else {
-                       LASSERT(tsi->tsi_xid != 0);
-                       lrd->lrd_xid = tsi->tsi_xid;
-                       lrd->lrd_result = tsi->tsi_result;
-                       lrd->lrd_client_gen = tsi->tsi_client_gen;
-                       trd->trd_tag = 0;
-                       pre_versions = NULL;
-                       write_update = false;
-               }
-
-               lrd->lrd_data = opdata;
-               if (pre_versions) {
-                       trd->trd_pre_versions[0] = pre_versions[0];
-                       trd->trd_pre_versions[1] = pre_versions[1];
-                       trd->trd_pre_versions[2] = pre_versions[2];
-                       trd->trd_pre_versions[3] = pre_versions[3];
-               }
-
-               rc = tgt_add_reply_data(env, tgt, ted, trd, req,
-                                       th, write_update);
-               if (rc < 0)
-                       OBD_FREE_PTR(trd);
-               return rc;
+               return tgt_mk_reply_data(env, tgt, ted, req, opdata, th,
+                                        !!(req != NULL), tti->tti_transno);
        }
 
        /* Enough for update replay, let's return */
@@ -1419,7 +1499,7 @@ static int tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt,
                }
        }
 
-       if (!lw_client) {
+       if (!nolcd) {
                tti->tti_off = ted->ted_lr_off;
                if (CFS_FAIL_CHECK(OBD_FAIL_TGT_RCVD_EIO))
                        rc = -EIO;
@@ -1718,10 +1798,9 @@ int tgt_server_data_init(const struct lu_env *env, struct lu_target *tgt)
                }
 
                if (lsd->lsd_osd_index != index) {
-                       LCONSOLE_ERROR_MSG(0x157, "%s: index %d in last rcvd "
-                                          "is different with the index %d in"
-                                          "config log, It might be disk"
-                                          "corruption!\n", tgt_name(tgt),
+                       LCONSOLE_ERROR_MSG(0x157,
+                                          "%s: index %d in last rcvd is different with the index %d in config log, It might be disk corruption!\n",
+                                          tgt_name(tgt),
                                           lsd->lsd_osd_index, index);
                        RETURN(-EINVAL);
                }
@@ -1937,7 +2016,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt)
        unsigned long            reply_data_size;
        int                      rc;
        struct lsd_reply_header *lrh = NULL;
-       struct lsd_client_data  *lcd = NULL;
        struct tg_reply_data    *trd = NULL;
        int                      idx;
        loff_t                   off;
@@ -1986,10 +2064,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt)
                if (hash == NULL)
                        GOTO(out, rc = -ENODEV);
 
-               OBD_ALLOC_PTR(lcd);
-               if (lcd == NULL)
-                       GOTO(out, rc = -ENOMEM);
-
                OBD_ALLOC_PTR(trd);
                if (trd == NULL)
                        GOTO(out, rc = -ENOMEM);
@@ -2027,6 +2101,7 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt)
                        trd->trd_pre_versions[3] = 0;
                        trd->trd_index = idx;
                        trd->trd_tag = 0;
+                       fid_zero(&trd->trd_object);
                        list_add(&trd->trd_list, &ted->ted_reply_list);
                        ted->ted_reply_cnt++;
                        if (ted->ted_reply_cnt > ted->ted_reply_max)
@@ -2041,6 +2116,13 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt)
                        /* update export last committed transation */
                        exp->exp_last_committed = max(exp->exp_last_committed,
                                                      lrd->lrd_transno);
+                       /* Update lcd_last_transno as well for check in
+                        * tgt_release_reply_data() or the latest client
+                        * transno can be lost.
+                        */
+                       ted->ted_lcd->lcd_last_transno =
+                               max(ted->ted_lcd->lcd_last_transno,
+                                   exp->exp_last_committed);
 
                        mutex_unlock(&ted->ted_lcd_lock);
                        class_export_put(exp);
@@ -2072,8 +2154,6 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt)
 out:
        if (hash != NULL)
                cfs_hash_putref(hash);
-       if (lcd != NULL)
-               OBD_FREE_PTR(lcd);
        if (trd != NULL)
                OBD_FREE_PTR(trd);
        if (lrh != NULL)