Whamcloud - gitweb
LU-14876 out: don't connect to busy MDS-MDS export
[fs/lustre-release.git] / lustre / target / out_handler.c
index d62bb26..57c0d91 100644 (file)
@@ -20,7 +20,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright (c) 2013, 2016, Intel Corporation.
+ * Copyright (c) 2013, 2017, Intel Corporation.
  *
  * lustre/target/out_handler.c
  *
@@ -52,11 +52,10 @@ static void out_reconstruct(const struct lu_env *env, struct dt_device *dt,
                            struct object_update_reply *reply,
                            int index)
 {
-       CDEBUG(D_INFO, "%s: fork reply reply %p index %d: rc = %d\n",
+       CDEBUG(D_HA, "%s: fork reply reply %p index %d: rc = %d\n",
               dt_obd_name(dt), reply, index, 0);
 
        object_update_result_insert(reply, NULL, 0, index, 0);
-       return;
 }
 
 typedef void (*out_reconstruct_t)(const struct lu_env *env,
@@ -65,16 +64,10 @@ typedef void (*out_reconstruct_t)(const struct lu_env *env,
                                  struct object_update_reply *reply,
                                  int index);
 
-static inline int out_check_resent(const struct lu_env *env,
-                                  struct dt_device *dt,
-                                  struct dt_object *obj,
-                                  struct ptlrpc_request *req,
-                                  out_reconstruct_t reconstruct,
-                                  struct object_update_reply *reply,
-                                  int index)
+static inline bool out_check_resent(struct ptlrpc_request *req)
 {
        if (likely(!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)))
-               return 0;
+               return false;
 
        if (req_xid_is_last(req)) {
                struct lsd_client_data *lcd;
@@ -90,14 +83,12 @@ static inline int out_check_resent(const struct lu_env *env,
                lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
                lustre_msg_set_status(req->rq_repmsg, req->rq_status);
 
-               DEBUG_REQ(D_RPCTRACE, req, "restoring resent RPC");
-
-               reconstruct(env, dt, obj, reply, index);
-               return 1;
+               DEBUG_REQ(D_HA, req, "reconstruct resent RPC");
+               return true;
        }
-       DEBUG_REQ(D_HA, req, "no reply for RESENT req (have %lld)",
-                req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
-       return 0;
+       DEBUG_REQ(D_HA, req, "reprocess RESENT req, last_xid is %lld",
+                 req->rq_export->exp_target_data.ted_lcd->lcd_last_xid);
+       return false;
 }
 
 static int out_create(struct tgt_session_info *tsi)
@@ -122,7 +113,7 @@ static int out_create(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(wobdo));
        }
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                lustre_swab_obdo(wobdo);
        lustre_get_wire_obdo(NULL, lobdo, wobdo);
        la_from_obdo(attr, lobdo, lobdo->o_valid);
@@ -135,7 +126,7 @@ static int out_create(struct tgt_session_info *tsi)
                               tgt_name(tsi->tsi_tgt), PTR_ERR(fid));
                        RETURN(PTR_ERR(fid));
                }
-               if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+               if (req_capsule_req_need_swab(tsi->tsi_pill))
                        lustre_swab_lu_fid(fid);
                if (!fid_is_sane(fid)) {
                        CERROR("%s: invalid fid "DFID": rc = %d\n",
@@ -178,7 +169,7 @@ static int out_attr_set(struct tgt_session_info *tsi)
        attr->la_valid = 0;
        attr->la_valid = 0;
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                lustre_swab_obdo(wobdo);
        lustre_get_wire_obdo(NULL, lobdo, wobdo);
        la_from_obdo(attr, lobdo, lobdo->o_valid);
@@ -217,7 +208,7 @@ static int out_attr_get(struct tgt_session_info *tsi)
                RETURN(-ENOENT);
        }
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        rc = dt_attr_get(env, obj, la);
        if (rc)
                GOTO(out_unlock, rc);
@@ -266,7 +257,7 @@ static int out_xattr_get(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(name));
        }
 
-       update_result = object_update_result_get(reply, 0, NULL);
+       update_result = object_update_result_get(reply, idx, NULL);
        if (update_result == NULL) {
                CERROR("%s: empty name for xattr get: rc = %d\n",
                       tgt_name(tsi->tsi_tgt), -EPROTO);
@@ -279,7 +270,7 @@ static int out_xattr_get(struct tgt_session_info *tsi)
        else
                lbuf->lb_buf = update_result->our_data;
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        rc = dt_xattr_get(env, obj, lbuf, name);
        dt_read_unlock(env, obj);
        if (rc <= 0) {
@@ -289,15 +280,70 @@ static int out_xattr_get(struct tgt_session_info *tsi)
        } else if (lbuf->lb_buf) {
                lbuf->lb_len = rc;
        }
-
-       CDEBUG(D_INFO, "%s: "DFID" get xattr %s len %d: rc = %d\n",
+       CDEBUG(D_INFO, "%s: "DFID" get xattr %s len %d\n",
               tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)),
-              name, (int)lbuf->lb_len, rc);
+              name, rc);
 
+       GOTO(out, rc);
+
+out:
        object_update_result_insert(reply, lbuf->lb_buf, lbuf->lb_len, idx, rc);
        RETURN(0);
 }
 
+/**
+ * Handle an OUT_XATTR_LIST update: list the xattrs of the update's object.
+ *
+ * The object, the reply and the reply index all come from tgt_th_info(env).
+ * dt_xattr_list() writes straight into the data area of this update's result
+ * slot, and its status (-ENODATA when it returns 0) is recorded in that slot
+ * instead of being returned.
+ *
+ * \param[in] tsi       session info of the request carrying the update
+ *
+ * \retval 0            a result was stored in the reply, whatever its status
+ * \retval -ENOENT      the object does not exist
+ * \retval -EPROTO      the reply has no result slot for this update
+ */
+static int out_xattr_list(struct tgt_session_info *tsi)
+{
+       const struct lu_env *env = tsi->tsi_env;
+       struct tgt_thread_info *tti = tgt_th_info(env);
+       struct lu_buf *lbuf = &tti->tti_buf;
+       struct object_update_reply *reply = tti->tti_u.update.tti_update_reply;
+       struct dt_object *obj = tti->tti_u.update.tti_dt_object;
+       struct object_update_result *update_result;
+       int idx = tti->tti_u.update.tti_update_reply_index;
+       int rc;
+
+       ENTRY;
+
+       if (!lu_object_exists(&obj->do_lu)) {
+               set_bit(LU_OBJECT_HEARD_BANSHEE,
+                       &obj->do_lu.lo_header->loh_flags);
+               RETURN(-ENOENT);
+       }
+
+       /* Fetch the slot at this update's own index: it is the one that
+        * object_update_result_insert() fills in below, so the list has to
+        * be written to its data area, not to that of slot 0. */
+       update_result = object_update_result_get(reply, idx, NULL);
+       if (!update_result) {
+               rc = -EPROTO;
+               CERROR("%s: empty buf for xattr list: rc = %d\n",
+                      tgt_name(tsi->tsi_tgt), rc);
+               RETURN(rc);
+       }
+
+       /* NOTE(review): a zero ou_result_size presumably means the sender
+        * only wants the list size, hence no buffer - confirm with sender. */
+       lbuf->lb_len = (int)tti->tti_u.update.tti_update->ou_result_size;
+       lbuf->lb_buf = update_result->our_data;
+       if (lbuf->lb_len == 0)
+               lbuf->lb_buf = NULL;
+
+       dt_read_lock(env, obj, DT_TGT_CHILD);
+       rc = dt_xattr_list(env, obj, lbuf);
+       dt_read_unlock(env, obj);
+       if (rc <= 0) {
+               /* no data to send back; a zero return becomes -ENODATA */
+               lbuf->lb_len = 0;
+               if (unlikely(!rc))
+                       rc = -ENODATA;
+       } else if (lbuf->lb_buf) {
+               lbuf->lb_len = rc;
+       }
+
+       CDEBUG(D_INFO, "%s: "DFID" list xattr len %d\n",
+              tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)), rc);
+
+       /* Since we directly use update_result->our_data as the lbuf->lb_buf,
+        * then use NULL for result_insert to avoid unnecessary memory copy. */
+       object_update_result_insert(reply, NULL, lbuf->lb_len, idx, rc);
+
+       RETURN(0);
+}
+
 static int out_index_lookup(struct tgt_session_info *tsi)
 {
        const struct lu_env     *env = tsi->tsi_env;
@@ -322,7 +368,7 @@ static int out_index_lookup(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(name));
        }
 
-       dt_read_lock(env, obj, MOR_TGT_CHILD);
+       dt_read_lock(env, obj, DT_TGT_CHILD);
        if (!dt_try_as_dir(env, obj))
                GOTO(out_unlock, rc = -ENOTDIR);
 
@@ -389,7 +435,7 @@ static int out_xattr_set(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(tmp));
        }
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                __swab32s(tmp);
        flag = *tmp;
 
@@ -486,7 +532,7 @@ static int out_index_insert(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(fid));
        }
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                lustre_swab_lu_fid(fid);
 
        if (!fid_is_sane(fid)) {
@@ -502,7 +548,7 @@ static int out_index_insert(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(ptype));
        }
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                __swab32s(ptype);
 
        rec->rec_fid = fid;
@@ -513,6 +559,11 @@ static int out_index_insert(struct tgt_session_info *tsi)
                                 tti->tti_tea.ta_handle,
                                 tti->tti_u.update.tti_update_reply,
                                 tti->tti_u.update.tti_update_reply_index);
+
+       CDEBUG(D_INFO, "%s: "DFID" index insert %s: rc = %d\n",
+              tgt_name(tsi->tsi_tgt), PFID(lu_object_fid(&obj->do_lu)),
+              name, rc);
+
        RETURN(rc);
 }
 
@@ -598,7 +649,7 @@ static int out_write(struct tgt_session_info *tsi)
                RETURN(PTR_ERR(tmp));
        }
 
-       if (ptlrpc_req_need_swab(tsi->tsi_pill->rc_req))
+       if (req_capsule_req_need_swab(tsi->tsi_pill))
                __swab64s(tmp);
        pos = *tmp;
 
@@ -656,8 +707,7 @@ static int out_read(struct tgt_session_info *tsi)
        orr = (struct out_read_reply *)update_result->our_data;
 
        nbufs = (size + OUT_BULK_BUFFER_SIZE - 1) / OUT_BULK_BUFFER_SIZE;
-       OBD_ALLOC(rdbuf, sizeof(struct lu_rdbuf) +
-                        nbufs * sizeof(rdbuf->rb_bufs[0]));
+       OBD_ALLOC(rdbuf, sizeof(*rdbuf) + nbufs * sizeof(rdbuf->rb_bufs[0]));
        if (rdbuf == NULL)
                GOTO(out, rc = -ENOMEM);
 
@@ -673,7 +723,7 @@ static int out_read(struct tgt_session_info *tsi)
                        GOTO(out_free, rc = -ENOMEM);
 
                rdbuf->rb_bufs[i].lb_len = read_size;
-               dt_read_lock(env, obj, MOR_TGT_CHILD);
+               dt_read_lock(env, obj, DT_TGT_CHILD);
                rc = dt_read(env, obj, &rdbuf->rb_bufs[i], &pos);
                dt_read_unlock(env, obj);
 
@@ -702,7 +752,7 @@ out_free:
                                 rdbuf->rb_bufs[i].lb_len);
                }
        }
-       OBD_FREE(rdbuf, sizeof(struct lu_rdbuf) +
+       OBD_FREE(rdbuf, sizeof(*rdbuf) +
                        nbufs * sizeof(rdbuf->rb_bufs[0]));
 out:
        /* Insert read buffer */
@@ -729,33 +779,35 @@ static int out_noop(struct tgt_session_info *tsi)
 }
 
+/* Table of the per-update OUT sub-operations: each entry ties an OUT_* opcode
+ * to its handler name, flags and handler function.  Entries flagged
+ * IS_MUTABLE are the modifying updates; the name must match the handler. */
 static struct tgt_handler out_update_ops[] = {
-       DEF_OUT_HNDL(OUT_CREATE, "out_create", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_CREATE, "out_create", IS_MUTABLE | HAS_REPLY,
                     out_create),
-       DEF_OUT_HNDL(OUT_DESTROY, "out_create", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_DESTROY, "out_destroy", IS_MUTABLE | HAS_REPLY,
                     out_destroy),
-       DEF_OUT_HNDL(OUT_REF_ADD, "out_ref_add", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_REF_ADD, "out_ref_add", IS_MUTABLE | HAS_REPLY,
                     out_ref_add),
-       DEF_OUT_HNDL(OUT_REF_DEL, "out_ref_del", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_REF_DEL, "out_ref_del", IS_MUTABLE | HAS_REPLY,
                     out_ref_del),
-       DEF_OUT_HNDL(OUT_ATTR_SET, "out_attr_set",  MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_ATTR_SET, "out_attr_set",  IS_MUTABLE | HAS_REPLY,
                     out_attr_set),
-       DEF_OUT_HNDL(OUT_ATTR_GET, "out_attr_get",  HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_ATTR_GET, "out_attr_get",  HAS_REPLY,
                     out_attr_get),
-       DEF_OUT_HNDL(OUT_XATTR_SET, "out_xattr_set", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_SET, "out_xattr_set", IS_MUTABLE | HAS_REPLY,
                     out_xattr_set),
-       DEF_OUT_HNDL(OUT_XATTR_DEL, "out_xattr_del", MUTABOR | HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_DEL, "out_xattr_del", IS_MUTABLE | HAS_REPLY,
                     out_xattr_del),
-       DEF_OUT_HNDL(OUT_XATTR_GET, "out_xattr_get", HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_XATTR_GET, "out_xattr_get", HAS_REPLY,
                     out_xattr_get),
-       DEF_OUT_HNDL(OUT_INDEX_LOOKUP, "out_index_lookup", HABEO_REFERO,
+       DEF_OUT_HNDL(OUT_INDEX_LOOKUP, "out_index_lookup", HAS_REPLY,
                     out_index_lookup),
        DEF_OUT_HNDL(OUT_INDEX_INSERT, "out_index_insert",
-                    MUTABOR | HABEO_REFERO, out_index_insert),
+                    IS_MUTABLE | HAS_REPLY, out_index_insert),
        DEF_OUT_HNDL(OUT_INDEX_DELETE, "out_index_delete",
-                    MUTABOR | HABEO_REFERO, out_index_delete),
-       DEF_OUT_HNDL(OUT_WRITE, "out_write", MUTABOR | HABEO_REFERO, out_write),
-       DEF_OUT_HNDL(OUT_READ, "out_read", HABEO_REFERO, out_read),
-       DEF_OUT_HNDL(OUT_NOOP, "out_noop", HABEO_REFERO, out_noop),
+                    IS_MUTABLE | HAS_REPLY, out_index_delete),
+       DEF_OUT_HNDL(OUT_WRITE, "out_write", IS_MUTABLE | HAS_REPLY, out_write),
+       DEF_OUT_HNDL(OUT_READ, "out_read", HAS_REPLY, out_read),
+       DEF_OUT_HNDL(OUT_NOOP, "out_noop", HAS_REPLY, out_noop),
+       DEF_OUT_HNDL(OUT_XATTR_LIST, "out_xattr_list", HAS_REPLY,
+                    out_xattr_list),
 };
 
 static struct tgt_handler *out_handler_find(__u32 opc)
@@ -904,7 +956,6 @@ int out_handle(struct tgt_session_info *tsi)
        struct object_update            *update;
        struct object_update_reply      *reply;
        struct ptlrpc_bulk_desc         *desc = NULL;
-       struct l_wait_info              lwi;
        void                            **update_bufs;
        int                             current_batchid = -1;
        __u32                           update_buf_count;
@@ -914,6 +965,8 @@ int out_handle(struct tgt_session_info *tsi)
        int                             rc1 = 0;
        int                             ouh_size, reply_size;
        int                             updates;
+       bool need_reconstruct;
+
        ENTRY;
 
        req_capsule_set(pill, &RQF_OUT_UPDATE);
@@ -937,7 +990,7 @@ int out_handle(struct tgt_session_info *tsi)
        if (update_buf_count == 0)
                RETURN(err_serious(-EPROTO));
 
-       OBD_ALLOC(update_bufs, sizeof(*update_bufs) * update_buf_count);
+       OBD_ALLOC_PTR_ARRAY(update_bufs, update_buf_count);
        if (update_bufs == NULL)
                RETURN(err_serious(-ENOMEM));
 
@@ -945,17 +998,22 @@ int out_handle(struct tgt_session_info *tsi)
                update_bufs[0] = ouh->ouh_inline_data;
        } else {
                struct out_update_buffer *tmp;
+               int page_count = 0;
 
                oub = req_capsule_client_get(pill, &RMF_OUT_UPDATE_BUF);
                if (oub == NULL)
                        GOTO(out_free, rc = err_serious(-EPROTO));
 
-               desc = ptlrpc_prep_bulk_exp(pill->rc_req, update_buf_count,
+               for (i = 0; i < update_buf_count; i++)
+                       /* First *and* last might be partial pages, hence +1 */
+                       page_count += DIV_ROUND_UP(oub[i].oub_size,
+                                                  PAGE_SIZE) + 1;
+
+               desc = ptlrpc_prep_bulk_exp(pill->rc_req, page_count,
                                            PTLRPC_BULK_OPS_COUNT,
-                                           PTLRPC_BULK_GET_SINK |
-                                           PTLRPC_BULK_BUF_KVEC,
+                                           PTLRPC_BULK_GET_SINK,
                                            MDS_BULK_PORTAL,
-                                           &ptlrpc_bulk_kvec_ops);
+                                           &ptlrpc_bulk_kiov_nopin_ops);
                if (desc == NULL)
                        GOTO(out_free, rc = err_serious(-ENOMEM));
 
@@ -977,7 +1035,7 @@ int out_handle(struct tgt_session_info *tsi)
                if (rc != 0)
                        GOTO(out_free, rc = err_serious(rc));
 
-               rc = target_bulk_io(pill->rc_req->rq_export, desc, &lwi);
+               rc = target_bulk_io(pill->rc_req->rq_export, desc);
                if (rc < 0)
                        GOTO(out_free, rc = err_serious(rc));
        }
@@ -990,8 +1048,8 @@ int out_handle(struct tgt_session_info *tsi)
                int                              j;
 
                our = update_bufs[i];
-               if (ptlrpc_req_need_swab(pill->rc_req))
-                       lustre_swab_object_update_request(our);
+               if (req_capsule_req_need_swab(pill))
+                       lustre_swab_object_update_request(our, 0);
 
                if (our->ourq_magic != UPDATE_REQUEST_MAGIC) {
                        CERROR("%s: invalid update buffer magic %x"
@@ -1007,7 +1065,7 @@ int out_handle(struct tgt_session_info *tsi)
                        update = object_update_request_get(our, j, NULL);
                        if (update == NULL)
                                GOTO(out, rc = err_serious(-EPROTO));
-                       if (ptlrpc_req_need_swab(pill->rc_req))
+                       if (req_capsule_req_need_swab(pill))
                                lustre_swab_object_update(update);
 
                        if (!fid_is_sane(&update->ou_fid)) {
@@ -1051,6 +1109,8 @@ int out_handle(struct tgt_session_info *tsi)
        tti->tti_u.update.tti_update_reply = reply;
        tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
 
+       need_reconstruct = out_check_resent(pill->rc_req);
+
        /* Walk through updates in the request to execute them */
        for (i = 0; i < update_buf_count; i++) {
                struct tgt_handler      *h;
@@ -1097,21 +1157,32 @@ int out_handle(struct tgt_session_info *tsi)
                        }
 
                        /* Check resend case only for modifying RPC */
-                       if (h->th_flags & MUTABOR) {
-                               struct ptlrpc_request *req = tgt_ses_req(tsi);
+                       if (h->th_flags & IS_MUTABLE) {
+                               /* sanity check for last XID changing */
+                               if (unlikely(!need_reconstruct &&
+                                            req_xid_is_last(pill->rc_req))) {
+                                       DEBUG_REQ(D_ERROR, pill->rc_req,
+                                                 "unexpected last XID change");
+                                       GOTO(next, rc = -EINVAL);
+                               }
 
-                               if (out_check_resent(env, dt, dt_obj, req,
-                                                    out_reconstruct, reply,
-                                                    reply_index))
+                               if (need_reconstruct) {
+                                       out_reconstruct(env, dt, dt_obj, reply,
+                                                       reply_index);
                                        GOTO(next, rc = 0);
+                               }
 
                                if (dt->dd_rdonly)
                                        GOTO(next, rc = -EROFS);
                        }
 
                        /* start transaction for modification RPC only */
-                       if (h->th_flags & MUTABOR && current_batchid == -1) {
+                       if (h->th_flags & IS_MUTABLE && current_batchid == -1) {
                                current_batchid = update->ou_batchid;
+
+                               if (reply_index == 0)
+                                       CFS_RACE(OBD_FAIL_PTLRPC_RESEND_RACE);
+
                                rc = out_tx_start(env, dt, ta, tsi->tsi_exp);
                                if (rc != 0)
                                        GOTO(next, rc);
@@ -1123,7 +1194,7 @@ int out_handle(struct tgt_session_info *tsi)
                        /* Stop the current update transaction, if the update
                         * has different batchid, or read-only update */
                        if (((current_batchid != update->ou_batchid) ||
-                            !(h->th_flags & MUTABOR)) &&
+                            !(h->th_flags & IS_MUTABLE)) &&
                             ta->ta_handle != NULL) {
                                rc = out_tx_end(env, ta, rc);
                                current_batchid = -1;
@@ -1131,7 +1202,7 @@ int out_handle(struct tgt_session_info *tsi)
                                        GOTO(next, rc);
 
                                /* start a new transaction if needed */
-                               if (h->th_flags & MUTABOR) {
+                               if (h->th_flags & IS_MUTABLE) {
                                        rc = out_tx_start(env, dt, ta,
                                                          tsi->tsi_exp);
                                        if (rc != 0)
@@ -1167,7 +1238,7 @@ out_free:
                        }
                }
 
-               OBD_FREE(update_bufs, sizeof(*update_bufs) * update_buf_count);
+               OBD_FREE_PTR_ARRAY(update_bufs, update_buf_count);
        }
 
        if (desc != NULL)
@@ -1177,7 +1248,7 @@ out_free:
 }
 
+/* Exported handler table for OUT requests: a single entry, flagged
+ * IS_MUTABLE, that routes the OUT_UPDATE opcode to out_handle(). */
 struct tgt_handler tgt_out_handlers[] = {
-TGT_UPDATE_HDL(MUTABOR,        OUT_UPDATE,     out_handle),
+TGT_UPDATE_HDL(IS_MUTABLE,     OUT_UPDATE,     out_handle),
 };
 EXPORT_SYMBOL(tgt_out_handlers);