LU-7117 osp: set ptlrpc_request::rq_allow_replay properly

author Fan Yong <fan.yong@intel.com>

Wed, 15 Jun 2016 06:56:01 +0000 (14:56 +0800)

committer Oleg Drokin <oleg.drokin@intel.com>

Mon, 15 Aug 2016 21:08:48 +0000 (21:08 +0000)
author Fan Yong <fan.yong@intel.com>
Wed, 15 Jun 2016 06:56:01 +0000 (14:56 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 15 Aug 2016 21:08:48 +0000 (21:08 +0000)
diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h

index 208366a..999da87 100644 (file)
--- a/lustre/osp/osp_internal.h
+++ b/lustre/osp/osp_internal.h
@@ -797,4 +797,23 @@ void __osp_sync_check_for_work(struct osp_device *d);
  extern struct obd_ops lwp_obd_device_ops;
  extern struct lu_device_type lwp_device_type;
  
  extern struct obd_ops lwp_obd_device_ops;
  extern struct lu_device_type lwp_device_type;
  
+/**
+ * Return the top-level device of the site this OSP device belongs to.
+ *
+ * Follows osp->opd_dt_dev.dd_lu_dev.ld_site->ls_top_dev without any NULL
+ * check along the way.
+ * NOTE(review): presumably every link of that chain is set up before any
+ * caller runs - confirm.
+ *
+ * \param[in] osp	OSP device to start from
+ *
+ * \retval		the ls_top_dev pointer of the OSP device's site
+ */
+static inline struct lu_device *osp2top(const struct osp_device *osp)
+{
+       return osp->opd_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
+}
+
+/**
+ * Set rq_allow_replay on \a req when the local target is recovering or
+ * is about to recover.
+ *
+ * The decision is taken from the obd_device of the top-level device
+ * returned by osp2top(), not from the OSP device itself. The flag is only
+ * ever set here; when neither condition holds \a req is left untouched.
+ *
+ * \param[in] osp	OSP device the request is sent through
+ * \param[in,out] req	request whose rq_allow_replay may be set to 1
+ */
+static inline void osp_set_req_replay(const struct osp_device *osp,
+                                     struct ptlrpc_request *req)
+{
+       struct obd_device *obd = osp2top(osp)->ld_obd;
+
+       /* The RPC must be recovery related in the following cases:
+        *
+        * 1. it is sent during recovery (obd_recovering is set), or
+        * 2. it is sent before the recovery thread target_recovery_thread()
+        *    starts, such as when triggered by lod_sub_recovery_thread();
+        *    this case is detected via obd_replayable && obd_no_conn. */
+       if (obd->obd_recovering || (obd->obd_replayable && obd->obd_no_conn))
+               req->rq_allow_replay = 1;
+}
+
  #endif
  #endif
diff --git a/lustre/osp/osp_md_object.c b/lustre/osp/osp_md_object.c

index a976fec..4d72051 100644 (file)
--- a/lustre/osp/osp_md_object.c
+++ b/lustre/osp/osp_md_object.c
@@ -869,12 +869,11 @@ static int osp_md_object_lock(const struct lu_env *env,
                               union ldlm_policy_data *policy)
  {
         struct ldlm_res_id      *res_id;
                               union ldlm_policy_data *policy)
  {
         struct ldlm_res_id      *res_id;
-       struct dt_device        *dt_dev = lu2dt_dev(dt->do_lu.lo_dev);
-       struct osp_device       *osp = dt2osp_dev(dt_dev);
-       struct lu_device        *top_device;
+       struct osp_device       *osp = dt2osp_dev(lu2dt_dev(dt->do_lu.lo_dev));
         struct ptlrpc_request   *req;
         int                     rc = 0;
         __u64                   flags = LDLM_FL_NO_LRU;
         struct ptlrpc_request   *req;
         int                     rc = 0;
         __u64                   flags = LDLM_FL_NO_LRU;
+       ENTRY;
  
         res_id = einfo->ei_res_id;
         LASSERT(res_id != NULL);
  
         res_id = einfo->ei_res_id;
         LASSERT(res_id != NULL);
@@ -888,22 +887,14 @@ static int osp_md_object_lock(const struct lu_env *env,
         if (IS_ERR(req))
                 RETURN(PTR_ERR(req));
  
         if (IS_ERR(req))
                 RETURN(PTR_ERR(req));
  
-       /* During recovery, it needs to let OSP send enqueue
-        * without checking recoverying status, in case the
-        * other target is being recovered at the same time,
-        * and if we wait here for the import to be recovered,
-        * it might cause deadlock */
-       top_device = dt_dev->dd_lu_dev.ld_site->ls_top_dev;
-       if (top_device->ld_obd->obd_recovering)
-               req->rq_allow_replay = 1;
-
+       osp_set_req_replay(osp, req);
         rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
                               (const union ldlm_policy_data *)policy,
                               &flags, NULL, 0, LVB_T_NONE, lh, 0);
  
         ptlrpc_req_finished(req);
  
         rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
                               (const union ldlm_policy_data *)policy,
                               &flags, NULL, 0, LVB_T_NONE, lh, 0);
  
         ptlrpc_req_finished(req);
  
-       return rc == ELDLM_OK ? 0 : -EIO;
+       RETURN(rc == ELDLM_OK ? 0 : -EIO);
  }
  
  /**
  }
  
  /**
@@ -1189,10 +1180,7 @@ static ssize_t osp_md_read(const struct lu_env *env, struct dt_object *dt,
                 ptr += read_size;
         }
  
                 ptr += read_size;
         }
  
-       /* This will only be called with read-only update, and these updates
-        * might be used to retrieve update log during recovery process, so
-        * it will be allowed to send during recovery process */
-       req->rq_allow_replay = 1;
+       osp_set_req_replay(osp, req);
         req->rq_bulk_read = 1;
         /* send request to master and wait for RPC to complete */
         rc = ptlrpc_queue_wait(req);
         req->rq_bulk_read = 1;
         /* send request to master and wait for RPC to complete */
         rc = ptlrpc_queue_wait(req);
diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c

index 91367d2..73b1ab7 100644 (file)
--- a/lustre/osp/osp_object.c
+++ b/lustre/osp/osp_object.c
@@ -1650,7 +1650,6 @@ static int osp_it_fetch(const struct lu_env *env, struct osp_it *it)
         struct lu_device         *dev   = it->ooi_obj->do_lu.lo_dev;
         struct osp_device        *osp   = lu2osp_dev(dev);
         struct page             **pages;
         struct lu_device         *dev   = it->ooi_obj->do_lu.lo_dev;
         struct osp_device        *osp   = lu2osp_dev(dev);
         struct page             **pages;
-       struct lu_device *top_device;
         struct ptlrpc_request    *req   = NULL;
         struct ptlrpc_bulk_desc  *desc;
         struct idx_info          *ii;
         struct ptlrpc_request    *req   = NULL;
         struct ptlrpc_bulk_desc  *desc;
         struct idx_info          *ii;
@@ -1686,13 +1685,7 @@ static int osp_it_fetch(const struct lu_env *env, struct osp_it *it)
                 RETURN(rc);
         }
  
                 RETURN(rc);
         }
  
-       /* Let's allow this request during recovery, otherwise
-        * if the remote target is also in recovery status,
-        * it might cause deadlock */
-       top_device = dev->ld_site->ls_top_dev;
-       if (top_device->ld_obd->obd_recovering)
-               req->rq_allow_replay = 1;
-
+       osp_set_req_replay(osp, req);
         req->rq_request_portal = OUT_PORTAL;
         ii = req_capsule_client_get(&req->rq_pill, &RMF_IDX_INFO);
         memset(ii, 0, sizeof(*ii));
         req->rq_request_portal = OUT_PORTAL;
         ii = req_capsule_client_get(&req->rq_pill, &RMF_IDX_INFO);
         memset(ii, 0, sizeof(*ii));
diff --git a/lustre/osp/osp_trans.c b/lustre/osp/osp_trans.c

index ccc8d52..c9c15c0 100644 (file)
--- a/lustre/osp/osp_trans.c
+++ b/lustre/osp/osp_trans.c
@@ -484,10 +484,7 @@ int osp_remote_sync(const struct lu_env *env, struct osp_device *osp,
         if (rc != 0)
                 RETURN(rc);
  
         if (rc != 0)
                 RETURN(rc);
  
-       /* This will only be called with read-only update, and these updates
-        * might be used to retrieve update log during recovery process, so
-        * it will be allowed to send during recovery process */
-       req->rq_allow_replay = 1;
+       osp_set_req_replay(osp, req);
         req->rq_allow_intr = 1;
  
         /* Note: some dt index api might return non-zero result here, like
         req->rq_allow_intr = 1;
  
         /* Note: some dt index api might return non-zero result here, like
@@ -1117,7 +1114,6 @@ static int osp_send_update_req(const struct lu_env *env,
  {
         struct osp_update_args  *args;
         struct ptlrpc_request   *req;
  {
         struct osp_update_args  *args;
         struct ptlrpc_request   *req;
-       struct lu_device *top_device;
         struct osp_thandle      *oth = our->our_th;
         int     rc = 0;
         ENTRY;
         struct osp_thandle      *oth = our->our_th;
         int     rc = 0;
         ENTRY;
@@ -1174,9 +1170,7 @@ static int osp_send_update_req(const struct lu_env *env,
                  * status, in case the other target is being recoveried
                  * at the same time, and if we wait here for the import
                  * to be recoveryed, it might cause deadlock */
                  * status, in case the other target is being recoveried
                  * at the same time, and if we wait here for the import
                  * to be recoveryed, it might cause deadlock */
-               top_device = osp->opd_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
-               if (top_device->ld_obd->obd_recovering)
-                       req->rq_allow_replay = 1;
+               osp_set_req_replay(osp, req);
  
                 /* Because this req will be synchronus, i.e. it will be called
                  * in the same thread, so it will be safe to use current
  
                 /* Because this req will be synchronus, i.e. it will be called
                  * in the same thread, so it will be safe to use current
author	Fan Yong <fan.yong@intel.com>
	Wed, 15 Jun 2016 06:56:01 +0000 (14:56 +0800)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Mon, 15 Aug 2016 21:08:48 +0000 (21:08 +0000)
lustre/osp/osp_internal.h		patch \| blob \| history
lustre/osp/osp_md_object.c		patch \| blob \| history
lustre/osp/osp_object.c		patch \| blob \| history
lustre/osp/osp_trans.c		patch \| blob \| history