LU-2139 osc: Use SOFT_SYNC to urge server commit

[fs/lustre-release.git] / lustre / osc / osc_request.c
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c

index 6b0ead0..2aa6b30 100644 (file)
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -658,8 +658,8 @@ static int osc_sync(const struct lu_env *env, struct obd_export *exp,
   * @objid. Found locks are added into @cancel list. Returns the amount of
   * locks added to @cancels list. */
  static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
-                                   cfs_list_t *cancels,
-                                   ldlm_mode_t mode, int lock_flags)
+                                  cfs_list_t *cancels,
+                                  ldlm_mode_t mode, __u64 lock_flags)
  {
          struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
          struct ldlm_res_id res_id;
@@ -690,32 +690,32 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
  }
  
  static int osc_destroy_interpret(const struct lu_env *env,
-                                 struct ptlrpc_request *req, void *data,
-                                 int rc)
+                                struct ptlrpc_request *req, void *data,
+                                int rc)
  {
-        struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+       struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
  
-        cfs_atomic_dec(&cli->cl_destroy_in_flight);
-        cfs_waitq_signal(&cli->cl_destroy_waitq);
-        return 0;
+       cfs_atomic_dec(&cli->cl_destroy_in_flight); /* one fewer destroy counted in flight */
+       wake_up(&cli->cl_destroy_waitq); /* wake waiters on the destroy waitqueue */
+       return 0; /* always 0; the incoming rc is not examined here */
  }
  
  static int osc_can_send_destroy(struct client_obd *cli)
  {
-        if (cfs_atomic_inc_return(&cli->cl_destroy_in_flight) <=
-            cli->cl_max_rpcs_in_flight) {
-                /* The destroy request can be sent */
-                return 1;
-        }
-        if (cfs_atomic_dec_return(&cli->cl_destroy_in_flight) <
-            cli->cl_max_rpcs_in_flight) {
-                /*
-                 * The counter has been modified between the two atomic
-                 * operations.
-                 */
-                cfs_waitq_signal(&cli->cl_destroy_waitq);
-        }
-        return 0;
+       if (cfs_atomic_inc_return(&cli->cl_destroy_in_flight) <=
+           cli->cl_max_rpcs_in_flight) {
+               /* Within the limit: keep the increment, destroy may be sent */
+               return 1;
+       }
+       if (cfs_atomic_dec_return(&cli->cl_destroy_in_flight) <
+           cli->cl_max_rpcs_in_flight) {
+               /*
+                * The inc above saw the limit exceeded but the dec now reads
+                * below it, so the counter changed in between: wake waiters.
+                */
+               wake_up(&cli->cl_destroy_waitq);
+       }
+       return 0; /* over the limit: our increment was undone above */
  }
  
  int osc_create(const struct lu_env *env, struct obd_export *exp,
@@ -838,13 +838,16 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
                 CERROR("dirty %lu - %lu > dirty_max %lu\n",
                        cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max);
                 oa->o_undirty = 0;
-       } else if (unlikely(cfs_atomic_read(&obd_dirty_pages) -
+       } else if (unlikely(cfs_atomic_read(&obd_unstable_pages) +
+                           cfs_atomic_read(&obd_dirty_pages) -
                             cfs_atomic_read(&obd_dirty_transit_pages) >
                             (long)(obd_max_dirty_pages + 1))) {
                 /* The cfs_atomic_read() allowing the cfs_atomic_inc() are
                  * not covered by a lock thus they may safely race and trip
                  * this CERROR() unless we add in a small fudge factor (+1). */
-               CERROR("dirty %d - %d > system dirty_max %d\n",
+               CERROR("%s: dirty %d + %d - %d > system dirty_max %d\n",
+                      cli->cl_import->imp_obd->obd_name,
+                      cfs_atomic_read(&obd_unstable_pages),
                        cfs_atomic_read(&obd_dirty_pages),
                        cfs_atomic_read(&obd_dirty_transit_pages),
                        obd_max_dirty_pages);
@@ -1169,8 +1172,9 @@ static int check_write_rcs(struct ptlrpc_request *req,
  static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2)
  {
          if (p1->flag != p2->flag) {
-                unsigned mask = ~(OBD_BRW_FROM_GRANT| OBD_BRW_NOCACHE|
-                                  OBD_BRW_SYNC|OBD_BRW_ASYNC|OBD_BRW_NOQUOTA);
+               unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
+                                 OBD_BRW_SYNC       | OBD_BRW_ASYNC   |
+                                 OBD_BRW_NOQUOTA    | OBD_BRW_SOFT_SYNC);
  
                  /* warn if we try to combine flags that we don't know to be
                   * safe to combine */
@@ -1657,16 +1661,16 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa,
                              obd_count page_count, struct brw_page **pga,
                              struct obd_capa *ocapa)
  {
-        struct ptlrpc_request *req;
-        int                    rc;
-        cfs_waitq_t            waitq;
-        int                    generation, resends = 0;
-        struct l_wait_info     lwi;
+       struct ptlrpc_request *req;
+       int                    rc;
+       wait_queue_head_t            waitq;
+       int                    generation, resends = 0;
+       struct l_wait_info     lwi;
  
-        ENTRY;
+       ENTRY;
  
-        cfs_waitq_init(&waitq);
-        generation = exp->exp_obd->u.cli.cl_import->imp_generation;
+       init_waitqueue_head(&waitq);
+       generation = exp->exp_obd->u.cli.cl_import->imp_generation;
  
  restart_bulk:
          rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm,
@@ -1758,6 +1762,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request,
          aa->aa_resends++;
          new_req->rq_interpret_reply = request->rq_interpret_reply;
          new_req->rq_async_args = request->rq_async_args;
+       new_req->rq_commit_cb = request->rq_commit_cb;
         /* cap resend delay to the current request timeout, this is similar to
          * what ptlrpc does (see after_reply()) */
         if (aa->aa_resends > new_req->rq_timeout)
@@ -2051,6 +2056,23 @@ static int brw_interpret(const struct lu_env *env,
         RETURN(rc);
  }
  
+static void brw_commit(struct ptlrpc_request *req)
+{
+       spin_lock(&req->rq_lock);
+       /* If osc_inc_unstable_pages() (via osc_extent_finish()) races with
+        * this function being invoked through rq_commit_cb, we must make
+        * sure osc_dec_unstable_pages() is still called; otherwise unstable
+        * pages may be leaked. */
+       if (req->rq_unstable) {
+               spin_unlock(&req->rq_lock); /* rq_lock is not held across the call below */
+               osc_dec_unstable_pages(req);
+               spin_lock(&req->rq_lock); /* retaken for the common unlock at the end */
+       } else {
+               req->rq_committed = 1; /* rq_unstable not set: only record the commit */
+       }
+       spin_unlock(&req->rq_lock);
+}
+
  /**
   * Build an RPC by the list of extent @ext_list. The caller must ensure
   * that the total pages in this list are NOT over max pages per RPC.
@@ -2162,7 +2184,9 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                 GOTO(out, rc);
         }
  
+       req->rq_commit_cb = brw_commit;
         req->rq_interpret_reply = brw_interpret;
+
         if (mem_tight != 0)
                 req->rq_memalloc = 1;
  
@@ -2436,8 +2460,8 @@ static int osc_enqueue_interpret(const struct lu_env *env,
  }
  
  void osc_update_enqueue(struct lustre_handle *lov_lockhp,
-                        struct lov_oinfo *loi, int flags,
-                        struct ost_lvb *lvb, __u32 mode, int rc)
+                       struct lov_oinfo *loi, __u64 flags,
+                       struct ost_lvb *lvb, __u32 mode, int rc)
  {
          struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
  
@@ -2491,19 +2515,19 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1;
   * release locks just after they are obtained. */
  int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
                      __u64 *flags, ldlm_policy_data_t *policy,
-                     struct ost_lvb *lvb, int kms_valid,
-                     obd_enqueue_update_f upcall, void *cookie,
-                     struct ldlm_enqueue_info *einfo,
-                     struct lustre_handle *lockh,
-                     struct ptlrpc_request_set *rqset, int async, int agl)
-{
-        struct obd_device *obd = exp->exp_obd;
-        struct ptlrpc_request *req = NULL;
-        int intent = *flags & LDLM_FL_HAS_INTENT;
-        int match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
-        ldlm_mode_t mode;
-        int rc;
-        ENTRY;
+                    struct ost_lvb *lvb, int kms_valid,
+                    obd_enqueue_update_f upcall, void *cookie,
+                    struct ldlm_enqueue_info *einfo,
+                    struct lustre_handle *lockh,
+                    struct ptlrpc_request_set *rqset, int async, int agl)
+{
+       struct obd_device *obd = exp->exp_obd;
+       struct ptlrpc_request *req = NULL;
+       int intent = *flags & LDLM_FL_HAS_INTENT;
+       __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY);
+       ldlm_mode_t mode;
+       int rc;
+       ENTRY;
  
          /* Filesystem lock extents are extended to page boundaries so that
           * dealing with the page cache is a little smoother.  */
@@ -2539,7 +2563,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
          if (mode) {
                  struct ldlm_lock *matched = ldlm_handle2lock(lockh);
  
-                if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) {
+               if ((agl != 0) && !ldlm_is_lvb_ready(matched)) {
                          /* For AGL, if enqueue RPC is sent but the lock is not
                           * granted, then skip to process this strpe.
                           * Return -ECANCELED to tell the caller. */
@@ -2652,14 +2676,14 @@ static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo,
  }
  
  int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
-                   __u32 type, ldlm_policy_data_t *policy, __u32 mode,
-                   int *flags, void *data, struct lustre_handle *lockh,
-                   int unref)
+                  __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+                  __u64 *flags, void *data, struct lustre_handle *lockh,
+                  int unref)
  {
-        struct obd_device *obd = exp->exp_obd;
-        int lflags = *flags;
-        ldlm_mode_t rc;
-        ENTRY;
+       struct obd_device *obd = exp->exp_obd;
+       __u64 lflags = *flags;
+       ldlm_mode_t rc;
+       ENTRY;
  
          if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
                  RETURN(-EIO);
@@ -2943,10 +2967,10 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
          int err = 0;
          ENTRY;
  
-        if (!cfs_try_module_get(THIS_MODULE)) {
-                CERROR("Can't get module. Is it alive?");
-                return -EINVAL;
-        }
+       if (!try_module_get(THIS_MODULE)) {
+               CERROR("Can't get module. Is it alive?");
+               return -EINVAL;
+       }
          switch (cmd) {
          case OBD_IOC_LOV_GET_CONFIG: {
                  char *buf;
@@ -3011,14 +3035,14 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
          case OBD_IOC_PING_TARGET:
                  err = ptlrpc_obd_ping(obd);
                  GOTO(out, err);
-        default:
-                CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
-                       cmd, cfs_curproc_comm());
-                GOTO(out, err = -ENOTTY);
-        }
+       default:
+               CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
+                      cmd, current_comm());
+               GOTO(out, err = -ENOTTY);
+       }
  out:
-        cfs_module_put(THIS_MODULE);
-        return err;
+       module_put(THIS_MODULE);
+       return err;
  }
  
  static int osc_get_info(const struct lu_env *env, struct obd_export *exp,