b=18881

[fs/lustre-release.git] / lustre / osc / osc_io.c
diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c

index fd38a1c..5cb13ee 100644 (file)
--- a/lustre/osc/osc_io.c
+++ b/lustre/osc/osc_io.c
@@ -113,7 +113,8 @@ static void osc_io_unplug(const struct lu_env *env, struct osc_object *osc,
   */
  static int osc_io_submit(const struct lu_env *env,
                           const struct cl_io_slice *ios,
-                         enum cl_req_type crt, struct cl_2queue *queue)
+                         enum cl_req_type crt, struct cl_2queue *queue,
+                         enum cl_req_priority priority)
  {
          struct cl_page    *page;
          struct cl_page    *tmp;
@@ -148,6 +149,11 @@ static int osc_io_submit(const struct lu_env *env,
                  osc = cl2osc(opg->ops_cl.cpl_obj);
                  exp = osc_export(osc);
  
+                if (priority > CRP_NORMAL) {
+                        spin_lock(&oap->oap_lock);
+                        oap->oap_async_flags |= ASYNC_HP;
+                        spin_unlock(&oap->oap_lock);
+                }
                  /*
                   * This can be checked without cli->cl_loi_list_lock, because
                   * ->oap_*_item are always manipulated when the page is owned.
@@ -177,9 +183,18 @@ static int osc_io_submit(const struct lu_env *env,
                                                                    osc->oo_oinfo,
                                                                    oap,
                                                                    OSC_FLAGS);
-                                if (result != 0)
-                                        break;
+                                /*
+                                 * bug 18881: we can't just break out here when
+                                 * an error occurs after cl_page_prep has been
+                                 * called against the page. The correct
+                                 * way is to call page's completion routine,
+                                 * as in osc_oap_interrupted.  For simplicity,
+                                 * we just force osc_set_async_flags_base() to
+                                 * not return error.
+                                 */
+                                LASSERT(result == 0);
                          }
+                        opg->ops_submit_time = cfs_time_current();
                  } else {
                          LASSERT(result < 0);
                          if (result != -EALREADY)
@@ -194,6 +209,19 @@ static int osc_io_submit(const struct lu_env *env,
                  /*
                   * Don't keep client_obd_list_lock() for too long.
                   *
+                 * XXX client_obd_list lock has to be unlocked periodically to
+                 * avoid soft-lockups that tend to happen otherwise (see bug
+                 * 16651). On the other hand, osc_io_submit_page() queues a
+                 * page with ASYNC_URGENT flag and so all pages queued up
+                 * until this point are sent out immediately by
+                 * osc_io_unplug() resulting in sub-optimal RPCs (sub-optimal
+                 * RPCs only happen during `warm up' phase when less than
+                 * cl_max_rpcs_in_flight RPCs are in flight). To balance these
+                 * conflicting requirements, one might unplug once enough
+                 * pages to form a large RPC were queued (i.e., use
+                 * cli->cl_max_pages_per_rpc as OSC_QUEUE_GRAIN, see
+                 * lop_makes_rpc()), or ignore soft-lockup issue altogether.
+                 *
                   * XXX lock_need_resched() should be used here, but it is not
                   * available in the older of supported kernels.
                   */
@@ -641,7 +669,7 @@ int osc_req_init(const struct lu_env *env, struct cl_device *dev,
          struct osc_req *or;
          int result;
  
-        OBD_SLAB_ALLOC_PTR(or, osc_req_kmem);
+        OBD_SLAB_ALLOC_PTR_GFP(or, osc_req_kmem, CFS_ALLOC_IO);
          if (or != NULL) {
                  cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
                  result = 0;