Whamcloud - gitweb
b=18881
[fs/lustre-release.git] / lustre / osc / osc_page.c
index a583eef..4686b37 100644 (file)
 
 #include "osc_cl_internal.h"
 
+/* 
+ * Comment out osc_page_protected because it may sleep inside the
+ * the client_obd_list_lock.
+ * client_obd_list_lock -> osc_ap_completion -> osc_completion ->
+ *   -> osc_page_protected -> osc_page_is_dlocked -> osc_match_base
+ *   -> ldlm_lock_match -> sptlrpc_import_check_ctx -> sleep.
+ */
+#if 0
 static int osc_page_is_dlocked(const struct lu_env *env,
                                const struct osc_page *opg,
                                enum cl_lock_mode mode, int pending, int unref)
@@ -57,6 +65,8 @@ static int osc_page_is_dlocked(const struct lu_env *env,
         ldlm_mode_t             dlmmode;
         int                     flags;
 
+        might_sleep();
+
         info = osc_env_info(env);
         resname = &info->oti_resname;
         policy = &info->oti_policy;
@@ -75,6 +85,10 @@ static int osc_page_is_dlocked(const struct lu_env *env,
                               dlmmode, &flags, NULL, lockh, unref);
 }
 
+/**
+ * Checks an invariant that a page in the cache is covered by a lock, as
+ * needed.
+ */
 static int osc_page_protected(const struct lu_env *env,
                               const struct osc_page *opg,
                               enum cl_lock_mode mode, int unref)
@@ -87,11 +101,20 @@ static int osc_page_protected(const struct lu_env *env,
 
         LINVRNT(!opg->ops_temp);
 
+        page = opg->ops_cl.cpl_page;
+        if (page->cp_owner != NULL &&
+            cl_io_top(page->cp_owner)->ci_lockreq == CILR_NEVER)
+                /*
+                 * If IO is done without locks (liblustre, or lloop), lock is
+                 * not required.
+                 */
+                result = 1;
+        else
+                /* otherwise check for a DLM lock */
         result = osc_page_is_dlocked(env, opg, mode, 1, unref);
         if (result == 0) {
                 /* maybe this page is a part of a lockless io? */
                 hdr = cl_object_header(opg->ops_cl.cpl_obj);
-                page = opg->ops_cl.cpl_page;
                 descr = &osc_env_info(env)->oti_descr;
                 descr->cld_mode = mode;
                 descr->cld_start = page->cp_index;
@@ -118,6 +141,14 @@ static int osc_page_protected(const struct lu_env *env,
         }
         return result;
 }
+#else
+static int osc_page_protected(const struct lu_env *env,
+                              const struct osc_page *opg,
+                              enum cl_lock_mode mode, int unref)
+{
+        return 1;
+}
+#endif
 
 /*****************************************************************************
  *
@@ -164,6 +195,8 @@ static void osc_page_transfer_add(const struct lu_env *env,
 {
         struct osc_object *obj;
 
+        LINVRNT(cl_page_is_vmlocked(env, opg->ops_cl.cpl_page));
+
         obj = cl2osc(opg->ops_cl.cpl_obj);
         spin_lock(&obj->oo_seatbelt);
         list_add(&opg->ops_inflight, &obj->oo_inflight[crt]);
@@ -173,7 +206,7 @@ static void osc_page_transfer_add(const struct lu_env *env,
 
 static int osc_page_cache_add(const struct lu_env *env,
                               const struct cl_page_slice *slice,
-                              struct cl_io *_)
+                              struct cl_io *unused)
 {
         struct osc_page   *opg = cl2osc_page(slice);
         struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
@@ -186,7 +219,7 @@ static int osc_page_cache_add(const struct lu_env *env,
         ENTRY;
 
         /* Set the OBD_BRW_SRVLOCK before the page is queued. */
-        brw_flags = oio->oi_lockless ? OBD_BRW_SRVLOCK : 0;
+        brw_flags = osc_io_srvlock(oio) ? OBD_BRW_SRVLOCK : 0;
         if (!client_is_remote(osc_export(obj)) &&
             cfs_capable(CFS_CAP_SYS_RESOURCE)) {
                 brw_flags |= OBD_BRW_NOQUOTA;
@@ -214,7 +247,7 @@ void osc_index2policy(ldlm_policy_data_t *policy, const struct cl_object *obj,
 
 static int osc_page_is_under_lock(const struct lu_env *env,
                                   const struct cl_page_slice *slice,
-                                  struct cl_io *_)
+                                  struct cl_io *unused)
 {
         struct cl_lock *lock;
         int             result;
@@ -231,7 +264,8 @@ static int osc_page_is_under_lock(const struct lu_env *env,
 }
 
 static int osc_page_fail(const struct lu_env *env,
-                         const struct cl_page_slice *slice, struct cl_io *_)
+                         const struct cl_page_slice *slice,
+                         struct cl_io *unused)
 {
         /*
          * Cached read?
@@ -246,24 +280,67 @@ static const char *osc_list(struct list_head *head)
         return list_empty(head) ? "-" : "+";
 }
 
+static inline cfs_time_t osc_submit_duration(struct osc_page *opg)
+{
+        if (opg->ops_submit_time == 0)
+                return 0;
+
+        return (cfs_time_current() - opg->ops_submit_time);
+}
+
 static int osc_page_print(const struct lu_env *env,
                           const struct cl_page_slice *slice,
                           void *cookie, lu_printer_t printer)
 {
         struct osc_page       *opg = cl2osc_page(slice);
         struct osc_async_page *oap = &opg->ops_oap;
+        struct osc_object     *obj = cl2osc(slice->cpl_obj);
+        struct client_obd     *cli = &osc_export(obj)->exp_obd->u.cli;
+        struct lov_oinfo      *loi = obj->oo_oinfo;
 
         return (*printer)(env, cookie, LUSTRE_OSC_NAME"-page@%p: "
-                          "%#x %d %u %s %s %s %llu %u %#x %p %p %p %p %p\n",
-                          opg, oap->oap_magic, oap->oap_cmd,
+                          "1< %#x %d %u %s %s %s > "
+                          "2< %llu %u %#x %#x | %p %p %p %p %p > "
+                          "3< %s %p %d %lu > "
+                          "4< %d %d %d %lu %s | %s %s %s %s > "
+                          "5< %s %s %s %s | %d %s %s | %d %s %s>\n",
+                          opg,
+                          /* 1 */
+                          oap->oap_magic, oap->oap_cmd,
                           oap->oap_interrupted,
                           osc_list(&oap->oap_pending_item),
                           osc_list(&oap->oap_urgent_item),
                           osc_list(&oap->oap_rpc_item),
+                          /* 2 */
                           oap->oap_obj_off, oap->oap_page_off,
-                          oap->oap_async_flags, oap->oap_request,
+                          oap->oap_async_flags, oap->oap_brw_flags,
+                          oap->oap_request,
                           oap->oap_cli, oap->oap_loi, oap->oap_caller_ops,
-                          oap->oap_caller_data);
+                          oap->oap_caller_data,
+                          /* 3 */
+                          osc_list(&opg->ops_inflight),
+                          opg->ops_submitter, opg->ops_transfer_pinned,
+                          osc_submit_duration(opg),
+                          /* 4 */
+                          cli->cl_r_in_flight, cli->cl_w_in_flight,
+                          cli->cl_max_rpcs_in_flight,
+                          cli->cl_avail_grant,
+                          osc_list(&cli->cl_cache_waiters),
+                          osc_list(&cli->cl_loi_ready_list),
+                          osc_list(&cli->cl_loi_hp_ready_list),
+                          osc_list(&cli->cl_loi_write_list),
+                          osc_list(&cli->cl_loi_read_list),
+                          /* 5 */
+                          osc_list(&loi->loi_ready_item),
+                          osc_list(&loi->loi_hp_ready_item),
+                          osc_list(&loi->loi_write_item),
+                          osc_list(&loi->loi_read_item),
+                          loi->loi_read_lop.lop_num_pending,
+                          osc_list(&loi->loi_read_lop.lop_pending),
+                          osc_list(&loi->loi_read_lop.lop_urgent),
+                          loi->loi_write_lop.lop_num_pending,
+                          osc_list(&loi->loi_write_lop.lop_pending),
+                          osc_list(&loi->loi_write_lop.lop_urgent));
 }
 
 static void osc_page_delete(const struct lu_env *env,
@@ -280,7 +357,11 @@ static void osc_page_delete(const struct lu_env *env,
         CDEBUG(D_TRACE, "%p\n", opg);
         osc_page_transfer_put(env, opg);
         rc = osc_teardown_async_page(osc_export(obj), NULL, obj->oo_oinfo, oap);
-        LASSERTF(rc == 0, "%i\n", rc);
+        if (rc) {
+                CL_PAGE_DEBUG(D_ERROR, env, cl_page_top(slice->cpl_page),
+                              "Trying to teardown failed: %d\n", rc);
+                LASSERT(0);
+        }
         spin_lock(&obj->oo_seatbelt);
         list_del_init(&opg->ops_inflight);
         spin_unlock(&obj->oo_seatbelt);
@@ -297,7 +378,9 @@ void osc_page_clip(const struct lu_env *env, const struct cl_page_slice *slice,
 
         opg->ops_from = from;
         opg->ops_to   = to;
+        spin_lock(&oap->oap_lock);
         oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+        spin_unlock(&oap->oap_lock);
 }
 
 static int osc_page_cancel(const struct lu_env *env,
@@ -348,6 +431,8 @@ static int osc_make_ready(const struct lu_env *env, void *data, int cmd)
 
         ENTRY;
         result = cl_page_make_ready(env, page, CRT_WRITE);
+        if (result == 0)
+                opg->ops_submit_time = cfs_time_current();
         RETURN(result);
 }
 
@@ -395,6 +480,7 @@ static int osc_completion(const struct lu_env *env,
         enum cl_req_type crt;
 
         LINVRNT(osc_page_protected(env, opg, CLM_READ, 1));
+        LINVRNT(cl_page_is_vmlocked(env, page));
 
         ENTRY;
 
@@ -412,7 +498,9 @@ static int osc_completion(const struct lu_env *env,
         LASSERT(page->cp_req == NULL);
 
         /* As the transfer for this page is being done, clear the flags */
+        spin_lock(&oap->oap_lock);
         oap->oap_async_flags = 0;
+        spin_unlock(&oap->oap_lock);
 
         crt = cmd == OBD_BRW_READ ? CRT_READ : CRT_WRITE;
         /* Clear opg->ops_transfer_pinned before VM lock is released. */
@@ -424,6 +512,8 @@ static int osc_completion(const struct lu_env *env,
         list_del_init(&opg->ops_inflight);
         spin_unlock(&obj->oo_seatbelt);
 
+        opg->ops_submit_time = 0;
+
         cl_page_completion(env, page, crt, rc);
 
         /* statistic */
@@ -467,7 +557,7 @@ struct cl_page *osc_page_init(const struct lu_env *env,
         struct osc_page   *opg;
         int result;
 
-        OBD_SLAB_ALLOC_PTR(opg, osc_page_kmem);
+        OBD_SLAB_ALLOC_PTR_GFP(opg, osc_page_kmem, CFS_ALLOC_IO);
         if (opg != NULL) {
                 void *oap = &opg->ops_oap;
 
@@ -495,20 +585,28 @@ struct cl_page *osc_page_init(const struct lu_env *env,
         return ERR_PTR(result);
 }
 
+/**
+ * Helper function called by osc_io_submit() for every page in an immediate
+ * transfer (i.e., transferred synchronously).
+ */
 void osc_io_submit_page(const struct lu_env *env,
                         struct osc_io *oio, struct osc_page *opg,
                         enum cl_req_type crt)
 {
         struct osc_async_page *oap = &opg->ops_oap;
         struct client_obd     *cli = oap->oap_cli;
+        int flags = 0;
 
         LINVRNT(osc_page_protected(env, opg,
                                    crt == CRT_WRITE ? CLM_WRITE : CLM_READ, 1));
 
         oap->oap_page_off   = opg->ops_from;
         oap->oap_count      = opg->ops_to - opg->ops_from;
+        /* Give a hint to OST that requests are coming from kswapd - bug19529 */
+        if (libcfs_memory_pressure_get())
+                oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
         oap->oap_brw_flags |= OBD_BRW_SYNC;
-        if (oio->oi_lockless)
+        if (osc_io_srvlock(oio))
                 oap->oap_brw_flags |= OBD_BRW_SRVLOCK;
 
         oap->oap_cmd = crt == CRT_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ;
@@ -518,12 +616,15 @@ void osc_io_submit_page(const struct lu_env *env,
                 oap->oap_cmd |= OBD_BRW_NOQUOTA;
         }
 
-        oap->oap_async_flags |= OSC_FLAGS;
         if (oap->oap_cmd & OBD_BRW_READ)
-                oap->oap_async_flags |= ASYNC_COUNT_STABLE;
+                flags = ASYNC_COUNT_STABLE;
         else if (!(oap->oap_brw_page.flag & OBD_BRW_FROM_GRANT))
                 osc_enter_cache_try(env, cli, oap->oap_loi, oap, 1);
 
+        spin_lock(&oap->oap_lock);
+        oap->oap_async_flags |= OSC_FLAGS | flags;
+        spin_unlock(&oap->oap_lock);
+
         osc_oap_to_pending(oap);
         osc_page_transfer_get(opg, "transfer\0imm");
         osc_page_transfer_add(env, opg, crt);