Whamcloud - gitweb
LU-14711 tests: Ensure there's no eviction with long cache discard
[fs/lustre-release.git] / lustre / osc / osc_cache.c
index 9d3d22b..3255696 100644 (file)
@@ -1937,9 +1937,9 @@ static unsigned int get_write_extents(struct osc_object *obj,
        };
 
        assert_osc_object_is_locked(obj);
-       while (!list_empty(&obj->oo_hp_exts)) {
-               ext = list_entry(obj->oo_hp_exts.next, struct osc_extent,
-                                oe_link);
+       while ((ext = list_first_entry_or_null(&obj->oo_hp_exts,
+                                              struct osc_extent,
+                                              oe_link)) != NULL) {
                if (!try_to_add_extent_for_io(cli, ext, &data))
                        return data.erd_page_count;
                EASSERT(ext->oe_nr_pages <= data.erd_max_pages, ext);
@@ -1947,9 +1947,9 @@ static unsigned int get_write_extents(struct osc_object *obj,
        if (data.erd_page_count == data.erd_max_pages)
                return data.erd_page_count;
 
-       while (!list_empty(&obj->oo_urgent_exts)) {
-               ext = list_entry(obj->oo_urgent_exts.next,
-                                struct osc_extent, oe_link);
+       while ((ext = list_first_entry_or_null(&obj->oo_urgent_exts,
+                                              struct osc_extent,
+                                              oe_link)) != NULL) {
                if (!try_to_add_extent_for_io(cli, ext, &data))
                        return data.erd_page_count;
        }
@@ -1960,10 +1960,11 @@ static unsigned int get_write_extents(struct osc_object *obj,
         * extents can usually only be added if the rpclist was empty, so if we
         * can't add one, we continue on to trying to add normal extents.  This
         * is so we don't miss adding extra extents to an RPC containing high
-        * priority or urgent extents. */
-       while (!list_empty(&obj->oo_full_exts)) {
-               ext = list_entry(obj->oo_full_exts.next,
-                                struct osc_extent, oe_link);
+        * priority or urgent extents.
+        */
+       while ((ext = list_first_entry_or_null(&obj->oo_full_exts,
+                                              struct osc_extent,
+                                              oe_link)) != NULL) {
                if (!try_to_add_extent_for_io(cli, ext, &data))
                        break;
        }
@@ -2237,10 +2238,11 @@ int osc_io_unplug0(const struct lu_env *env, struct client_obd *cli,
 EXPORT_SYMBOL(osc_io_unplug0);
 
 int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
-                       struct page *page, loff_t offset)
+                       struct cl_page *page, loff_t offset)
 {
        struct obd_export     *exp = osc_export(osc);
        struct osc_async_page *oap = &ops->ops_oap;
+       struct page           *vmpage = page->cp_vmpage;
        ENTRY;
 
        if (!page)
@@ -2250,16 +2252,24 @@ int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
        oap->oap_cli = &exp->exp_obd->u.cli;
        oap->oap_obj = osc;
 
-       oap->oap_page = page;
+       oap->oap_page = vmpage;
        oap->oap_obj_off = offset;
        LASSERT(!(offset & ~PAGE_MASK));
 
+       /* Count of transient (direct i/o) pages is always stable by the time
+        * they're submitted.  Setting this here lets us avoid calling
+        * cl_page_clip later to set this.
+        */
+       if (page->cp_type == CPT_TRANSIENT)
+               oap->oap_async_flags |= ASYNC_COUNT_STABLE|ASYNC_URGENT|
+                                       ASYNC_READY;
+
        INIT_LIST_HEAD(&oap->oap_pending_item);
        INIT_LIST_HEAD(&oap->oap_rpc_item);
 
        spin_lock_init(&oap->oap_lock);
-       CDEBUG(D_INFO, "oap %p page %p obj off %llu\n",
-              oap, page, oap->oap_obj_off);
+       CDEBUG(D_INFO, "oap %p vmpage %p obj off %llu\n",
+              oap, vmpage, oap->oap_obj_off);
        RETURN(0);
 }
 EXPORT_SYMBOL(osc_prep_async_page);
@@ -2300,7 +2310,7 @@ int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
        }
 
        /* check if the file's owner/group is over quota */
-       if (!(cmd & OBD_BRW_NOQUOTA)) {
+       if (!io->ci_noquota) {
                struct cl_object *obj;
                struct cl_attr   *attr;
                unsigned int qid[LL_MAXQUOTAS];
@@ -2559,7 +2569,7 @@ out:
        return rc;
 }
 
-int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io,
+int osc_queue_sync_pages(const struct lu_env *env, struct cl_io *io,
                         struct osc_object *obj, struct list_head *list,
                         int brw_flags)
 {
@@ -2619,17 +2629,28 @@ int osc_queue_sync_pages(const struct lu_env *env, const struct cl_io *io,
                grants += (1 << cli->cl_chunkbits) *
                        ((page_count + ppc - 1) / ppc);
 
+               CDEBUG(D_CACHE, "requesting %d bytes grant\n", grants);
                spin_lock(&cli->cl_loi_list_lock);
                if (osc_reserve_grant(cli, grants) == 0) {
                        list_for_each_entry(oap, list, oap_pending_item) {
                                osc_consume_write_grant(cli,
                                                        &oap->oap_brw_page);
-                               atomic_long_inc(&obd_dirty_pages);
                        }
+                       atomic_long_add(page_count, &obd_dirty_pages);
                        osc_unreserve_grant_nolock(cli, grants, 0);
                        ext->oe_grants = grants;
+               } else {
+                       /* We cannot report ENOSPC correctly if we do parallel
+                        * DIO (async RPC submission), so turn off parallel dio
+                        * if there is not sufficient grant available.  This
+                        * makes individual RPCs synchronous.
+                        */
+                       io->ci_parallel_dio = false;
+                       CDEBUG(D_CACHE,
+                       "not enough grant available, switching to sync for this i/o\n");
                }
                spin_unlock(&cli->cl_loi_list_lock);
+               osc_update_next_shrink(cli);
        }
 
        ext->oe_is_rdma_only = !!(brw_flags & OBD_BRW_RDMA_ONLY);
@@ -2737,10 +2758,11 @@ again:
 
        osc_list_maint(cli, obj);
 
-       while (!list_empty(&list)) {
+       while ((ext = list_first_entry_or_null(&list,
+                                              struct osc_extent,
+                                              oe_link)) != NULL) {
                int rc;
 
-               ext = list_entry(list.next, struct osc_extent, oe_link);
                list_del_init(&ext->oe_link);
 
                /* extent may be in OES_ACTIVE state because inode mutex
@@ -3097,6 +3119,9 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
                if (!res)
                        break;
 
+               OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_SLOW_PAGE_EVICT,
+                                cfs_fail_val ?: 20);
+
                if (io->ci_type == CIT_MISC &&
                    io->u.ci_misc.lm_next_rpc_time &&
                    ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) {