* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2011, 2015, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
struct client_obd *aa_cli;
struct list_head aa_oaps;
struct list_head aa_exts;
- struct cl_req *aa_clerq;
};
#define osc_grant_args osc_brw_async_args
LASSERT(list_empty(&aa->aa_exts));
LASSERT(list_empty(&aa->aa_oaps));
- cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc :
- req->rq_bulk->bd_nob_transferred);
osc_release_ppga(aa->aa_ppga, aa->aa_page_count);
ptlrpc_lprocfs_brw(req, req->rq_bulk->bd_nob_transferred);
struct osc_brw_async_args *aa = NULL;
struct obdo *oa = NULL;
struct osc_async_page *oap;
- struct osc_async_page *tmp;
- struct cl_req *clerq = NULL;
- enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
- CRT_READ;
+ struct osc_object *obj = NULL;
struct cl_req_attr *crattr = NULL;
loff_t starting_offset = OBD_OBJECT_EOF;
loff_t ending_offset = 0;
int mem_tight = 0;
int page_count = 0;
bool soft_sync = false;
+ bool interrupted = false;
int i;
int rc;
struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
list_for_each_entry(ext, ext_list, oe_link) {
LASSERT(ext->oe_state == OES_RPC);
mem_tight |= ext->oe_memalloc;
- list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
- ++page_count;
- list_add_tail(&oap->oap_rpc_item, &rpc_list);
- if (starting_offset == OBD_OBJECT_EOF ||
- starting_offset > oap->oap_obj_off)
- starting_offset = oap->oap_obj_off;
- else
- LASSERT(oap->oap_page_off == 0);
- if (ending_offset < oap->oap_obj_off + oap->oap_count)
- ending_offset = oap->oap_obj_off +
- oap->oap_count;
- else
- LASSERT(oap->oap_page_off + oap->oap_count ==
- PAGE_CACHE_SIZE);
- }
+ page_count += ext->oe_nr_pages;
+ if (obj == NULL)
+ obj = ext->oe_obj;
}
soft_sync = osc_over_unstable_soft_limit(cli);
if (mem_tight)
mpflag = cfs_memory_pressure_get_and_set();
- OBD_ALLOC(crattr, sizeof(*crattr));
- if (crattr == NULL)
- GOTO(out, rc = -ENOMEM);
-
OBD_ALLOC(pga, sizeof(*pga) * page_count);
if (pga == NULL)
GOTO(out, rc = -ENOMEM);
GOTO(out, rc = -ENOMEM);
i = 0;
- list_for_each_entry(oap, &rpc_list, oap_rpc_item) {
- struct cl_page *page = oap2cl_page(oap);
- if (clerq == NULL) {
- clerq = cl_req_alloc(env, page, crt,
- 1 /* only 1-object rpcs for now */);
- if (IS_ERR(clerq))
- GOTO(out, rc = PTR_ERR(clerq));
+ list_for_each_entry(ext, ext_list, oe_link) {
+ list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
+ if (mem_tight)
+ oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
+ if (soft_sync)
+ oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
+ pga[i] = &oap->oap_brw_page;
+ pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
+ i++;
+
+ list_add_tail(&oap->oap_rpc_item, &rpc_list);
+ if (starting_offset == OBD_OBJECT_EOF ||
+ starting_offset > oap->oap_obj_off)
+ starting_offset = oap->oap_obj_off;
+ else
+ LASSERT(oap->oap_page_off == 0);
+ if (ending_offset < oap->oap_obj_off + oap->oap_count)
+ ending_offset = oap->oap_obj_off +
+ oap->oap_count;
+ else
+ LASSERT(oap->oap_page_off + oap->oap_count ==
+ PAGE_CACHE_SIZE);
+ if (oap->oap_interrupted)
+ interrupted = true;
}
- if (mem_tight)
- oap->oap_brw_flags |= OBD_BRW_MEMALLOC;
- if (soft_sync)
- oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC;
- pga[i] = &oap->oap_brw_page;
- pga[i]->off = oap->oap_obj_off + oap->oap_page_off;
- CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
- pga[i]->pg, page_index(oap->oap_page), oap,
- pga[i]->flag);
- i++;
- cl_req_page_add(env, clerq, page);
}
- /* always get the data for the obdo for the rpc */
- LASSERT(clerq != NULL);
- crattr->cra_oa = oa;
- cl_req_attr_set(env, clerq, crattr, ~0ULL);
+ /* first page in the list */
+ oap = list_entry(rpc_list.next, typeof(*oap), oap_rpc_item);
- rc = cl_req_prep(env, clerq);
- if (rc != 0) {
- CERROR("cl_req_prep failed: %d\n", rc);
- GOTO(out, rc);
- }
+ crattr = &osc_env_info(env)->oti_req_attr;
+ memset(crattr, 0, sizeof(*crattr));
+ crattr->cra_type = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ;
+ crattr->cra_flags = ~0ULL;
+ crattr->cra_page = oap2cl_page(oap);
+ crattr->cra_oa = oa;
+ cl_req_attr_set(env, osc2cl(obj), crattr);
sort_brw_pages(pga, page_count);
rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 0);
req->rq_commit_cb = brw_commit;
req->rq_interpret_reply = brw_interpret;
-
- if (mem_tight != 0)
- req->rq_memalloc = 1;
+ req->rq_memalloc = mem_tight != 0;
+ oap->oap_request = ptlrpc_request_addref(req);
+ if (interrupted && !req->rq_intr)
+ ptlrpc_mark_interrupted(req);
/* Need to update the timestamps after the request is built in case
* we race with setattr (locally or in queue at OST). If OST gets
* way to do this in a single call. bug 10150 */
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
crattr->cra_oa = &body->oa;
- cl_req_attr_set(env, clerq, crattr,
- OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME);
-
+ crattr->cra_flags = OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME;
+ cl_req_attr_set(env, osc2cl(obj), crattr);
lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
list_splice_init(&rpc_list, &aa->aa_oaps);
INIT_LIST_HEAD(&aa->aa_exts);
list_splice_init(ext_list, &aa->aa_exts);
- aa->aa_clerq = clerq;
-
- /* queued sync pages can be torn down while the pages
- * were between the pending list and the rpc */
- tmp = NULL;
- list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) {
- /* only one oap gets a request reference */
- if (tmp == NULL)
- tmp = oap;
- if (oap->oap_interrupted && !req->rq_intr) {
- CDEBUG(D_INODE, "oap %p in req %p interrupted\n",
- oap, req);
- ptlrpc_mark_interrupted(req);
- }
- }
- if (tmp != NULL)
- tmp->oap_request = ptlrpc_request_addref(req);
spin_lock(&cli->cl_loi_list_lock);
starting_offset >>= PAGE_CACHE_SHIFT;
if (mem_tight != 0)
cfs_memory_pressure_restore(mpflag);
- if (crattr != NULL)
- OBD_FREE(crattr, sizeof(*crattr));
-
if (rc != 0) {
LASSERT(req == NULL);
list_del_init(&ext->oe_link);
osc_extent_finish(env, ext, 0, rc);
}
- if (clerq && !IS_ERR(clerq))
- cl_req_completion(env, clerq, rc);
}
RETURN(rc);
}
}
no_match:
- if (*flags & LDLM_FL_TEST_LOCK)
+ if (*flags & (LDLM_FL_TEST_LOCK | LDLM_FL_MATCH_LOCK))
RETURN(-ENOLCK);
if (intent) {
}
static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
- void *karg, void *uarg)
+ void *karg, void __user *uarg)
{
struct obd_device *obd = exp->exp_obd;
struct obd_ioctl_data *data = karg;
osc = lock->l_ast_data;
cl_object_get(osc2cl(osc));
}
- lock->l_ast_data = NULL;
+
+ /* clear LDLM_FL_CLEANED flag to make sure it will be canceled
+ * by the 2nd round of ldlm_namespace_cleanup() call in
+ * osc_import_event(). */
+ ldlm_clear_cleaned(lock);
}
unlock_res(res);
case IMP_EVENT_INVALIDATE: {
struct ldlm_namespace *ns = obd->obd_namespace;
struct lu_env *env;
- int refcheck;
+ __u16 refcheck;
ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
+
+ spin_lock(&osc_shrink_lock);
+ list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
+ spin_unlock(&osc_shrink_lock);
+
RETURN(0);
out_ptlrpcd_work:
RETURN(rc);
}
-static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
+/*
+ * Pre-cleanup step for an OSC obd device.
+ *
+ * Calls obd_zombie_barrier(), destroys the client's writeback and LRU
+ * ptlrpcd work items when they are set (resetting each pointer to NULL
+ * afterwards), then calls obd_cleanup_client_import(),
+ * ptlrpc_lprocfs_unregister_obd() and lprocfs_obd_cleanup() on the device.
+ *
+ * obd: device being cleaned up; its client state is taken from obd->u.cli.
+ *
+ * Always returns 0.
+ */
+static int osc_precleanup(struct obd_device *obd)
{
- int rc = 0;
- ENTRY;
+ struct client_obd *cli = &obd->u.cli;
+ ENTRY;
- switch (stage) {
- case OBD_CLEANUP_EARLY: {
- struct obd_import *imp;
- imp = obd->u.cli.cl_import;
- CDEBUG(D_HA, "Deactivating import %s\n", obd->obd_name);
- /* ptlrpc_abort_inflight to stop an mds_lov_synchronize */
- ptlrpc_deactivate_import(imp);
- spin_lock(&imp->imp_lock);
- imp->imp_pingable = 0;
- spin_unlock(&imp->imp_lock);
- break;
- }
- case OBD_CLEANUP_EXPORTS: {
- struct client_obd *cli = &obd->u.cli;
- /* LU-464
- * for echo client, export may be on zombie list, wait for
- * zombie thread to cull it, because cli.cl_import will be
- * cleared in client_disconnect_export():
- * class_export_destroy() -> obd_cleanup() ->
- * echo_device_free() -> echo_client_cleanup() ->
- * obd_disconnect() -> osc_disconnect() ->
- * client_disconnect_export()
- */
- obd_zombie_barrier();
- if (cli->cl_writeback_work) {
- ptlrpcd_destroy_work(cli->cl_writeback_work);
- cli->cl_writeback_work = NULL;
- }
- if (cli->cl_lru_work) {
- ptlrpcd_destroy_work(cli->cl_lru_work);
- cli->cl_lru_work = NULL;
- }
- obd_cleanup_client_import(obd);
- ptlrpc_lprocfs_unregister_obd(obd);
- lprocfs_obd_cleanup(obd);
- break;
- }
- }
- RETURN(rc);
+ /* LU-464
+ * for echo client, export may be on zombie list, wait for
+ * zombie thread to cull it, because cli.cl_import will be
+ * cleared in client_disconnect_export():
+ * class_export_destroy() -> obd_cleanup() ->
+ * echo_device_free() -> echo_client_cleanup() ->
+ * obd_disconnect() -> osc_disconnect() ->
+ * client_disconnect_export()
+ */
+ obd_zombie_barrier();
+ if (cli->cl_writeback_work) {
+ ptlrpcd_destroy_work(cli->cl_writeback_work);
+ cli->cl_writeback_work = NULL;
+ }
+
+ if (cli->cl_lru_work) {
+ ptlrpcd_destroy_work(cli->cl_lru_work);
+ cli->cl_lru_work = NULL;
+ }
+
+ obd_cleanup_client_import(obd);
+ ptlrpc_lprocfs_unregister_obd(obd);
+ lprocfs_obd_cleanup(obd);
+ RETURN(0);
}
int osc_cleanup(struct obd_device *obd)
ENTRY;
+ spin_lock(&osc_shrink_lock);
+ list_del(&cli->cl_shrink_list);
+ spin_unlock(&osc_shrink_lock);
+
/* lru cleanup */
if (cli->cl_cache != NULL) {
LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
cli->cl_cache = NULL;
}
- /* free memory of osc quota cache */
+ /* free memory of osc quota cache */
osc_quota_cleanup(obd);
rc = client_obd_cleanup(obd);
.o_quotactl = osc_quotactl,
};
+/* Handle returned by set_shrinker() in osc_init() and later handed to
+ * remove_shrinker() in osc_exit(). */
+static struct shrinker *osc_cache_shrinker;
+/* List onto which client_obds are linked through their cl_shrink_list
+ * member. */
+struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list);
+/* Held while entries are added to or removed from osc_shrink_list. */
+DEFINE_SPINLOCK(osc_shrink_lock);
+
+#ifndef HAVE_SHRINKER_COUNT
+/*
+ * Shrinker callback that is built only when HAVE_SHRINKER_COUNT is not
+ * defined.
+ *
+ * Copies the nr_to_scan and gfp_mask values obtained through
+ * shrink_param() into a local struct shrink_control, runs
+ * osc_cache_shrink_scan() with it (the result is deliberately discarded),
+ * and returns whatever osc_cache_shrink_count() reports afterwards.
+ */
+static int osc_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ struct shrink_control scv = {
+ .nr_to_scan = shrink_param(sc, nr_to_scan),
+ .gfp_mask = shrink_param(sc, gfp_mask)
+ };
+#if !defined(HAVE_SHRINKER_WANT_SHRINK_PTR) && !defined(HAVE_SHRINK_CONTROL)
+ /* NOTE(review): in the other configurations "shrinker" is presumably
+ * supplied by SHRINKER_ARGS() - confirm against the macro definition */
+ struct shrinker *shrinker = NULL;
+#endif
+
+ (void)osc_cache_shrink_scan(shrinker, &scv);
+
+ return osc_cache_shrink_count(shrinker, &scv);
+}
+#endif
+
static int __init osc_init(void)
{
bool enable_proc = true;
unsigned int reqpool_size;
unsigned int reqsize;
int rc;
-
+ DEF_SHRINKER_VAR(osc_shvar, osc_cache_shrink,
+ osc_cache_shrink_count, osc_cache_shrink_scan);
ENTRY;
- /* print an address of _any_ initialized kernel symbol from this
- * module, to allow debugging with gdb that doesn't support data
- * symbols from modules.*/
- CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
+ /* print an address of _any_ initialized kernel symbol from this
+ * module, to allow debugging with gdb that doesn't support data
+ * symbols from modules.*/
+ CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
- rc = lu_kmem_init(osc_caches);
+ rc = lu_kmem_init(osc_caches);
if (rc)
RETURN(rc);
if (rc)
GOTO(out_kmem, rc);
+ osc_cache_shrinker = set_shrinker(DEFAULT_SEEKS, &osc_shvar);
+
/* This is obviously too much memory, only prevent overflow here */
if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0)
GOTO(out_type, rc = -EINVAL);
RETURN(rc);
}
-static void /*__exit*/ osc_exit(void)
+/*
+ * Exit routine for the OSC module (marked __exit).
+ *
+ * Removes the cache shrinker held in osc_cache_shrinker, unregisters the
+ * LUSTRE_OSC_NAME obd type, finalizes osc_caches with lu_kmem_fini() and
+ * frees the osc_rq_pool request pool.
+ */
+static void __exit osc_exit(void)
{
+ remove_shrinker(osc_cache_shrinker);
class_unregister_type(LUSTRE_OSC_NAME);
lu_kmem_fini(osc_caches);
ptlrpc_free_rq_pool(osc_rq_pool);
}
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
+MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");