* Author: Nikita Danilov <nikita.danilov@sun.com>
*/
-/** \addtogroup osc osc @{ */
-
#define DEBUG_SUBSYSTEM S_OSC
#include "osc_cl_internal.h"
+/** \addtogroup osc
+ * @{
+ */
+
/*****************************************************************************
*
* Type conversions.
*/
static int osc_io_submit(const struct lu_env *env,
const struct cl_io_slice *ios,
- enum cl_req_type crt, struct cl_2queue *queue)
+ enum cl_req_type crt, struct cl_2queue *queue,
+ enum cl_req_priority priority)
{
struct cl_page *page;
struct cl_page *tmp;
osc = cl2osc(opg->ops_cl.cpl_obj);
exp = osc_export(osc);
+ if (priority > CRP_NORMAL) {
+ cfs_spin_lock(&oap->oap_lock);
+ oap->oap_async_flags |= ASYNC_HP;
+ cfs_spin_unlock(&oap->oap_lock);
+ }
/*
* This can be checked without cli->cl_loi_list_lock, because
* ->oap_*_item are always manipulated when the page is owned.
*/
- if (!list_empty(&oap->oap_urgent_item) ||
- !list_empty(&oap->oap_rpc_item)) {
+ if (!cfs_list_empty(&oap->oap_urgent_item) ||
+ !cfs_list_empty(&oap->oap_rpc_item)) {
result = -EBUSY;
break;
}
result = cl_page_prep(env, io, page, crt);
if (result == 0) {
cl_page_list_move(qout, qin, page);
- if (list_empty(&oap->oap_pending_item)) {
+ if (cfs_list_empty(&oap->oap_pending_item)) {
osc_io_submit_page(env, cl2osc_io(env, ios),
opg, crt);
} else {
osc->oo_oinfo,
oap,
OSC_FLAGS);
- if (result != 0)
- break;
+ /*
+ * bug 18881: we can't just break out here when
+                                         * error occurs after cl_page_prep has been
+ * called against the page. The correct
+ * way is to call page's completion routine,
+ * as in osc_oap_interrupted. For simplicity,
+ * we just force osc_set_async_flags_base() to
+ * not return error.
+ */
+ LASSERT(result == 0);
}
+ opg->ops_submit_time = cfs_time_current();
} else {
LASSERT(result < 0);
if (result != -EALREADY)
/*
* Don't keep client_obd_list_lock() for too long.
*
+ * XXX client_obd_list lock has to be unlocked periodically to
+ * avoid soft-lockups that tend to happen otherwise (see bug
+ * 16651). On the other hand, osc_io_submit_page() queues a
+ * page with ASYNC_URGENT flag and so all pages queued up
+ * until this point are sent out immediately by
+ * osc_io_unplug() resulting in sub-optimal RPCs (sub-optimal
+ * RPCs only happen during `warm up' phase when less than
+ * cl_max_rpcs_in_flight RPCs are in flight). To balance these
+ * conflicting requirements, one might unplug once enough
+ * pages to form a large RPC were queued (i.e., use
+ * cli->cl_max_pages_per_rpc as OSC_QUEUE_GRAIN, see
+ * lop_makes_rpc()), or ignore soft-lockup issue altogether.
+ *
* XXX lock_need_resched() should be used here, but it is not
* available in the older of supported kernels.
*/
const struct cl_page_slice *slice,
unsigned from, unsigned to)
{
- LASSERT(to > 0);
-
+ struct osc_page *opg = cl2osc_page(slice);
+ struct osc_object *obj = cl2osc(opg->ops_cl.cpl_obj);
+ struct osc_async_page *oap = &opg->ops_oap;
ENTRY;
+
+ LASSERT(to > 0);
/*
* XXX instead of calling osc_page_touch() here and in
* osc_io_fault_start() it might be more logical to introduce
* fault code calls.
*/
osc_page_touch(env, cl2osc_page(slice), to);
+ if (!client_is_remote(osc_export(obj)) &&
+ cfs_capable(CFS_CAP_SYS_RESOURCE))
+ oap->oap_brw_flags |= OBD_BRW_NOQUOTA;
+
RETURN(0);
}
struct osc_punch_cbargs *args = a;
args->opc_rc = rc;
- complete(&args->opc_sync);
+ cfs_complete(&args->opc_sync);
return 0;
}
-#ifdef __KERNEL__
+/* Disable osc_trunc_check() because there is a natural race between read and
+ * truncate. See bug 20645 for details.
+ */
+#if 0 && defined(__KERNEL__)
/**
* Checks that there are no pages being written in the extent being truncated.
*/
* XXX this is quite expensive check.
*/
cl_page_list_init(list);
- cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list);
+ cl_page_gang_lookup(env, clob, io, start + partial, CL_PAGE_EOF, list, 0);
cl_page_list_for_each(page, list)
CL_PAGE_DEBUG(D_ERROR, env, page, "exists %lu\n", start);
cl_page_list_disown(env, io, list);
cl_page_list_fini(env, list);
- spin_lock(&obj->oo_seatbelt);
- list_for_each_entry(cp, &obj->oo_inflight[CRT_WRITE], ops_inflight) {
+ cfs_spin_lock(&obj->oo_seatbelt);
+ cfs_list_for_each_entry(cp, &obj->oo_inflight[CRT_WRITE],
+ ops_inflight) {
page = cp->ops_cl.cpl_page;
if (page->cp_index >= start + partial) {
cfs_task_t *submitter;
libcfs_debug_dumpstack(submitter);
}
}
- spin_unlock(&obj->oo_seatbelt);
+ cfs_spin_unlock(&obj->oo_seatbelt);
}
#else /* __KERNEL__ */
# define osc_trunc_check(env, io, oio, size) do {;} while (0)
struct osc_punch_cbargs *cbargs = &oio->oi_punch_cbarg;
struct obd_capa *capa;
loff_t size = io->u.ci_truncate.tr_size;
- int result;
-
- memset(oa, 0, sizeof(*oa));
+ int result = 0;
osc_trunc_check(env, io, oio, size);
- cl_object_attr_lock(obj);
- result = cl_object_attr_get(env, obj, attr);
- if (result == 0) {
- attr->cat_size = attr->cat_kms = size;
- result = cl_object_attr_set(env, obj, attr, CAT_SIZE|CAT_KMS);
+ if (oio->oi_lockless == 0) {
+ cl_object_attr_lock(obj);
+ result = cl_object_attr_get(env, obj, attr);
+ if (result == 0) {
+ attr->cat_size = attr->cat_kms = size;
+ result = cl_object_attr_set(env, obj, attr,
+ CAT_SIZE|CAT_KMS);
+ }
+ cl_object_attr_unlock(obj);
}
- cl_object_attr_unlock(obj);
+ memset(oa, 0, sizeof(*oa));
if (result == 0) {
oa->o_id = loi->loi_id;
oa->o_gr = loi->loi_gr;
oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
OBD_MD_FLCTIME | OBD_MD_FLMTIME;
if (oio->oi_lockless) {
- oa->o_flags = OBD_FL_TRUNCLOCK;
+ oa->o_flags = OBD_FL_SRVLOCK;
oa->o_valid |= OBD_MD_FLFLAGS;
}
oa->o_size = size;
oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
capa = io->u.ci_truncate.tr_capa;
- init_completion(&cbargs->opc_sync);
+ cfs_init_completion(&cbargs->opc_sync);
result = osc_punch_base(osc_export(cl2osc(obj)), oa, capa,
osc_punch_upcall, cbargs, PTLRPCD_SET);
}
struct obdo *oa = &oio->oi_oa;
int result;
- wait_for_completion(&cbargs->opc_sync);
+ cfs_wait_for_completion(&cbargs->opc_sync);
result = io->ci_result = cbargs->opc_rc;
if (result == 0) {
}
if (flags & OBD_MD_FLHANDLE) {
clerq = slice->crs_req;
- LASSERT(!list_empty(&clerq->crq_pages));
+ LASSERT(!cfs_list_empty(&clerq->crq_pages));
apage = container_of(clerq->crq_pages.next,
struct cl_page, cp_flight);
opg = osc_cl_page_osc(apage);
apage = opg->ops_cl.cpl_page; /* now apage is a sub-page */
lock = cl_lock_at_page(env, apage->cp_obj, apage, NULL, 1, 1);
- if (lock != NULL) {
- olck = osc_lock_at(lock);
- LASSERT(olck != NULL);
- /* check for lockless io. */
- if (olck->ols_lock != NULL) {
- oa->o_handle = olck->ols_lock->l_remote_handle;
- oa->o_valid |= OBD_MD_FLHANDLE;
- }
- cl_lock_put(env, lock);
- } else {
- /* Should only be possible with liblustre */
- LASSERT(LIBLUSTRE_CLIENT);
+ if (lock == NULL) {
+ struct cl_object_header *head;
+ struct cl_lock *scan;
+
+ head = cl_object_header(apage->cp_obj);
+ cfs_list_for_each_entry(scan, &head->coh_locks,
+ cll_linkage)
+ CL_LOCK_DEBUG(D_ERROR, env, scan,
+ "no cover page!\n");
+ CL_PAGE_DEBUG(D_ERROR, env, apage,
+ "dump uncover page!\n");
+ libcfs_debug_dumpstack(NULL);
+ LBUG();
+ }
+
+ olck = osc_lock_at(lock);
+ LASSERT(olck != NULL);
+ LASSERT(ergo(opg->ops_srvlock, olck->ols_lock == NULL));
+ /* check for lockless io. */
+ if (olck->ols_lock != NULL) {
+ oa->o_handle = olck->ols_lock->l_remote_handle;
+ oa->o_valid |= OBD_MD_FLHANDLE;
}
+ cl_lock_put(env, lock);
}
}
struct osc_req *or;
int result;
- OBD_SLAB_ALLOC_PTR(or, osc_req_kmem);
+ OBD_SLAB_ALLOC_PTR_GFP(or, osc_req_kmem, CFS_ALLOC_IO);
if (or != NULL) {
cl_req_slice_add(req, &or->or_cl, dev, &osc_req_ops);
result = 0;