* GPL HEADER END
*/
/*
- * Copyright (c) 2014, 2015, Intel Corporation.
+ * Copyright (c) 2014, 2017, Intel Corporation.
*/
/*
* lustre/osp/osp_trans.c
osp_update_interpreter_t ouc_interpreter;
};
-static struct object_update_request *object_update_request_alloc(size_t size)
-{
- struct object_update_request *ourq;
-
- OBD_ALLOC_LARGE(ourq, size);
- if (ourq == NULL)
- return ERR_PTR(-ENOMEM);
-
- ourq->ourq_magic = UPDATE_REQUEST_MAGIC;
- ourq->ourq_count = 0;
-
- return ourq;
-}
-
/**
* Allocate new update request
*
size_t size)
{
struct osp_update_request_sub *ours;
+ struct object_update_request *ourq;
OBD_ALLOC_PTR(ours);
if (ours == NULL)
return -ENOMEM;
- if (size < OUT_UPDATE_INIT_BUFFER_SIZE)
- size = OUT_UPDATE_INIT_BUFFER_SIZE;
-
- ours->ours_req = object_update_request_alloc(size);
-
- if (IS_ERR(ours->ours_req)) {
+ /* The object update request will be added to an SG list for
+ * bulk transfer. Some IB HW cannot handle partial pages in SG
+ * lists (since they create gaps in memory regions) so we
+ * round the size up to the next multiple of PAGE_SIZE. See
+ * LU-9983. */
+ LASSERT(size > 0);
+ size = round_up(size, PAGE_SIZE);
+ OBD_ALLOC_LARGE(ourq, size);
+ if (ourq == NULL) {
OBD_FREE_PTR(ours);
return -ENOMEM;
}
+ ourq->ourq_magic = UPDATE_REQUEST_MAGIC;
+ ourq->ourq_count = 0;
+ ours->ours_req = ourq;
ours->ours_req_size = size;
INIT_LIST_HEAD(&ours->ours_list);
list_add_tail(&ours->ours_list, &our->our_req_list);
struct osp_update_request *osp_update_request_create(struct dt_device *dt)
{
struct osp_update_request *our;
+ int rc;
OBD_ALLOC_PTR(our);
if (our == NULL)
INIT_LIST_HEAD(&our->our_req_list);
INIT_LIST_HEAD(&our->our_cb_items);
INIT_LIST_HEAD(&our->our_list);
+ INIT_LIST_HEAD(&our->our_invalidate_cb_list);
spin_lock_init(&our->our_list_lock);
- osp_object_update_request_create(our, OUT_UPDATE_INIT_BUFFER_SIZE);
+ rc = osp_object_update_request_create(our, PAGE_SIZE);
+ if (rc != 0) {
+ OBD_FREE_PTR(our);
+ return ERR_PTR(rc);
+ }
return our;
}
-void osp_update_request_destroy(struct osp_update_request *our)
+void osp_update_request_destroy(const struct lu_env *env,
+ struct osp_update_request *our)
{
struct osp_update_request_sub *ours;
struct osp_update_request_sub *tmp;
OBD_FREE_LARGE(ours->ours_req, ours->ours_req_size);
OBD_FREE_PTR(ours);
}
+
+ if (!list_empty(&our->our_invalidate_cb_list)) {
+ struct lu_env lenv;
+ struct osp_object *obj;
+ struct osp_object *next;
+
+ if (env == NULL) {
+ lu_env_init(&lenv, LCT_MD_THREAD | LCT_DT_THREAD);
+ env = &lenv;
+ }
+
+ list_for_each_entry_safe(obj, next,
+ &our->our_invalidate_cb_list,
+ opo_invalidate_cb_list) {
+ spin_lock(&obj->opo_lock);
+ list_del_init(&obj->opo_invalidate_cb_list);
+ spin_unlock(&obj->opo_lock);
+
+ dt_object_put(env, &obj->opo_obj);
+ }
+
+ if (env == &lenv)
+ lu_env_fini(&lenv);
+ }
+
OBD_FREE_PTR(our);
}
update = object_update_request_get(ourq, i, &size);
LASSERT(update != NULL);
CDEBUG(mask, "i = %u fid = "DFID" op = %s "
- "params = %d batchid = "LPU64" size = %zu repsize %u\n",
+ "params = %d batchid = %llu size = %zu repsize %u\n",
i, PFID(&update->ou_fid),
update_op_str(update->ou_type),
update->ou_params_count,
buf_count++;
}
repsize += sizeof(*reply);
- repsize = (repsize + OUT_UPDATE_REPLY_SIZE - 1) &
- ~(OUT_UPDATE_REPLY_SIZE - 1);
+ if (repsize < OUT_UPDATE_REPLY_SIZE)
+ repsize = OUT_UPDATE_REPLY_SIZE;
LASSERT(buf_count > 0);
req = ptlrpc_request_alloc(imp, &RQF_OUT_UPDATE);
if (rc != 0)
RETURN(rc);
- /* This will only be called with read-only update, and these updates
- * might be used to retrieve update log during recovery process, so
- * it will be allowed to send during recovery process */
- req->rq_allow_replay = 1;
+ osp_set_req_replay(osp, req);
req->rq_allow_intr = 1;
/* Note: some dt index api might return non-zero result here, like
* \param[in] oth osp thandle.
*/
static void osp_thandle_invalidate_object(const struct lu_env *env,
- struct osp_thandle *oth)
+ struct osp_thandle *oth,
+ int result)
{
struct osp_update_request *our = oth->ot_our;
- struct osp_update_request_sub *ours;
+ struct osp_object *obj;
+ struct osp_object *next;
if (our == NULL)
return;
- list_for_each_entry(ours, &our->our_req_list, ours_list) {
- struct object_update_request *our_req = ours->ours_req;
- unsigned int i;
- struct lu_object *obj;
-
- for (i = 0; i < our_req->ourq_count; i++) {
- struct object_update *update;
-
- update = object_update_request_get(our_req, i, NULL);
- if (update == NULL)
- break;
-
- if (update->ou_type != OUT_WRITE)
- continue;
+ list_for_each_entry_safe(obj, next, &our->our_invalidate_cb_list,
+ opo_invalidate_cb_list) {
+ if (result < 0)
+ osp_invalidate(env, &obj->opo_obj);
- if (!fid_is_sane(&update->ou_fid))
- continue;
+ spin_lock(&obj->opo_lock);
+ list_del_init(&obj->opo_invalidate_cb_list);
+ spin_unlock(&obj->opo_lock);
- obj = lu_object_find_slice(env,
- &oth->ot_super.th_dev->dd_lu_dev,
- &update->ou_fid, NULL);
- if (IS_ERR(obj))
- break;
-
- osp_invalidate(env, lu2dt_obj(obj));
- lu_object_put(env, obj);
- }
+ dt_object_put(env, &obj->opo_obj);
}
}
dcb->dcb_func(NULL, &oth->ot_super, dcb, result);
}
- if (result < 0)
- osp_thandle_invalidate_object(env, oth);
+ osp_thandle_invalidate_object(env, oth, result);
}
/**
/* oth and osp_update_requests will be destroyed in
* osp_thandle_put */
osp_trans_stop_cb(env, oth, rc);
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
} else {
- osp_update_request_destroy(our);
+ osp_update_request_destroy(env, our);
}
RETURN(rc);
ouc->ouc_data, 0, rc);
osp_update_callback_fini(env, ouc);
}
- osp_update_request_destroy(our);
+ osp_update_request_destroy(env, our);
} else {
args = ptlrpc_req_async_args(req);
args->oaua_update = our;
return 0;
}
-void osp_thandle_destroy(struct osp_thandle *oth)
+void osp_thandle_destroy(const struct lu_env *env,
+ struct osp_thandle *oth)
{
LASSERT(oth->ot_magic == OSP_THANDLE_MAGIC);
LASSERT(list_empty(&oth->ot_commit_dcb_list));
LASSERT(list_empty(&oth->ot_stop_dcb_list));
if (oth->ot_our != NULL)
- osp_update_request_destroy(oth->ot_our);
+ osp_update_request_destroy(env, oth->ot_our);
OBD_FREE_PTR(oth);
}
oth->ot_magic = OSP_THANDLE_MAGIC;
th = &oth->ot_super;
th->th_dev = d;
- th->th_tags = LCT_TX_HANDLE;
atomic_set(&oth->ot_refcount, 1);
INIT_LIST_HEAD(&oth->ot_commit_dcb_list);
last_committed_transno =
req->rq_import->imp_peer_committed_transno;
- CDEBUG(D_HA, "trans no "LPU64" committed transno "LPU64"\n",
+ CDEBUG(D_HA, "trans no %llu committed transno %llu\n",
req->rq_transno, last_committed_transno);
/* If the transaction is not really committed, mark result = 1 */
osp_trans_commit_cb(oth, result);
req->rq_committed = 1;
- osp_thandle_put(oth);
+ osp_thandle_put(NULL, oth);
EXIT;
}
{
struct osp_update_args *args;
struct ptlrpc_request *req;
- struct lu_device *top_device;
struct osp_thandle *oth = our->our_th;
int rc = 0;
ENTRY;
if (!oth->ot_super.th_wait_submit && !oth->ot_super.th_sync) {
if (!osp->opd_imp_active || !osp->opd_imp_connected) {
osp_trans_callback(env, oth, rc);
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
GOTO(out, rc = -ENOTCONN);
}
rc = obd_get_request_slot(&osp->opd_obd->u.cli);
if (rc != 0) {
osp_trans_callback(env, oth, rc);
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
GOTO(out, rc = -ENOTCONN);
}
args->oaua_flow_control = true;
if (!osp->opd_connect_mdt) {
down_read(&osp->opd_async_updates_rwsem);
args->oaua_count = &osp->opd_async_updates_count;
- args->oaua_waitq = &osp->opd_syn_barrier_waitq;
+ args->oaua_waitq = &osp->opd_sync_barrier_waitq;
up_read(&osp->opd_async_updates_rwsem);
atomic_inc(args->oaua_count);
}
* status, in case the other target is being recovered
* at the same time, and if we wait here for the import
* to be recovered, it might cause deadlock */
- top_device = osp->opd_dt_dev.dd_lu_dev.ld_site->ls_top_dev;
- if (top_device->ld_obd->obd_recovering)
- req->rq_allow_replay = 1;
+ osp_set_req_replay(osp, req);
/* Because this req will be synchronous, i.e. it will be called
* in the same thread, so it will be safe to use current
rc = ptlrpc_queue_wait(req);
if (osp->opd_connect_mdt)
osp_put_rpc_lock(osp);
- if ((rc == -ENOMEM && req->rq_set == NULL) ||
+
+ /* We use rq_queued_time to distinguish between local
+ * and remote -ENOMEM. */
+ if ((rc == -ENOMEM && req->rq_queued_time == 0) ||
(req->rq_transno == 0 && !req->rq_committed)) {
if (args->oaua_update != NULL) {
/* If osp_update_interpret is not being called,
* release the osp_thandle */
args->oaua_update = NULL;
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
}
req->rq_cb_data = NULL;
rc = rc == 0 ? req->rq_status : rc;
osp_trans_callback(env, oth, rc);
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
GOTO(out, rc);
}
}
* Get local thandle for osp_thandle
*
* Get the local OSD thandle from the OSP thandle. Currently, there
- * are a few OSP API (osp_object_create() and osp_sync_add()) needs
+ * are a few OSP API (osp_create() and osp_sync_add()) needs
* to update the object on local OSD device.
*
* If the osp_thandle comes from normal stack (MDD->LOD->OSP), then
*
* Set the version for the transaction and add the request to
* the sending list, then after transaction stop, the request
- * will be picked in the order of version, by sending thread.
+ * will be sent in the order of version by the sending thread.
*
* \param [in] oth osp thandle to be set version.
*
/* Assign the version and add it to the sending list */
osp_thandle_get(oth);
oth->ot_our->our_version = ou->ou_version++;
+ oth->ot_our->our_generation = ou->ou_generation;
list_add_tail(&oth->ot_our->our_list,
&osp->opd_update->ou_list);
oth->ot_our->our_req_ready = 0;
spin_unlock(&ou->ou_lock);
LASSERT(oth->ot_super.th_wait_submit == 1);
- CDEBUG(D_INFO, "%s: version "LPU64" oth:version %p:"LPU64"\n",
- osp->opd_obd->obd_name, ou->ou_version, oth,
+ CDEBUG(D_INFO, "%s: version %llu gen %llu oth:version %p:%llu\n",
+ osp->opd_obd->obd_name, ou->ou_version, ou->ou_generation, oth,
oth->ot_our->our_version);
return 0;
spin_lock(&ou->ou_lock);
list_for_each_entry_safe(our, tmp, &ou->ou_list, our_list) {
LASSERT(our->our_th != NULL);
- CDEBUG(D_HA, "ou %p version "LPU64" rpc_version "LPU64"\n",
+ CDEBUG(D_HA, "ou %p version %llu rpc_version %llu\n",
ou, our->our_version, ou->ou_rpc_version);
spin_lock(&our->our_list_lock);
/* Find next osp_update_request in the list */
if (rc < 0) {
CERROR("%s: init env error: rc = %d\n", osp->opd_obd->obd_name,
rc);
+
+ spin_lock(&ou->ou_lock);
+ ou->ou_generation++;
+ spin_unlock(&ou->ou_lock);
+
return;
}
our);
}
+ /* Increase the generation so that any update request still
+ * carrying the old generation will fail with -EIO. */
+ ou->ou_generation++;
spin_unlock(&ou->ou_lock);
/* invalidate all of request in the sending list */
spin_unlock(&our->our_list_lock);
osp_trans_callback(&env, our->our_th,
our->our_th->ot_super.th_result);
- osp_thandle_put(our->our_th);
+ osp_thandle_put(&env, our->our_th);
}
lu_env_fini(&env);
}
if (!osp_send_update_thread_running(osp)) {
if (our != NULL) {
osp_trans_callback(&env, our->our_th, -EINTR);
- osp_thandle_put(our->our_th);
+ osp_thandle_put(&env, our->our_th);
}
break;
}
osp_trans_callback(&env, our->our_th,
our->our_th->ot_super.th_result);
rc = our->our_th->ot_super.th_result;
- } else if (OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) {
+ } else if (ou->ou_generation != our->our_generation ||
+ OBD_FAIL_CHECK(OBD_FAIL_INVALIDATE_UPDATE)) {
rc = -EIO;
osp_trans_callback(&env, our->our_th, rc);
} else {
osp_invalidate_request(osp);
/* Balanced for thandle_get in osp_check_and_set_rpc_version */
- osp_thandle_put(our->our_th);
+ osp_thandle_put(&env, our->our_th);
}
thread->t_flags = SVC_STOPPED;
GOTO(out, rc = -EIO);
}
- CDEBUG(D_HA, "%s: add oth %p with version "LPU64"\n",
+ CDEBUG(D_HA, "%s: add oth %p with version %llu\n",
osp->opd_obd->obd_name, oth, our->our_version);
LASSERT(our->our_req_ready == 0);
osp_trans_callback(env, oth, th->th_result);
}
out:
- osp_thandle_put(oth);
+ osp_thandle_put(env, oth);
RETURN(rc);
}