+/* Inputs and result counter for one pass of lock-timeout prolongation
+ * performed by ost_prolong_locks() / ost_prolong_lock_one(). */
+struct ost_prolong_data {
+ /* request on whose behalf locks are refreshed; its xid is logged */
+ struct ptlrpc_request *opd_req;
+ /* export whose exp_bl_list is scanned; every refreshed lock is
+  * asserted to belong to it */
+ struct obd_export *opd_exp;
+ /* obdo from the request; o_valid/o_handle drive the fast path */
+ struct obdo *opd_oa;
+ /* resource name the candidate locks must be on */
+ struct ldlm_res_id opd_resid;
+ /* extent touched by the request */
+ struct ldlm_extent opd_extent;
+ /* lock mode(s) filled in by the callers; not read by the prolong
+  * helpers visible in this part of the file */
+ ldlm_mode_t opd_mode;
+ /* number of locks whose timer was refreshed (output) */
+ unsigned int opd_locks;
+ /* timeout passed to ldlm_refresh_waiting_lock(); logged as seconds */
+ int opd_timeout;
+};
+
+/*
+ * Choose the timeout used when prolonging locks for \a req.
+ *
+ * When AT_OFF is true the result is half of obd_timeout.  Otherwise it is
+ * the larger of ldlm_timeout and the timeout derived from the current
+ * service time estimate of the service that received the request
+ * (the corresponding portal, = OST_IO_PORTAL).
+ */
+static inline int prolong_timeout(struct ptlrpc_request *req)
+{
+	struct ptlrpc_service *service = req->rq_rqbd->rqbd_service;
+	int timeout;
+
+	if (AT_OFF)
+		timeout = obd_timeout / 2;
+	else
+		timeout = max(at_est2timeout(at_get(&service->srv_at_estimate)),
+			      ldlm_timeout);
+
+	return timeout;
+}
+
+/*
+ * Refresh the waiting-list timer of one \a lock with opd->opd_timeout and
+ * count it in opd->opd_locks.  Locks that are already destroyed, or that do
+ * not carry LDLM_FL_AST_SENT, are left untouched and not counted.
+ *
+ * The lock must belong to opd->opd_exp (asserted).
+ */
+static void ost_prolong_lock_one(struct ost_prolong_data *opd,
+				 struct ldlm_lock *lock)
+{
+	LASSERT(lock->l_export == opd->opd_exp);
+
+	/* XXX: never try to grab resource lock here because we're inside
+	 * exp_bl_list_lock; in ldlm_lockd.c to handle waiting list we take
+	 * res lock and then exp_bl_list_lock. */
+
+	/* skip a lock that is already cancelled, and ignore any lock that
+	 * is not being cancelled */
+	if (lock->l_destroyed || !(lock->l_flags & LDLM_FL_AST_SENT))
+		return;
+
+	LDLM_DEBUG(lock,
+		   "refreshed for req x"LPU64" ext("LPU64"->"LPU64") to %ds.\n",
+		   opd->opd_req->rq_xid, opd->opd_extent.start,
+		   opd->opd_extent.end, opd->opd_timeout);
+
+	/* this may be a lock the user holds while doing I/O, so push its
+	 * eviction timer out */
+	ldlm_refresh_waiting_lock(lock, opd->opd_timeout);
+	opd->opd_locks++;
+}
+
+/*
+ * Prolong the locks of data->opd_exp that protect the I/O described by
+ * \a data.
+ *
+ * Fast path: if the obdo carries a lock handle and that lock is granted in
+ * LCK_PW mode and contains the whole extent, only that lock is handed to
+ * ost_prolong_lock_one().  Otherwise the export's exp_bl_list is walked
+ * under exp_bl_list_lock and every lock on the same resource whose extent
+ * overlaps the request extent is handed to ost_prolong_lock_one().
+ */
+static void ost_prolong_locks(struct ost_prolong_data *data)
+{
+	struct obd_export *exp = data->opd_exp;
+	struct obdo *oa = data->opd_oa;
+	struct ldlm_lock *lock;
+	ENTRY;
+
+	if (oa->o_valid & OBD_MD_FLHANDLE) {
+		/* a request is mostly covered by a single lock, so look
+		 * the handle up first */
+		lock = ldlm_handle2lock(&oa->o_handle);
+		if (lock != NULL) {
+			/* does this lock cover the whole IO region
+			 * exclusively? */
+			int covers = lock->l_granted_mode == LCK_PW &&
+				ldlm_extent_contain(&lock->l_policy_data.l_extent,
+						    &data->opd_extent);
+
+			if (covers)
+				ost_prolong_lock_one(data, lock);
+			LDLM_LOCK_PUT(lock);
+			if (covers) {
+				/* bingo */
+				RETURN_EXIT;
+			}
+		}
+	}
+
+	cfs_spin_lock_bh(&exp->exp_bl_list_lock);
+	cfs_list_for_each_entry(lock, &exp->exp_bl_list, l_exp_list) {
+		LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
+		LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
+
+		/* same resource and overlapping extent => refresh it */
+		if (ldlm_res_eq(&data->opd_resid, &lock->l_resource->lr_name) &&
+		    ldlm_extent_overlap(&lock->l_policy_data.l_extent,
+					&data->opd_extent))
+			ost_prolong_lock_one(data, lock);
+	}
+	cfs_spin_unlock_bh(&exp->exp_bl_list_lock);
+
+	EXIT;
+}
+
+/**
+ * Check whether the OST_READ/OST_WRITE request \a req matches the LDLM
+ * lock \a lock.
+ *
+ * The request matches when the lock is on the object named by the request's
+ * ioobj, the lock's granted mode intersects LCK_PW (plus LCK_PR for reads),
+ * and the lock extent overlaps the range spanned by the first and last
+ * remote niobufs.
+ *
+ * Returns 0 if the resource or mode does not match; otherwise returns the
+ * result of ldlm_extent_overlap().
+ */
+static int ost_rw_hpreq_lock_match(struct ptlrpc_request *req,
+				   struct ldlm_lock *lock)
+{
+	struct niobuf_remote *nb, *last;
+	struct obd_ioobj *ioo;
+	struct ldlm_extent ext;
+	int opc, mode;
+	ENTRY;
+
+	opc = lustre_msg_get_opc(req->rq_reqmsg);
+	LASSERT(opc == OST_READ || opc == OST_WRITE);
+
+	ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
+	LASSERT(ioo != NULL);
+
+	nb = req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE);
+	LASSERT(nb != NULL);
+
+	/* the I/O range runs from the first niobuf's offset to the last
+	 * byte of the last niobuf */
+	last = nb + (ioo->ioo_bufcnt - 1);
+	ext.start = nb->offset;
+	ext.end = last->offset + last->len - 1;
+
+	LASSERT(lock->l_resource != NULL);
+	if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_seq,
+			     &lock->l_resource->lr_name))
+		RETURN(0);
+
+	/* writes need PW; reads are satisfied by PW or PR */
+	mode = (opc == OST_READ) ? (LCK_PW | LCK_PR) : LCK_PW;
+	if (!(lock->l_granted_mode & mode))
+		RETURN(0);
+
+	RETURN(ldlm_extent_overlap(&lock->l_policy_data.l_extent, &ext));
+}
+
+/**
+ * High-priority queue request check for whether the given PTLRPC request (\a
+ * req) is blocking an LDLM lock cancel.
+ *
+ * Builds the resource name and I/O extent from the request's ioobj and
+ * remote niobufs, then refreshes the timeout of every matching lock via
+ * ost_prolong_locks().
+ *
+ * Returns 1 if the given PTLRPC request (\a req) is blocking an LDLM lock
+ * cancel (i.e. at least one lock timeout was refreshed) and 0 if it is not.
+ * A malformed request is not reported through the return value: it trips an
+ * LASSERT, because such requests should have been rejected earlier by
+ * ost_hpreq_handler().
+ *
+ * Only OST_READs, OST_WRITEs and OST_PUNCHes go on the h-p RPC queue. This
+ * function looks only at OST_READs and OST_WRITEs.
+ */
+static int ost_rw_hpreq_check(struct ptlrpc_request *req)
+{
+	struct obd_device *obd = req->rq_export->exp_obd;
+	struct ost_body *body;
+	struct obd_ioobj *ioo;
+	struct niobuf_remote *nb;
+	struct ost_prolong_data opd = { 0 };
+	int mode, opc;
+	ENTRY;
+
+	/*
+	 * Use LASSERT to do sanity check because malformed RPCs should have
+	 * been filtered out in ost_hpreq_handler().
+	 */
+	opc = lustre_msg_get_opc(req->rq_reqmsg);
+	LASSERT(opc == OST_READ || opc == OST_WRITE);
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+	LASSERT(body != NULL);
+
+	ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
+	LASSERT(ioo != NULL);
+
+	nb = req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE);
+	LASSERT(nb != NULL);
+	LASSERT(!(nb->flags & OBD_BRW_SRVLOCK));
+
+	osc_build_res_name(ioo->ioo_id, ioo->ioo_seq, &opd.opd_resid);
+
+	opd.opd_req = req;
+	/* writes need PW; reads are covered by PW or PR */
+	mode = LCK_PW;
+	if (opc == OST_READ)
+		mode |= LCK_PR;
+	opd.opd_mode = mode;
+	opd.opd_exp = req->rq_export;
+	opd.opd_oa = &body->oa;
+	/* extent runs from the first niobuf's offset to the last byte of
+	 * the last niobuf */
+	opd.opd_extent.start = nb->offset;
+	nb += ioo->ioo_bufcnt - 1;
+	opd.opd_extent.end = nb->offset + nb->len - 1;
+	opd.opd_timeout = prolong_timeout(req);
+
+	DEBUG_REQ(D_RPCTRACE, req,
+		  "%s %s: refresh rw locks: " LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
+		  obd->obd_name, cfs_current()->comm,
+		  opd.opd_resid.name[0], opd.opd_resid.name[1],
+		  opd.opd_extent.start, opd.opd_extent.end);
+
+	ost_prolong_locks(&opd);
+
+	CDEBUG(D_DLMTRACE, "%s: refreshed %u locks timeout for req %p.\n",
+	       obd->obd_name, opd.opd_locks, req);
+
+	/* Report a boolean, as documented and as ost_punch_hpreq_check()
+	 * does, rather than leaking the raw lock count to the caller. */
+	RETURN(opd.opd_locks > 0);
+}
+
+/**
+ * hpreq_fini callback for OST_READ/OST_WRITE: runs ost_rw_hpreq_check()
+ * once more for its lock-refreshing side effect and discards the result.
+ */
+static void ost_rw_hpreq_fini(struct ptlrpc_request *req)
+{
+	/* the return value is deliberately ignored */
+	(void) ost_rw_hpreq_check(req);
+}
+
+/**
+ * Like ost_rw_hpreq_lock_match(), but for OST_PUNCH RPCs.
+ *
+ * Returns 1 when the request's obdo carries a lock handle
+ * (OBD_MD_FLHANDLE) whose cookie equals the cookie of \a lock, 0 otherwise.
+ */
+static int ost_punch_hpreq_lock_match(struct ptlrpc_request *req,
+				      struct ldlm_lock *lock)
+{
+	struct ost_body *body;
+	ENTRY;
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+	LASSERT(body != NULL);
+
+	RETURN((body->oa.o_valid & OBD_MD_FLHANDLE) &&
+	       body->oa.o_handle.cookie == lock->l_handle.h_cookie);
+}
+
+/**
+ * Like ost_rw_hpreq_check(), but for OST_PUNCH RPCs.
+ *
+ * The extent starts at oa->o_size and ends at o_size + o_blocks, or at
+ * OBD_OBJECT_EOF when o_blocks is OBD_OBJECT_EOF.  Matching locks have
+ * their timeout refreshed through ost_prolong_locks().
+ *
+ * Returns 1 if at least one lock was refreshed, 0 otherwise.  The request
+ * must not be a server-locked punch (asserted).
+ */
+static int ost_punch_hpreq_check(struct ptlrpc_request *req)
+{
+	struct obd_device *obd = req->rq_export->exp_obd;
+	struct ost_prolong_data opd = { 0 };
+	struct ost_body *body;
+	struct obdo *oa;
+	__u64 first, last;
+	ENTRY;
+
+	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
+	LASSERT(body != NULL);
+
+	oa = &body->oa;
+	LASSERT(!(oa->o_valid & OBD_MD_FLFLAGS) ||
+		!(oa->o_flags & OBD_FL_SRVLOCK));
+
+	/* work out the punched range */
+	first = oa->o_size;
+	if (oa->o_blocks == OBD_OBJECT_EOF)
+		last = OBD_OBJECT_EOF;
+	else
+		last = first + oa->o_blocks;
+
+	/* describe the request for the prolong helpers */
+	opd.opd_req = req;
+	opd.opd_exp = req->rq_export;
+	opd.opd_oa = oa;
+	opd.opd_mode = LCK_PW;
+	opd.opd_extent.start = first;
+	opd.opd_extent.end = last;
+	opd.opd_timeout = prolong_timeout(req);
+	osc_build_res_name(oa->o_id, oa->o_seq, &opd.opd_resid);
+
+	CDEBUG(D_DLMTRACE,
+	       "%s: refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
+	       obd->obd_name,
+	       opd.opd_resid.name[0], opd.opd_resid.name[1],
+	       opd.opd_extent.start, opd.opd_extent.end);
+
+	ost_prolong_locks(&opd);
+
+	CDEBUG(D_DLMTRACE, "%s: refreshed %u locks timeout for req %p.\n",
+	       obd->obd_name, opd.opd_locks, req);
+
+	RETURN(opd.opd_locks > 0);
+}
+
+/**
+ * hpreq_fini callback for OST_PUNCH: runs ost_punch_hpreq_check() once more
+ * for its lock-refreshing side effect and discards the result.
+ */
+static void ost_punch_hpreq_fini(struct ptlrpc_request *req)
+{
+	/* the return value is deliberately ignored */
+	(void) ost_punch_hpreq_check(req);
+}
+
+/** High-priority request callbacks installed for OST_READ and OST_WRITE. */
+struct ptlrpc_hpreq_ops ost_hpreq_rw = {
+	.hpreq_lock_match	= ost_rw_hpreq_lock_match,
+	.hpreq_check		= ost_rw_hpreq_check,
+	.hpreq_fini		= ost_rw_hpreq_fini,
+};
+
+/** High-priority request callbacks installed for OST_PUNCH. */
+struct ptlrpc_hpreq_ops ost_hpreq_punch = {
+	.hpreq_lock_match	= ost_punch_hpreq_lock_match,
+	.hpreq_check		= ost_punch_hpreq_check,
+	.hpreq_fini		= ost_punch_hpreq_fini,
+};
+
+/**
+ * Assign high priority operations to the request if needed.
+ *
+ * For OST_READ/OST_WRITE the request capsule is initialised and the body,
+ * the single ioobj and the remote niobufs are validated; unless the first
+ * niobuf is flagged OBD_BRW_SRVLOCK, req->rq_ops is set to ost_hpreq_rw.
+ * For OST_PUNCH the body is validated and, unless the obdo is flagged
+ * OBD_FL_SRVLOCK, req->rq_ops is set to ost_hpreq_punch.  Requests without
+ * an export and all other opcodes are left untouched.
+ *
+ * The niobuf count claimed by the client is checked both against
+ * PTLRPC_MAX_BRW_PAGES and against the number of niobufs actually present
+ * in the message, because the hpreq callbacks later index the last niobuf
+ * using ioo_bufcnt without further checks.
+ *
+ * \retval 0		request accepted (ops may or may not be assigned)
+ * \retval -EFAULT	request is malformed
+ * \retval other	error returned by ost_validate_obdo()
+ */
+static int ost_hpreq_handler(struct ptlrpc_request *req)
+{
+	ENTRY;
+	if (req->rq_export) {
+		int opc = lustre_msg_get_opc(req->rq_reqmsg);
+		struct ost_body *body;
+
+		if (opc == OST_READ || opc == OST_WRITE) {
+			struct niobuf_remote *nb;
+			struct obd_ioobj *ioo;
+			int objcount, niocount, nbcount;
+			int rc;
+			int i;
+
+			/* RPCs on the H-P queue can be inspected before
+			 * ost_handler() initializes their pills, so we
+			 * initialize that here.  Capsule initialization is
+			 * idempotent, as is setting the pill's format (provided
+			 * it doesn't change).
+			 */
+			req_capsule_init(&req->rq_pill, req, RCL_SERVER);
+			if (opc == OST_READ)
+				req_capsule_set(&req->rq_pill,
+						&RQF_OST_BRW_READ);
+			else
+				req_capsule_set(&req->rq_pill,
+						&RQF_OST_BRW_WRITE);
+
+			body = req_capsule_client_get(&req->rq_pill,
+						      &RMF_OST_BODY);
+			if (body == NULL) {
+				CERROR("Missing/short ost_body\n");
+				RETURN(-EFAULT);
+			}
+
+			/* exactly one ioobj is accepted */
+			objcount = req_capsule_get_size(&req->rq_pill,
+							&RMF_OBD_IOOBJ,
+							RCL_CLIENT) /
+				   sizeof(*ioo);
+			if (objcount == 0) {
+				CERROR("Missing/short ioobj\n");
+				RETURN(-EFAULT);
+			}
+			if (objcount > 1) {
+				CERROR("too many ioobjs (%d)\n", objcount);
+				RETURN(-EFAULT);
+			}
+
+			ioo = req_capsule_client_get(&req->rq_pill,
+						     &RMF_OBD_IOOBJ);
+			if (ioo == NULL) {
+				CERROR("Missing/short ioobj\n");
+				RETURN(-EFAULT);
+			}
+
+			rc = ost_validate_obdo(req->rq_export, &body->oa, ioo);
+			if (rc) {
+				CERROR("invalid object ids\n");
+				RETURN(rc);
+			}
+
+			for (niocount = i = 0; i < objcount; i++) {
+				if (ioo[i].ioo_bufcnt == 0) {
+					CERROR("ioo[%d] has zero bufcnt\n", i);
+					RETURN(-EFAULT);
+				}
+				/* ioo_bufcnt is client-supplied (presumably
+				 * an unsigned 32-bit wire field - confirm
+				 * against struct obd_ioobj).  Bound it before
+				 * adding so that a huge value cannot wrap the
+				 * signed niocount and slip past the limit. */
+				if (ioo[i].ioo_bufcnt >
+				    PTLRPC_MAX_BRW_PAGES - niocount) {
+					DEBUG_REQ(D_RPCTRACE, req,
+						  "bulk has too many pages (%u)",
+						  (unsigned int)ioo[i].ioo_bufcnt);
+					RETURN(-EFAULT);
+				}
+				niocount += ioo[i].ioo_bufcnt;
+			}
+
+			nb = req_capsule_client_get(&req->rq_pill,
+						    &RMF_NIOBUF_REMOTE);
+			if (nb == NULL) {
+				CERROR("Missing/short niobuf\n");
+				RETURN(-EFAULT);
+			}
+
+			/* The hpreq callbacks read nb[ioo_bufcnt - 1], so the
+			 * message must really carry that many niobufs. */
+			nbcount = req_capsule_get_size(&req->rq_pill,
+						       &RMF_NIOBUF_REMOTE,
+						       RCL_CLIENT) /
+				  sizeof(*nb);
+			if (nbcount < niocount) {
+				CERROR("short niobuf array (%d < %d)\n",
+				       nbcount, niocount);
+				RETURN(-EFAULT);
+			}
+
+			/* server-locked I/O does not go through hpreq ops */
+			if (niocount == 0 || !(nb[0].flags & OBD_BRW_SRVLOCK))
+				req->rq_ops = &ost_hpreq_rw;
+		} else if (opc == OST_PUNCH) {
+			req_capsule_init(&req->rq_pill, req, RCL_SERVER);
+			req_capsule_set(&req->rq_pill, &RQF_OST_PUNCH);
+
+			body = req_capsule_client_get(&req->rq_pill,
+						      &RMF_OST_BODY);
+			if (body == NULL) {
+				CERROR("Missing/short ost_body\n");
+				RETURN(-EFAULT);
+			}
+
+			/* server-locked punch does not go through hpreq ops */
+			if (!(body->oa.o_valid & OBD_MD_FLFLAGS) ||
+			    !(body->oa.o_flags & OBD_FL_SRVLOCK))
+				req->rq_ops = &ost_hpreq_punch;
+		}
+	}
+	RETURN(0);
+}
+