}
/**
+ * This function refresh eviction timer for cancelled lock.
+ * \param[in] lock ldlm lock for refresh
+ * \param[in] arg ldlm prolong arguments, timeout, export, extent
+ * and counter are used
+ */
+void ldlm_lock_prolong_one(struct ldlm_lock *lock,
+ struct ldlm_prolong_args *arg)
+{
+ int timeout;
+
+ if (arg->lpa_export != lock->l_export ||
+ lock->l_flags & LDLM_FL_DESTROYED)
+ /* ignore unrelated locks */
+ return;
+
+ arg->lpa_locks_cnt++;
+
+ if (!(lock->l_flags & LDLM_FL_AST_SENT))
+ /* ignore locks not being cancelled */
+ return;
+
+ /* We are in the middle of the process - BL AST is sent, CANCEL
+ * is ahead. Take half of BL AT + IO AT process time.
+ */
+ timeout = arg->lpa_timeout + (ldlm_bl_timeout(lock) >> 1);
+
+ LDLM_DEBUG(lock, "refreshed to %ds.\n", timeout);
+
+ arg->lpa_blocks_cnt++;
+
+ /* OK. this is a possible lock the user holds doing I/O
+ * let's refresh eviction timer for it.
+ */
+ ldlm_refresh_waiting_lock(lock, timeout);
+}
+EXPORT_SYMBOL(ldlm_lock_prolong_one);
+
+static enum interval_iter ldlm_resource_prolong_cb(struct interval_node *n,
+ void *data)
+{
+ struct ldlm_prolong_args *arg = data;
+ struct ldlm_interval *node = to_ldlm_interval(n);
+ struct ldlm_lock *lock;
+
+ ENTRY;
+
+ LASSERT(!list_empty(&node->li_group));
+
+ list_for_each_entry(lock, &node->li_group, l_sl_policy) {
+ ldlm_lock_prolong_one(lock, arg);
+ }
+
+ RETURN(INTERVAL_ITER_CONT);
+}
+
+/**
+ * Walk through granted tree and prolong locks if they overlaps extent.
+ *
+ * \param[in] arg prolong args
+ */
+void ldlm_resource_prolong(struct ldlm_prolong_args *arg)
+{
+ struct ldlm_interval_tree *tree;
+ struct ldlm_resource *res;
+ struct interval_node_extent ex = { .start = arg->lpa_extent.start,
+ .end = arg->lpa_extent.end };
+ int idx;
+
+ ENTRY;
+
+ res = ldlm_resource_get(arg->lpa_export->exp_obd->obd_namespace, NULL,
+ &arg->lpa_resid, LDLM_EXTENT, 0);
+ if (IS_ERR(res)) {
+ CDEBUG(D_DLMTRACE, "Failed to get resource for resid "LPU64"/"
+ LPU64"\n", arg->lpa_resid.name[0],
+ arg->lpa_resid.name[1]);
+ RETURN_EXIT;
+ }
+
+ lock_res(res);
+ for (idx = 0; idx < LCK_MODE_NUM; idx++) {
+ tree = &res->lr_itree[idx];
+ if (tree->lit_root == NULL) /* empty tree, skipped */
+ continue;
+
+ /* There is no possibility to check for the groupID
+ * so all the group locks are considered as valid
+ * here, especially because the client is supposed
+ * to check it has such a lock before sending an RPC.
+ */
+ if (!(tree->lit_mode & arg->lpa_mode))
+ continue;
+
+ interval_search(tree->lit_root, &ex,
+ ldlm_resource_prolong_cb, arg);
+ }
+
+ unlock_res(res);
+ ldlm_resource_putref(res);
+
+ EXIT;
+}
+EXPORT_SYMBOL(ldlm_resource_prolong);
+
+
+/**
* Discard all AST work items from list.
*
* If for whatever reason we do not want to send ASTs to conflicting locks
*
* \retval amount of time to extend the timeout with
*/
-static inline int prolong_timeout(struct ptlrpc_request *req,
- struct ldlm_lock *lock)
+static inline int prolong_timeout(struct ptlrpc_request *req)
{
struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
if (AT_OFF)
return obd_timeout / 2;
- /* We are in the middle of the process - BL AST is sent, CANCEL
- is ahead. Take half of AT + IO process time. */
- return at_est2timeout(at_get(&svcpt->scp_at_estimate)) +
- (ldlm_bl_timeout(lock) >> 1);
-}
-
-/**
- * Prolong single lock timeout.
- *
- * This is supplemental function to the ofd_prolong_locks(). It prolongs
- * a single lock.
- *
- * \param[in] tsi target session environment for this request
- * \param[in] lock LDLM lock to prolong
- * \param[in] extent related extent
- * \param[in] timeout timeout value to add
- *
- * \retval 0 if lock is not suitable for prolongation
- * \retval 1 if lock was prolonged successfully
- */
-static int ofd_prolong_one_lock(struct tgt_session_info *tsi,
- struct ldlm_lock *lock,
- struct ldlm_extent *extent)
-{
- int timeout = prolong_timeout(tgt_ses_req(tsi), lock);
-
- if (lock->l_flags & LDLM_FL_DESTROYED) /* lock already cancelled */
- return 0;
-
- /* XXX: never try to grab resource lock here because we're inside
- * exp_bl_list_lock; in ldlm_lockd.c to handle waiting list we take
- * res lock and then exp_bl_list_lock. */
-
- if (!(lock->l_flags & LDLM_FL_AST_SENT))
- /* ignore locks not being cancelled */
- return 0;
-
- LDLM_DEBUG(lock, "refreshed for req x"LPU64" ext("LPU64"->"LPU64") "
- "to %ds.\n", tgt_ses_req(tsi)->rq_xid, extent->start,
- extent->end, timeout);
-
- /* OK. this is a possible lock the user holds doing I/O
- * let's refresh eviction timer for it */
- ldlm_refresh_waiting_lock(lock, timeout);
- return 1;
+ return at_est2timeout(at_get(&svcpt->scp_at_estimate));
}
/**
* request may cover multiple locks.
*
* \param[in] tsi target session environment for this request
- * \param[in] start start of extent
- * \param[in] end end of extent
+ * \param[in] data struct of data to prolong locks
*
- * \retval number of prolonged locks
*/
-static int ofd_prolong_extent_locks(struct tgt_session_info *tsi,
- __u64 start, __u64 end)
+static void ofd_prolong_extent_locks(struct tgt_session_info *tsi,
+ struct ldlm_prolong_args *data)
{
- struct obd_export *exp = tsi->tsi_exp;
struct obdo *oa = &tsi->tsi_ost_body->oa;
- struct ldlm_extent extent = {
- .start = start,
- .end = end
- };
struct ldlm_lock *lock;
- int lock_count = 0;
ENTRY;
+ data->lpa_timeout = prolong_timeout(tgt_ses_req(tsi));
+ data->lpa_export = tsi->tsi_exp;
+ data->lpa_resid = tsi->tsi_resid;
+
+ CDEBUG(D_RPCTRACE, "Prolong locks for req %p with x"LPU64
+ " ext("LPU64"->"LPU64")\n", tgt_ses_req(tsi),
+ tgt_ses_req(tsi)->rq_xid, data->lpa_extent.start,
+ data->lpa_extent.end);
+
if (oa->o_valid & OBD_MD_FLHANDLE) {
/* mostly a request should be covered by only one lock, try
* fast path. */
if (lock != NULL) {
/* Fast path to check if the lock covers the whole IO
* region exclusively. */
- if (lock->l_granted_mode == LCK_PW &&
- ldlm_extent_contain(&lock->l_policy_data.l_extent,
- &extent)) {
+ if (ldlm_extent_contain(&lock->l_policy_data.l_extent,
+ &data->lpa_extent)) {
/* bingo */
- LASSERT(lock->l_export == exp);
- lock_count = ofd_prolong_one_lock(tsi, lock,
- &extent);
+ LASSERT(lock->l_export == data->lpa_export);
+ ldlm_lock_prolong_one(lock, data);
LDLM_LOCK_PUT(lock);
- RETURN(lock_count);
+ RETURN_EXIT;
}
lock->l_last_used = cfs_time_current();
LDLM_LOCK_PUT(lock);
}
}
- spin_lock_bh(&exp->exp_bl_list_lock);
- list_for_each_entry(lock, &exp->exp_bl_list, l_exp_list) {
- LASSERT(lock->l_flags & LDLM_FL_AST_SENT);
- LASSERT(lock->l_resource->lr_type == LDLM_EXTENT);
-
- /* ignore waiting locks, no more granted locks in the list */
- if (lock->l_granted_mode != lock->l_req_mode)
- break;
-
- if (!ldlm_res_eq(&tsi->tsi_resid, &lock->l_resource->lr_name))
- continue;
-
- if (!ldlm_extent_overlap(&lock->l_policy_data.l_extent,
- &extent))
- continue;
-
- lock_count += ofd_prolong_one_lock(tsi, lock, &extent);
- }
- spin_unlock_bh(&exp->exp_bl_list_lock);
-
- RETURN(lock_count);
+ ldlm_resource_prolong(data);
+ EXIT;
}
/**
if (!ostid_res_name_eq(&ioo->ioo_oid, &lock->l_resource->lr_name))
RETURN(0);
- /* a bulk write can only hold a reference on a PW extent lock */
- mode = LCK_PW;
+ /* a bulk write can only hold a reference on a PW extent lock
+ * or GROUP lock.
+ */
+ mode = LCK_PW | LCK_GROUP;
if (opc == OST_READ)
/* whereas a bulk read can be protected by either a PR or PW
* extent lock */
* Implementation of ptlrpc_hpreq_ops::hpreq_lock_check for OFD RW requests.
*
* Check for whether the given PTLRPC request (\a req) is blocking
- * an LDLM lock cancel.
+ * an LDLM lock cancel. Also checks whether the request is covered by an LDLM
+ * lock.
*
* \param[in] req the incoming request
*
* \retval 1 if \a req is blocking an LDLM lock cancel
* \retval 0 if it is not
+ * \retval -ESTALE if lock is not found
*/
static int ofd_rw_hpreq_check(struct ptlrpc_request *req)
{
struct tgt_session_info *tsi;
struct obd_ioobj *ioo;
struct niobuf_remote *rnb;
- __u64 start, end;
- int lock_count;
+ int opc;
+ struct ldlm_prolong_args pa = { 0 };
ENTRY;
* Use LASSERT below because malformed RPCs should have
* been filtered out in tgt_hpreq_handler().
*/
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ LASSERT(opc == OST_READ || opc == OST_WRITE);
+
ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
LASSERT(ioo != NULL);
LASSERT(rnb != NULL);
LASSERT(!(rnb->rnb_flags & OBD_BRW_SRVLOCK));
- start = rnb->rnb_offset;
+ pa.lpa_mode = LCK_PW | LCK_GROUP;
+ if (opc == OST_READ)
+ pa.lpa_mode |= LCK_PR;
+
+ pa.lpa_extent.start = rnb->rnb_offset;
rnb += ioo->ioo_bufcnt - 1;
- end = rnb->rnb_offset + rnb->rnb_len - 1;
+ pa.lpa_extent.end = rnb->rnb_offset + rnb->rnb_len - 1;
DEBUG_REQ(D_RPCTRACE, req, "%s %s: refresh rw locks: "DFID
- " ("LPU64"->"LPU64")\n",
- tgt_name(tsi->tsi_tgt), current->comm,
- PFID(&tsi->tsi_fid), start, end);
+ " ("LPU64"->"LPU64")\n", tgt_name(tsi->tsi_tgt),
+ current->comm, PFID(&tsi->tsi_fid), pa.lpa_extent.start,
+ pa.lpa_extent.end);
- lock_count = ofd_prolong_extent_locks(tsi, start, end);
+ ofd_prolong_extent_locks(tsi, &pa);
CDEBUG(D_DLMTRACE, "%s: refreshed %u locks timeout for req %p.\n",
- tgt_name(tsi->tsi_tgt), lock_count, req);
+ tgt_name(tsi->tsi_tgt), pa.lpa_blocks_cnt, req);
- RETURN(lock_count > 0);
+ if (pa.lpa_blocks_cnt > 0)
+ RETURN(1);
+
+ RETURN(pa.lpa_locks_cnt > 0 ? 0 : -ESTALE);
}
/**
* Implementation of ptlrpc_hpreq_ops::hpreq_lock_check for OST_PUNCH request.
*
* High-priority queue request check for whether the given punch request
- * (\a req) is blocking an LDLM lock cancel.
+ * (\a req) is blocking an LDLM lock cancel. Also checks whether the request is
+ * covered by an LDLM lock.
+ *
+
*
* \param[in] req the incoming request
*
* \retval 1 if \a req is blocking an LDLM lock cancel
* \retval 0 if it is not
+ * \retval -ESTALE if lock is not found
*/
static int ofd_punch_hpreq_check(struct ptlrpc_request *req)
{
struct tgt_session_info *tsi;
struct obdo *oa;
- int lock_count;
+ struct ldlm_prolong_args pa = { 0 };
ENTRY;
LASSERT(!(oa->o_valid & OBD_MD_FLFLAGS &&
oa->o_flags & OBD_FL_SRVLOCK));
+ pa.lpa_mode = LCK_PW | LCK_GROUP;
+ pa.lpa_extent.start = oa->o_size;
+ pa.lpa_extent.end = oa->o_blocks;
+
CDEBUG(D_DLMTRACE,
"%s: refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
tgt_name(tsi->tsi_tgt), tsi->tsi_resid.name[0],
- tsi->tsi_resid.name[1], oa->o_size, oa->o_blocks);
+ tsi->tsi_resid.name[1], pa.lpa_extent.start, pa.lpa_extent.end);
- lock_count = ofd_prolong_extent_locks(tsi, oa->o_size, oa->o_blocks);
+ ofd_prolong_extent_locks(tsi, &pa);
CDEBUG(D_DLMTRACE, "%s: refreshed %u locks timeout for req %p.\n",
- tgt_name(tsi->tsi_tgt), lock_count, req);
+ tgt_name(tsi->tsi_tgt), pa.lpa_blocks_cnt, req);
+
+ if (pa.lpa_blocks_cnt > 0)
+ RETURN(1);
- RETURN(lock_count > 0);
+ RETURN(pa.lpa_locks_cnt > 0 ? 0 : -ESTALE);
}
/**
LASSERT(rnb != NULL); /* must exist after request preprocessing */
/* no high priority if server lock is needed */
- if (rnb->rnb_flags & OBD_BRW_SRVLOCK)
+ if (rnb->rnb_flags & OBD_BRW_SRVLOCK ||
+ (lustre_msg_get_flags(tgt_ses_req(tsi)->rq_reqmsg)
+ & MSG_REPLAY))
return;
}
tgt_ses_req(tsi)->rq_ops = &ofd_hpreq_rw;
{
LASSERT(tsi->tsi_ost_body != NULL); /* must exists if we are here */
/* no high-priority if server lock is needed */
- if (tsi->tsi_ost_body->oa.o_valid & OBD_MD_FLFLAGS &&
- tsi->tsi_ost_body->oa.o_flags & OBD_FL_SRVLOCK)
+ if ((tsi->tsi_ost_body->oa.o_valid & OBD_MD_FLFLAGS &&
+ tsi->tsi_ost_body->oa.o_flags & OBD_FL_SRVLOCK) ||
+ tgt_conn_flags(tsi) & OBD_CONNECT_MDS ||
+ lustre_msg_get_flags(tgt_ses_req(tsi)->rq_reqmsg) & MSG_REPLAY)
return;
tgt_ses_req(tsi)->rq_ops = &ofd_hpreq_punch;
}