* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011 Whamcloud, Inc.
+ *
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/**
* Do not return server-side uid/gid to remote client
*/
-static void ost_drop_id(struct obd_export *exp, struct obdo *oa)
+static void ost_drop_id(struct obd_export *exp, struct obdo *oa)
{
if (exp_connect_rmtclient(exp)) {
oa->o_uid = -1;
}
}
+/**
+ * Validate the obdo (and the optional ioobj) received from a client.
+ *
+ * If the request comes from 2.0 clients, currently only RSVD seq and IDIF
+ * req are valid.
+ * a. for single MDS seq = FID_SEQ_OST_MDT0,
+ * b. for CMD, seq = FID_SEQ_OST_MDT0, FID_SEQ_OST_MDT1 - FID_SEQ_OST_MAX
+ *
+ * When oa does not have OBD_MD_FLGROUP set in o_valid, its o_seq (and
+ * ioobj->ioo_seq, if ioobj is given) is forced to FID_SEQ_OST_MDT0.
+ * Otherwise o_seq has to pass fid_seq_is_rsvd() or fid_seq_is_idif().
+ * On success obdo_from_ostid() is applied to oa and, if ioobj is given,
+ * ioobj_from_obdo() is applied to ioobj.
+ *
+ * exp:   export used to name the device and the client NID in the error
+ *        message
+ * oa:    obdo to validate, updated in place; NULL is rejected
+ * ioobj: optional I/O object updated together with oa; may be NULL
+ *
+ * Returns 0 if oa is acceptable, or -EPROTO (after logging a CERROR) if oa
+ * is NULL or carries a sequence that is neither reserved nor IDIF.
+ */
+static int ost_validate_obdo(struct obd_export *exp, struct obdo *oa,
+ struct obd_ioobj *ioobj)
+{
+ if (oa != NULL && !(oa->o_valid & OBD_MD_FLGROUP)) {
+ oa->o_seq = FID_SEQ_OST_MDT0;
+ if (ioobj)
+ ioobj->ioo_seq = FID_SEQ_OST_MDT0;
+ /* remove fid_seq_is_rsvd() after FID-on-OST allows SEQ > 9 */
+ } else if (oa == NULL ||
+ !(fid_seq_is_rsvd(oa->o_seq) || fid_seq_is_idif(oa->o_seq))) {
+ CERROR("%s: client %s sent invalid object "POSTID"\n",
+ exp->exp_obd->obd_name, obd_export_nid2str(exp),
+ oa ? oa->o_id : -1, oa ? oa->o_seq : -1);
+ return -EPROTO;
+ }
+ obdo_from_ostid(oa, &oa->o_oi);
+ if (ioobj)
+ ioobj_from_obdo(ioobj, oa);
+ return 0;
+}
+
void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
{
struct oti_req_ack_lock *ack_lock;
if (oti == NULL)
return;
- if (req->rq_repmsg)
+ if (req->rq_repmsg) {
+ __u64 versions[PTLRPC_NUM_VERSIONS] = { 0 };
lustre_msg_set_transno(req->rq_repmsg, oti->oti_transno);
+ versions[0] = oti->oti_pre_version;
+ lustre_msg_set_versions(req->rq_repmsg, versions);
+ }
req->rq_transno = oti->oti_transno;
/* XXX 4 == entries in oti_ack_locks??? */
if (body->oa.o_id == 0)
RETURN(-EPROTO);
+ rc = ost_validate_obdo(exp, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
+
/* If there's a DLM request, cancel the locks mentioned in it*/
if (req_capsule_field_present(&req->rq_pill, &RMF_DLM_REQ, RCL_CLIENT)) {
struct ldlm_request *dlm;
__u64 start, __u64 count, struct lustre_handle *lh,
int mode, int flags)
{
- struct ldlm_res_id res_id = { .name = { oa->o_id, 0, oa->o_gr, 0} };
+ struct ldlm_res_id res_id;
ldlm_policy_data_t policy;
__u64 end = start + count;
ENTRY;
LASSERT(!lustre_handle_is_used(lh));
- LASSERT((oa->o_valid & (OBD_MD_FLID | OBD_MD_FLGROUP)) ==
- (OBD_MD_FLID | OBD_MD_FLGROUP));
+ /* o_id and o_seq are used to locate the resource; if the client failed
+ * to set them, return an error instead of triggering an ASSERTION. */
+ if (unlikely((oa->o_valid & (OBD_MD_FLID | OBD_MD_FLGROUP)) !=
+ (OBD_MD_FLID | OBD_MD_FLGROUP)))
+ RETURN(-EPROTO);
if (!(oa->o_valid & OBD_MD_FLFLAGS) ||
!(oa->o_flags & OBD_FL_SRVLOCK))
RETURN(0);
+ osc_build_res_name(oa->o_id, oa->o_seq, &res_id);
CDEBUG(D_INODE, "OST-side extent lock.\n");
policy.l_extent.start = start & CFS_PAGE_MASK;
static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req)
{
struct ost_body *body, *repbody;
- struct obd_info oinfo = { { { 0 } } };
+ struct obd_info *oinfo;
struct lustre_handle lh = { 0 };
+ struct lustre_capa *capa = NULL;
int rc;
ENTRY;
if (body == NULL)
RETURN(-EFAULT);
- rc = req_capsule_server_pack(&req->rq_pill);
+ rc = ost_validate_obdo(exp, &body->oa, NULL);
if (rc)
RETURN(rc);
- repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- repbody->oa = body->oa;
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ RETURN(rc);
rc = ost_lock_get(exp, &body->oa, 0, OBD_OBJECT_EOF, &lh, LCK_PR, 0);
if (rc)
RETURN(rc);
- oinfo.oi_oa = &repbody->oa;
- if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) {
- oinfo.oi_capa = req_capsule_client_get(&req->rq_pill,
- &RMF_CAPA1);
- if (oinfo.oi_capa == NULL) {
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
+ capa = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
+ if (capa == NULL) {
CERROR("Missing capability for OST GETATTR");
- RETURN (-EFAULT);
+ GOTO(unlock, rc = -EFAULT);
}
}
- req->rq_status = obd_getattr(exp, &oinfo);
- ost_lock_put(exp, &lh, LCK_PR);
+ OBD_ALLOC_PTR(oinfo);
+ if (!oinfo)
+ GOTO(unlock, rc = -ENOMEM);
+ oinfo->oi_oa = &body->oa;
+ oinfo->oi_capa = capa;
+
+ req->rq_status = obd_getattr(exp, oinfo);
+
+ OBD_FREE_PTR(oinfo);
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ repbody->oa = body->oa;
ost_drop_id(exp, &repbody->oa);
+
+unlock:
+ ost_lock_put(exp, &lh, LCK_PR);
+
RETURN(0);
}
osfs = req_capsule_server_get(&req->rq_pill, &RMF_OBD_STATFS);
req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs,
- cfs_time_current_64() - CFS_HZ, 0);
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ 0);
if (req->rq_status != 0)
CERROR("ost: statfs failed: rc %d\n", req->rq_status);
if (body == NULL)
RETURN(-EFAULT);
+ rc = ost_validate_obdo(req->rq_export, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
+
rc = req_capsule_server_pack(&req->rq_pill);
if (rc)
RETURN(rc);
repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- oti->oti_logcookies = &repbody->oa.o_lcookie;
+ repbody->oa = body->oa;
+ oti->oti_logcookies = &body->oa.o_lcookie;
req->rq_status = obd_create(exp, &repbody->oa, NULL, oti);
//obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0);
static int ost_punch(struct obd_export *exp, struct ptlrpc_request *req,
struct obd_trans_info *oti)
{
- struct obd_info oinfo = { { { 0 } } };
struct ost_body *body, *repbody;
int rc, flags = 0;
struct lustre_handle lh = {0,};
if (body == NULL)
RETURN(-EFAULT);
- oinfo.oi_oa = &body->oa;
- oinfo.oi_policy.l_extent.start = oinfo.oi_oa->o_size;
- oinfo.oi_policy.l_extent.end = oinfo.oi_oa->o_blocks;
+ rc = ost_validate_obdo(exp, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
- if ((oinfo.oi_oa->o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
+ if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
RETURN(-EPROTO);
/* standard truncate optimization: if file body is completely
* destroyed, don't send data back to the server. */
- if (oinfo.oi_oa->o_size == 0)
+ if (body->oa.o_size == 0)
flags |= LDLM_AST_DISCARD_DATA;
- repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- rc = ost_lock_get(exp, oinfo.oi_oa, oinfo.oi_oa->o_size,
- oinfo.oi_oa->o_blocks, &lh, LCK_PW, flags);
+ rc = ost_lock_get(exp, &body->oa, body->oa.o_size, body->oa.o_blocks,
+ &lh, LCK_PW, flags);
if (rc == 0) {
- if (oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS &&
- oinfo.oi_oa->o_flags == OBD_FL_SRVLOCK)
+ struct obd_info *oinfo;
+ struct lustre_capa *capa = NULL;
+
+ if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+ body->oa.o_flags == OBD_FL_SRVLOCK)
/*
* If OBD_FL_SRVLOCK is the only bit set in
* ->o_flags, clear OBD_MD_FLFLAGS to avoid falling
* through filter_setattr() to filter_iocontrol().
*/
- oinfo.oi_oa->o_valid &= ~OBD_MD_FLFLAGS;
+ body->oa.o_valid &= ~OBD_MD_FLFLAGS;
- if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) {
- oinfo.oi_capa = req_capsule_client_get(&req->rq_pill,
- &RMF_CAPA1);
- if (oinfo.oi_capa == NULL) {
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
+ capa = req_capsule_client_get(&req->rq_pill,
+ &RMF_CAPA1);
+ if (capa == NULL) {
CERROR("Missing capability for OST PUNCH");
- RETURN (-EFAULT);
+ GOTO(unlock, rc = -EFAULT);
}
}
- req->rq_status = obd_punch(exp, &oinfo, oti, NULL);
+
+ OBD_ALLOC_PTR(oinfo);
+ if (!oinfo)
+ GOTO(unlock, rc = -ENOMEM);
+ oinfo->oi_oa = &body->oa;
+ oinfo->oi_policy.l_extent.start = oinfo->oi_oa->o_size;
+ oinfo->oi_policy.l_extent.end = oinfo->oi_oa->o_blocks;
+ oinfo->oi_capa = capa;
+ oinfo->oi_flags = OBD_FL_PUNCH;
+
+ req->rq_status = obd_punch(exp, oinfo, oti, NULL);
+ OBD_FREE_PTR(oinfo);
+unlock:
ost_lock_put(exp, &lh, LCK_PW);
}
- repbody->oa = *oinfo.oi_oa;
+
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ repbody->oa = body->oa;
ost_drop_id(exp, &repbody->oa);
RETURN(rc);
}
static int ost_sync(struct obd_export *exp, struct ptlrpc_request *req)
{
struct ost_body *body, *repbody;
+ struct obd_info *oinfo;
struct lustre_capa *capa = NULL;
int rc;
ENTRY;
if (body == NULL)
RETURN(-EFAULT);
+ rc = ost_validate_obdo(exp, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
+
if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
capa = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
if (capa == NULL) {
if (rc)
RETURN(rc);
+ OBD_ALLOC_PTR(oinfo);
+ if (!oinfo)
+ RETURN(-ENOMEM);
+
+ oinfo->oi_oa = &body->oa;
+ oinfo->oi_capa = capa;
+ req->rq_status = obd_sync(exp, oinfo, body->oa.o_size,
+ body->oa.o_blocks, NULL);
+ OBD_FREE_PTR(oinfo);
+
repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
- req->rq_status = obd_sync(exp, &repbody->oa, NULL, repbody->oa.o_size,
- repbody->oa.o_blocks, capa);
+ repbody->oa = body->oa;
ost_drop_id(exp, &repbody->oa);
RETURN(0);
}
struct obd_trans_info *oti)
{
struct ost_body *body, *repbody;
+ struct obd_info *oinfo;
+ struct lustre_capa *capa = NULL;
int rc;
- struct obd_info oinfo = { { { 0 } } };
ENTRY;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
if (body == NULL)
RETURN(-EFAULT);
- rc = req_capsule_server_pack(&req->rq_pill);
+ rc = ost_validate_obdo(req->rq_export, &body->oa, NULL);
if (rc)
RETURN(rc);
- repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- repbody->oa = body->oa;
+ rc = req_capsule_server_pack(&req->rq_pill);
+ if (rc)
+ RETURN(rc);
- oinfo.oi_oa = &repbody->oa;
- if (oinfo.oi_oa->o_valid & OBD_MD_FLOSSCAPA) {
- oinfo.oi_capa = req_capsule_client_get(&req->rq_pill,
- &RMF_CAPA1);
- if (oinfo.oi_capa == NULL) {
+ if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
+ capa = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
+ if (capa == NULL) {
CERROR("Missing capability for OST SETATTR");
RETURN (-EFAULT);
}
}
- req->rq_status = obd_setattr(exp, &oinfo, oti);
+
+ OBD_ALLOC_PTR(oinfo);
+ if (!oinfo)
+ RETURN(-ENOMEM);
+ oinfo->oi_oa = &body->oa;
+ oinfo->oi_capa = capa;
+
+ req->rq_status = obd_setattr(exp, oinfo, oti);
+
+ OBD_FREE_PTR(oinfo);
+
+ repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ repbody->oa = body->oa;
ost_drop_id(exp, &repbody->oa);
RETURN(0);
}
-static int ost_bulk_timeout(void *data)
-{
- ENTRY;
- /* We don't fail the connection here, because having the export
- * killed makes the (vital) call to commitrw very sad.
- */
- RETURN(1);
-}
-
static __u32 ost_checksum_bulk(struct ptlrpc_bulk_desc *desc, int opc,
cksum_type_t cksum_type)
{
kunmap(page);
}
- return cksum;
+ return fini_checksum(cksum, cksum_type);
}
static int ost_brw_lock_get(int mode, struct obd_export *exp,
int i;
ENTRY;
- osc_build_res_name(obj->ioo_id, obj->ioo_gr, &res_id);
+ osc_build_res_name(obj->ioo_id, obj->ioo_seq, &res_id);
LASSERT(mode == LCK_PR || mode == LCK_PW);
LASSERT(!lustre_handle_is_used(lh));
struct ost_prolong_data opd = { 0 };
ENTRY;
- osc_build_res_name(obj->ioo_id, obj->ioo_gr, &res_id);
+ osc_build_res_name(obj->ioo_id, obj->ioo_seq, &res_id);
opd.opd_mode = mode;
opd.opd_exp = req->rq_export;
RETURN(opd.opd_lock_match);
}
+/* Allocate thread local buffers if needed
+ *
+ * Returns the ost_thread_local_cache stored in r->rq_svc_thread->t_data.
+ * If the service thread has none, a cache is allocated, flagged as
+ * temporary and stored in t_data before being returned.
+ * Returns NULL if that allocation fails. */
+static struct ost_thread_local_cache *ost_tls_get(struct ptlrpc_request *r)
+{
+ struct ost_thread_local_cache *tls =
+ (struct ost_thread_local_cache *)(r->rq_svc_thread->t_data);
+
+ /* In the normal mode of operation an I/O request is serviced only
+ * by ll_ost_io threads, each of which has its own tls buffers
+ * allocated by ost_thread_init().
+ * During recovery, an I/O request may be queued until any of the ost
+ * service threads processes it, and that thread is not necessarily
+ * one of the ll_ost_io threads. In that case we dynamically allocate
+ * tls buffers for the duration of the request service. */
+ if (unlikely(tls == NULL)) {
+ LASSERT(r->rq_export->exp_in_recovery);
+ OBD_ALLOC_PTR(tls);
+ if (tls != NULL) {
+ tls->temporary = 1;
+ r->rq_svc_thread->t_data = tls;
+ }
+ }
+ return tls;
+}
+
+/* Free thread local buffers if they were allocated only for servicing
+ * this one request
+ *
+ * A cache flagged as temporary is freed and r->rq_svc_thread->t_data is
+ * reset to NULL; any other cache is left untouched.
+ * The cache pointer is dereferenced without a NULL check, so t_data must
+ * be non-NULL when this is called. */
+static void ost_tls_put(struct ptlrpc_request *r)
+{
+ struct ost_thread_local_cache *tls =
+ (struct ost_thread_local_cache *)(r->rq_svc_thread->t_data);
+
+ if (unlikely(tls->temporary)) {
+ OBD_FREE_PTR(tls);
+ r->rq_svc_thread->t_data = NULL;
+ }
+}
+
static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
{
struct ptlrpc_bulk_desc *desc = NULL;
struct lustre_handle lockh = { 0 };
int niocount, npages, nob = 0, rc, i;
int no_reply = 0;
+ struct ost_thread_local_cache *tls;
ENTRY;
req->rq_bulk_read = 1;
if (ioo == NULL)
GOTO(out, rc = -EFAULT);
+ rc = ost_validate_obdo(exp, &body->oa, ioo);
+ if (rc)
+ RETURN(rc);
+
niocount = ioo->ioo_bufcnt;
remote_nb = req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE);
if (remote_nb == NULL)
if (rc)
GOTO(out, rc);
- /*
- * Per-thread array of struct niobuf_{local,remote}'s was allocated by
- * ost_thread_init().
- */
- local_nb = ost_tls(req)->local;
+ tls = ost_tls_get(req);
+ if (tls == NULL)
+ GOTO(out_bulk, rc = -ENOMEM);
+ local_nb = tls->local;
rc = ost_brw_lock_get(LCK_PR, exp, ioo, remote_nb, &lockh);
if (rc != 0)
- GOTO(out_bulk, rc);
+ GOTO(out_tls, rc);
/*
* If getting the lock took more time than
desc = ptlrpc_prep_bulk_exp(req, npages,
BULK_PUT_SOURCE, OST_BULK_PORTAL);
if (desc == NULL)
- GOTO(out_lock, rc = -ENOMEM);
+ GOTO(out_commitrw, rc = -ENOMEM);
if (!lustre_handle_is_used(&lockh))
/* no needs to try to prolong lock if server is asked
}
if (body->oa.o_valid & OBD_MD_FLCKSUM) {
- cksum_type_t cksum_type = OBD_CKSUM_CRC32;
-
- if (body->oa.o_valid & OBD_MD_FLFLAGS)
- cksum_type = cksum_type_unpack(body->oa.o_flags);
+ cksum_type_t cksum_type =
+ cksum_type_unpack(body->oa.o_valid & OBD_MD_FLFLAGS ?
+ body->oa.o_flags : 0);
body->oa.o_flags = cksum_type_pack(cksum_type);
body->oa.o_valid = OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
- body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ, cksum_type);
+ body->oa.o_cksum = ost_checksum_bulk(desc, OST_READ,cksum_type);
CDEBUG(D_PAGE,"checksum at read origin: %x\n",body->oa.o_cksum);
} else {
body->oa.o_valid = 0;
/* Check if client was evicted while we were doing i/o before touching
network */
if (rc == 0) {
- /* Check if there is eviction in progress, and if so, wait for
- * it to finish */
- if (unlikely(cfs_atomic_read(&exp->exp_obd->
- obd_evict_inprogress))) {
- lwi = LWI_INTR(NULL, NULL);
- rc = l_wait_event(exp->exp_obd->
- obd_evict_inprogress_waitq,
- !cfs_atomic_read(&exp->exp_obd->
- obd_evict_inprogress),
- &lwi);
- }
- /* Check if client was evicted or tried to reconnect already */
- if (exp->exp_failed || exp->exp_abort_active_req)
- rc = -ENOTCONN;
- else {
- rc = sptlrpc_svc_wrap_bulk(req, desc);
- if (rc == 0)
- rc = ptlrpc_start_bulk_transfer(desc);
- }
-
- if (rc == 0) {
- time_t start = cfs_time_current_sec();
- do {
- long timeoutl = req->rq_deadline -
- cfs_time_current_sec();
- cfs_duration_t timeout = timeoutl <= 0 ?
- CFS_TICK : cfs_time_seconds(timeoutl);
- lwi = LWI_TIMEOUT_INTERVAL(timeout,
- cfs_time_seconds(1),
- ost_bulk_timeout,
- desc);
- rc = l_wait_event(desc->bd_waitq,
- !ptlrpc_server_bulk_active(desc) ||
- exp->exp_failed ||
- exp->exp_abort_active_req,
- &lwi);
- LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline */
- } while ((rc == -ETIMEDOUT) &&
- (req->rq_deadline > cfs_time_current_sec()));
-
- if (rc == -ETIMEDOUT) {
- DEBUG_REQ(D_ERROR, req,
- "timeout on bulk PUT after %ld%+lds",
- req->rq_deadline - start,
- cfs_time_current_sec() -
- req->rq_deadline);
- ptlrpc_abort_bulk(desc);
- } else if (exp->exp_failed) {
- DEBUG_REQ(D_ERROR, req, "Eviction on bulk PUT");
- rc = -ENOTCONN;
- ptlrpc_abort_bulk(desc);
- } else if (exp->exp_abort_active_req) {
- DEBUG_REQ(D_ERROR, req, "Reconnect on bulk PUT");
- /* we don't reply anyway */
- rc = -ETIMEDOUT;
- ptlrpc_abort_bulk(desc);
- } else if (!desc->bd_success ||
- desc->bd_nob_transferred != desc->bd_nob) {
- DEBUG_REQ(D_ERROR, req, "%s bulk PUT %d(%d)",
- desc->bd_success ?
- "truncated" : "network error on",
- desc->bd_nob_transferred,
- desc->bd_nob);
- /* XXX should this be a different errno? */
- rc = -ETIMEDOUT;
- }
- } else {
- DEBUG_REQ(D_ERROR, req, "bulk PUT failed: rc %d", rc);
- }
+ rc = target_bulk_io(exp, desc, &lwi);
no_reply = rc != 0;
}
+out_commitrw:
/* Must commit after prep above in all cases */
rc = obd_commitrw(OBD_BRW_READ, exp, &body->oa, 1, ioo,
remote_nb, npages, local_nb, oti, rc);
out_lock:
ost_brw_lock_put(LCK_PR, ioo, remote_nb, &lockh);
+out_tls:
+ ost_tls_put(req);
out_bulk:
if (desc)
ptlrpc_free_bulk(desc);
int rc, i, j;
obd_count client_cksum = 0, server_cksum = 0;
cksum_type_t cksum_type = OBD_CKSUM_CRC32;
- int no_reply = 0;
+ int no_reply = 0, mmap = 0;
__u32 o_uid = 0, o_gid = 0;
+ struct ost_thread_local_cache *tls;
ENTRY;
req->rq_bulk_write = 1;
/* pause before transaction has been started */
OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK, (obd_timeout + 1) / 4);
- /* Check if there is eviction in progress, and if so, wait for it to
- * finish */
- if (unlikely(cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
- lwi = LWI_INTR(NULL, NULL); // We do not care how long it takes
- rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
- !cfs_atomic_read(&exp->exp_obd->obd_evict_inprogress),
- &lwi);
- }
- if (exp->exp_failed)
- GOTO(out, rc = -ENOTCONN);
-
/* ost_body, ioobj & noibuf_remote are verified and swabbed in
* ost_rw_hpreq_check(). */
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
if (body == NULL)
GOTO(out, rc = -EFAULT);
- if ((body->oa.o_flags & OBD_BRW_MEMALLOC) &&
- (exp->exp_connection->c_peer.nid == exp->exp_connection->c_self))
- libcfs_memory_pressure_set();
-
objcount = req_capsule_get_size(&req->rq_pill, &RMF_OBD_IOOBJ,
RCL_CLIENT) / sizeof(*ioo);
ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
if (ioo == NULL)
GOTO(out, rc = -EFAULT);
+
+ rc = ost_validate_obdo(exp, &body->oa, ioo);
+ if (rc)
+ RETURN(rc);
+
for (niocount = i = 0; i < objcount; i++)
niocount += ioo[i].ioo_bufcnt;
&RMF_NIOBUF_REMOTE, RCL_CLIENT) / sizeof(*remote_nb)))
GOTO(out, rc = -EFAULT);
+ if ((remote_nb[0].flags & OBD_BRW_MEMALLOC) &&
+ (exp->exp_connection->c_peer.nid == exp->exp_connection->c_self))
+ cfs_memory_pressure_set();
+
if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
capa = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
if (capa == NULL) {
rc = req_capsule_server_pack(&req->rq_pill);
if (rc != 0)
GOTO(out, rc);
- OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_PACK, obd_fail_val);
+ CFS_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_PACK, cfs_fail_val);
rcs = req_capsule_server_get(&req->rq_pill, &RMF_RCS);
- /*
- * Per-thread array of struct niobuf_{local,remote}'s was allocated by
- * ost_thread_init().
- */
- local_nb = ost_tls(req)->local;
+ tls = ost_tls_get(req);
+ if (tls == NULL)
+ GOTO(out_bulk, rc = -ENOMEM);
+ local_nb = tls->local;
rc = ost_brw_lock_get(LCK_PW, exp, ioo, remote_nb, &lockh);
if (rc != 0)
- GOTO(out_bulk, rc);
+ GOTO(out_tls, rc);
/*
* If getting the lock took more time than
if (body->oa.o_valid & OBD_MD_FLFLAGS)
cksum_type = cksum_type_unpack(body->oa.o_flags);
}
+ if (body->oa.o_valid & OBD_MD_FLFLAGS && body->oa.o_flags & OBD_FL_MMAP)
+ mmap = 1;
/* Because we already sync grant info with client when reconnect,
* grant info will be cleared for resent req, then fed_grant and
desc = ptlrpc_prep_bulk_exp(req, npages,
BULK_GET_SINK, OST_BULK_PORTAL);
if (desc == NULL)
- GOTO(out_lock, rc = -ENOMEM);
+ GOTO(skip_transfer, rc = -ENOMEM);
/* NB Having prepped, we must commit... */
if (rc != 0)
GOTO(out_lock, rc);
- /* Check if client was evicted or tried to reconnect while we
- * were doing i/o before touching network */
- if (desc->bd_export->exp_failed ||
- desc->bd_export->exp_abort_active_req)
- rc = -ENOTCONN;
- else
- rc = ptlrpc_start_bulk_transfer(desc);
- if (rc == 0) {
- time_t start = cfs_time_current_sec();
- do {
- long timeoutl = req->rq_deadline -
- cfs_time_current_sec();
- cfs_duration_t timeout = timeoutl <= 0 ?
- CFS_TICK : cfs_time_seconds(timeoutl);
- lwi = LWI_TIMEOUT_INTERVAL(timeout, cfs_time_seconds(1),
- ost_bulk_timeout, desc);
- rc = l_wait_event(desc->bd_waitq,
- !ptlrpc_server_bulk_active(desc) ||
- desc->bd_export->exp_failed ||
- desc->bd_export->exp_abort_active_req,
- &lwi);
- LASSERT(rc == 0 || rc == -ETIMEDOUT);
- /* Wait again if we changed deadline */
- } while ((rc == -ETIMEDOUT) &&
- (req->rq_deadline > cfs_time_current_sec()));
-
- if (rc == -ETIMEDOUT) {
- DEBUG_REQ(D_ERROR, req,
- "timeout on bulk GET after %ld%+lds",
- req->rq_deadline - start,
- cfs_time_current_sec() -
- req->rq_deadline);
- ptlrpc_abort_bulk(desc);
- } else if (desc->bd_export->exp_failed) {
- DEBUG_REQ(D_ERROR, req, "Eviction on bulk GET");
- rc = -ENOTCONN;
- ptlrpc_abort_bulk(desc);
- } else if (desc->bd_export->exp_abort_active_req) {
- DEBUG_REQ(D_ERROR, req, "Reconnect on bulk GET");
- /* we don't reply anyway */
- rc = -ETIMEDOUT;
- ptlrpc_abort_bulk(desc);
- } else if (!desc->bd_success) {
- DEBUG_REQ(D_ERROR, req, "network error on bulk GET");
- /* XXX should this be a different errno? */
- rc = -ETIMEDOUT;
- } else {
- rc = sptlrpc_svc_unwrap_bulk(req, desc);
- }
- } else {
- DEBUG_REQ(D_ERROR, req, "ptlrpc_bulk_get failed: rc %d", rc);
- }
+ rc = target_bulk_io(exp, desc, &lwi);
no_reply = rc != 0;
+skip_transfer:
repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
repbody->oa.o_cksum = server_cksum;
cksum_counter++;
if (unlikely(client_cksum != server_cksum)) {
- CERROR("client csum %x, server csum %x\n",
- client_cksum, server_cksum);
+ CDEBUG_LIMIT(mmap ? D_INFO : D_ERROR,
+ "client csum %x, server csum %x\n",
+ client_cksum, server_cksum);
cksum_counter = 0;
} else if ((cksum_counter & (-cksum_counter)) == cksum_counter){
CDEBUG(D_INFO, "Checksum %u from %s OK: %x\n",
repbody->oa.o_gid = o_gid;
}
- if (unlikely(client_cksum != server_cksum && rc == 0)) {
+ /*
+ * Disable sending mtime/atime back to the client. If the client locked
+ * the whole object, then it has already updated the mtime on its side,
+ * otherwise it will have to glimpse anyway (see bug 21489, comment 32)
+ */
+ repbody->oa.o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLATIME);
+
+ if (unlikely(client_cksum != server_cksum && rc == 0 && !mmap)) {
int new_cksum = ost_checksum_bulk(desc, OST_WRITE, cksum_type);
char *msg;
char *via;
}
LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: %s from "
- "%s%s%s inum "LPU64"/"LPU64" object "
+ "%s%s%s inode "DFID" object "
LPU64"/"LPU64" extent ["LPU64"-"LPU64"]\n",
exp->exp_obd->obd_name, msg,
libcfs_id2str(req->rq_peer),
via, router,
body->oa.o_valid & OBD_MD_FLFID ?
- body->oa.o_fid : (__u64)0,
+ body->oa.o_parent_seq : (__u64)0,
+ body->oa.o_valid & OBD_MD_FLFID ?
+ body->oa.o_parent_oid : 0,
body->oa.o_valid & OBD_MD_FLFID ?
- body->oa.o_generation :(__u64)0,
+ body->oa.o_parent_ver : 0,
body->oa.o_id,
body->oa.o_valid & OBD_MD_FLGROUP ?
- body->oa.o_gr : (__u64)0,
+ body->oa.o_seq : (__u64)0,
local_nb[0].offset,
local_nb[npages-1].offset +
local_nb[npages-1].len - 1 );
out_lock:
ost_brw_lock_put(LCK_PW, ioo, remote_nb, &lockh);
+out_tls:
+ ost_tls_put(req);
out_bulk:
if (desc)
ptlrpc_free_bulk(desc);
exp->exp_connection->c_remote_uuid.uuid,
libcfs_id2str(req->rq_peer));
}
- libcfs_memory_pressure_clr();
+ cfs_memory_pressure_clr();
RETURN(rc);
}
}
keylen = req_capsule_get_size(pill, &RMF_SETINFO_KEY, RCL_CLIENT);
+ if (KEY_IS(KEY_FIEMAP)) {
+ struct ll_fiemap_info_key *fm_key = key;
+ int rc;
+
+ rc = ost_validate_obdo(exp, &fm_key->oa, NULL);
+ if (rc)
+ RETURN(rc);
+ }
+
rc = obd_get_info(exp, keylen, key, &replylen, NULL, NULL);
if (rc)
RETURN(rc);
GOTO(out, rc);
repoqa = req_capsule_server_get(&req->rq_pill, &RMF_QUOTA_ADJUST_QUNIT);
- req->rq_status = obd_quota_adjust_qunit(req->rq_export, oqaq, qctxt);
+ req->rq_status = obd_quota_adjust_qunit(req->rq_export, oqaq, qctxt, NULL);
*repoqa = *oqaq;
out:
return rc;
}
+/* Ensure that data and metadata are synced to the disk when lock is cancelled
+ * (if requested)
+ *
+ * The KEY_SYNC_LOCK_CANCEL setting is fetched with obd_get_info() on the
+ * lock's export. obd_sync() is issued over the lock's extent when that
+ * query succeeded, flag is LDLM_CB_CANCELING, the granted mode includes
+ * LCK_PW or LCK_GROUP, and the setting is either ALWAYS_SYNC_ON_CANCEL, or
+ * BLOCKING_SYNC_ON_CANCEL while the lock has LDLM_FL_CBPENDING set. The
+ * object to sync is taken from the first two words of the lock's resource
+ * name (used as o_id and o_seq).
+ *
+ * A sync failure is only logged. The return value is that of
+ * ldlm_server_blocking_ast(), except that -ENOMEM is returned, without
+ * calling ldlm_server_blocking_ast(), when the temporary obd_info or obdo
+ * cannot be allocated.
+ *
+ * NOTE(review): the "rc" declared inside the sync branch shadows the
+ * function-level "rc"; the outer value is overwritten afterwards, so the
+ * result is unaffected, but the inner declaration could be dropped. */
+int ost_blocking_ast(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *desc,
+ void *data, int flag)
+{
+ __u32 sync_lock_cancel = 0;
+ __u32 len = sizeof(sync_lock_cancel);
+ int rc = 0;
+ ENTRY;
+
+ rc = obd_get_info(lock->l_export, sizeof(KEY_SYNC_LOCK_CANCEL),
+ KEY_SYNC_LOCK_CANCEL, &len, &sync_lock_cancel, NULL);
+
+ if (!rc && flag == LDLM_CB_CANCELING &&
+ (lock->l_granted_mode & (LCK_PW|LCK_GROUP)) &&
+ (sync_lock_cancel == ALWAYS_SYNC_ON_CANCEL ||
+ (sync_lock_cancel == BLOCKING_SYNC_ON_CANCEL &&
+ lock->l_flags & LDLM_FL_CBPENDING))) {
+ struct obd_info *oinfo;
+ struct obdo *oa;
+ int rc;
+
+ OBD_ALLOC_PTR(oinfo);
+ if (!oinfo)
+ RETURN(-ENOMEM);
+ OBDO_ALLOC(oa);
+ if (!oa) {
+ OBD_FREE_PTR(oinfo);
+ RETURN(-ENOMEM);
+ }
+ oa->o_id = lock->l_resource->lr_name.name[0];
+ oa->o_seq = lock->l_resource->lr_name.name[1];
+ oa->o_valid = OBD_MD_FLID|OBD_MD_FLGROUP;
+ oinfo->oi_oa = oa;
+
+ rc = obd_sync(lock->l_export, oinfo,
+ lock->l_policy_data.l_extent.start,
+ lock->l_policy_data.l_extent.end, NULL);
+ if (rc)
+ CERROR("Error %d syncing data on lock cancel\n", rc);
+
+ OBDO_FREE(oa);
+ OBD_FREE_PTR(oinfo);
+ }
+
+ rc = ldlm_server_blocking_ast(lock, desc, data, flag);
+ RETURN(rc);
+}
+
static int ost_filter_recovery_request(struct ptlrpc_request *req,
struct obd_device *obd, int *process)
{
struct obd_ioobj *ioo;
struct ost_body *body;
int objcount, niocount;
- int mode, opc, i;
+ int mode, opc, i, rc;
__u64 start, end;
ENTRY;
if (ioo == NULL)
RETURN(0);
+ rc = ost_validate_obdo(req->rq_export, &body->oa, ioo);
+ if (rc)
+ RETURN(rc);
+
for (niocount = i = 0; i < objcount; i++)
niocount += ioo[i].ioo_bufcnt;
nb[ioo->ioo_bufcnt - 1].len - 1) | ~CFS_PAGE_MASK;
LASSERT(lock->l_resource != NULL);
- if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_gr,
+ if (!osc_res_name_eq(ioo->ioo_id, ioo->ioo_seq,
&lock->l_resource->lr_name))
RETURN(0);
struct obd_ioobj *ioo;
struct ost_body *body;
int objcount, niocount;
- int mode, opc, i;
+ int mode, opc, i, rc;
ENTRY;
opc = lustre_msg_get_opc(req->rq_reqmsg);
if (ioo == NULL)
RETURN(-EFAULT);
+ rc = ost_validate_obdo(req->rq_export, &body->oa, ioo);
+ if (rc)
+ RETURN(rc);
+
for (niocount = i = 0; i < objcount; i++)
niocount += ioo[i].ioo_bufcnt;
nb = req_capsule_client_get(&req->rq_pill, &RMF_NIOBUF_REMOTE);
struct ldlm_lock *lock)
{
struct ost_body *body;
+ int rc;
ENTRY;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
if (body == NULL)
RETURN(0); /* can't return -EFAULT here */
+ rc = ost_validate_obdo(req->rq_export, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
+
if (body->oa.o_valid & OBD_MD_FLHANDLE &&
body->oa.o_handle.cookie == lock->l_handle.h_cookie)
RETURN(1);
static int ost_punch_hpreq_check(struct ptlrpc_request *req)
{
struct ost_body *body;
+ int rc;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
if (body == NULL)
RETURN(-EFAULT);
+ rc = ost_validate_obdo(req->rq_export, &body->oa, NULL);
+ if (rc)
+ RETURN(rc);
+
LASSERT(!(body->oa.o_valid & OBD_MD_FLFLAGS) ||
!(body->oa.o_flags & OBD_FL_SRVLOCK));
CERROR("Missing/short ost_body\n");
RETURN(-EFAULT);
}
+
objcount = req_capsule_get_size(&req->rq_pill,
&RMF_OBD_IOOBJ,
RCL_CLIENT) /
req_capsule_init(&req->rq_pill, req, RCL_SERVER);
if (lustre_msg_get_opc(req->rq_reqmsg) != OST_CONNECT) {
- int recovering;
-
if (!class_connected_export(req->rq_export)) {
CDEBUG(D_HA,"operation %d on unconnected OST from %s\n",
lustre_msg_get_opc(req->rq_reqmsg),
obd = req->rq_export->exp_obd;
/* Check for aborted recovery. */
- cfs_spin_lock_bh(&obd->obd_processing_task_lock);
- recovering = obd->obd_recovering;
- cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
- if (recovering) {
+ if (obd->obd_recovering) {
rc = ost_filter_recovery_request(req, obd,
&should_process);
if (rc || !should_process)
if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_ENQUEUE))
RETURN(0);
rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
- ldlm_server_blocking_ast,
+ ost_blocking_ast,
ldlm_server_glimpse_ast);
fail = OBD_FAIL_OST_LDLM_REPLY_NET;
break;
GOTO(out_lprocfs, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(obd, ost->ost_service);
+ rc = ptlrpc_start_threads(ost->ost_service);
if (rc)
GOTO(out_service, rc = -EINVAL);
GOTO(out_service, rc = -ENOMEM);
}
- rc = ptlrpc_start_threads(obd, ost->ost_create_service);
+ rc = ptlrpc_start_threads(ost->ost_create_service);
if (rc)
GOTO(out_create, rc = -EINVAL);
ost->ost_io_service->srv_init = ost_thread_init;
ost->ost_io_service->srv_done = ost_thread_done;
ost->ost_io_service->srv_cpu_affinity = 1;
- rc = ptlrpc_start_threads(obd, ost->ost_io_service);
+ rc = ptlrpc_start_threads(ost->ost_io_service);
if (rc)
GOTO(out_io, rc = -EINVAL);
ping_evictor_stop();
- cfs_spin_lock_bh(&obd->obd_processing_task_lock);
- if (obd->obd_recovering) {
- target_cancel_recovery_timer(obd);
- obd->obd_recovering = 0;
- }
- cfs_spin_unlock_bh(&obd->obd_processing_task_lock);
-
+ /* There is no recovery for the OST OBD; all recovery is controlled by
+ * the obdfilter OBD. */
+ LASSERT(obd->obd_recovering == 0);
cfs_down(&ost->ost_health_sem);
ptlrpc_unregister_service(ost->ost_service);
ptlrpc_unregister_service(ost->ost_create_service);