* Validate oa from client.
* If the request comes from 2.0 clients, currently only RSVD seq and IDIF
* req are valid.
- * a. for single MDS seq = FID_SEQ_OST_MDT0,
- * b. for CMD, seq = FID_SEQ_OST_MDT0, FID_SEQ_OST_MDT1 - FID_SEQ_OST_MAX
+ * a. objects in Single MDT FS seq = FID_SEQ_OST_MDT0, oi_id != 0
+ * b. Echo objects (seq = 2): old echo clients still use oi_id/oi_seq to
+ * pack the ost_id. Because a non-zero oi_seq makes it difficult to tell
+ * whether this is an oi_fid or a real ostid, the server checks
+ * OBD_CONNECT_FID and then converts the ostid to a FID for old clients.
+ * c. Old FID-disable osc will send IDIF.
+ * d. new FID-enable osc/osp will send normal FID.
+ *
+ * And also oi_id/f_oid should always start from 1. oi_id/f_oid = 0 will
+ * be used for LAST_ID file, and only being accessed inside OST now.
*/
static int ost_validate_obdo(struct obd_export *exp, struct obdo *oa,
struct obd_ioobj *ioobj)
{
- if (unlikely(oa != NULL && !(oa->o_valid & OBD_MD_FLGROUP))) {
- ostid_set_seq_mdt0(&oa->o_oi);
- if (ioobj)
- ostid_set_seq_mdt0(&ioobj->ioo_oid);
- } else if (unlikely(oa == NULL ||
- !(fid_seq_is_idif(ostid_seq(&oa->o_oi)) ||
- fid_seq_is_mdt(ostid_seq(&oa->o_oi)) ||
- fid_seq_is_echo(ostid_seq(&oa->o_oi))))) {
- CERROR("%s: client %s sent bad object "DOSTID": rc = -EPROTO\n",
- exp->exp_obd->obd_name, obd_export_nid2str(exp),
- oa ? ostid_seq(&oa->o_oi) : -1,
- oa ? ostid_id(&oa->o_oi) : -1);
- return -EPROTO;
+ int rc = 0;
+
+ /* Test oa for NULL before reading oi_seq through it. A NULL oa then
+ * falls into the else branch below, which rejects it with -EPROTO. */
+ if (unlikely(oa != NULL &&
+ !(exp_connect_flags(exp) & OBD_CONNECT_FID) &&
+ fid_seq_is_echo(oa->o_oi.oi.oi_seq))) {
+ /* Sigh 2.[123] client still sends echo req with oi_id = 0
+ * during create, and we will reset this to 1, since this
+ * oi_id is basically useless in the following create process,
+ * but oi_id == 0 will make it difficult to tell whether it is
+ * real FID or ost_id. */
+ oa->o_oi.oi_fid.f_oid = oa->o_oi.oi.oi_id ?: 1;
+ oa->o_oi.oi_fid.f_seq = FID_SEQ_ECHO;
+ oa->o_oi.oi_fid.f_ver = 0;
+ } else {
+ /* A missing obdo or an object id of 0 is a protocol error. */
+ if (unlikely((oa == NULL) || ostid_id(&oa->o_oi) == 0))
+ GOTO(out, rc = -EPROTO);
+
+ /* Note: this check might be forced in 2.5 or 2.6, i.e.
+ * all of the requests are required to setup FLGROUP */
+ if (unlikely(!(oa->o_valid & OBD_MD_FLGROUP))) {
+ ostid_set_seq_mdt0(&oa->o_oi);
+ if (ioobj)
+ ostid_set_seq_mdt0(&ioobj->ioo_oid);
+ oa->o_valid |= OBD_MD_FLGROUP;
+ }
+
+ /* Only IDIF, MDT0, normal and echo sequences are accepted. */
+ if (unlikely(!(fid_seq_is_idif(ostid_seq(&oa->o_oi)) ||
+ fid_seq_is_mdt0(ostid_seq(&oa->o_oi)) ||
+ fid_seq_is_norm(ostid_seq(&oa->o_oi)) ||
+ fid_seq_is_echo(ostid_seq(&oa->o_oi)))))
+ GOTO(out, rc = -EPROTO);
}
if (ioobj != NULL) {
": rc = -EPROTO\n", exp->exp_obd->obd_name,
obd_export_nid2str(exp), max_brw,
POSTID(&oa->o_oi));
- return -EPROTO;
+ GOTO(out, rc = -EPROTO);
}
ioobj->ioo_oid = oa->o_oi;
}
- return 0;
+
+out:
+ /* Single exit point: every rejection is logged once, with its rc. */
+ if (rc != 0)
+ CERROR("%s: client %s sent bad object "DOSTID": rc = %d\n",
+ exp->exp_obd->obd_name, obd_export_nid2str(exp),
+ oa ? ostid_seq(&oa->o_oi) : -1,
+ oa ? ostid_id(&oa->o_oi) : -1, rc);
+ return rc;
}
void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
/* standard truncate optimization: if file body is completely
* destroyed, don't send data back to the server. */
if (body->oa.o_size == 0)
- flags |= LDLM_AST_DISCARD_DATA;
+ flags |= LDLM_FL_AST_DISCARD_DATA;
repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
repbody->oa = body->oa;
if ((remote_nb[0].flags & OBD_BRW_MEMALLOC) &&
(exp->exp_connection->c_peer.nid == exp->exp_connection->c_self))
- cfs_memory_pressure_set();
+ memory_pressure_set();
if (body->oa.o_valid & OBD_MD_FLOSSCAPA) {
capa = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
obd_uuid2str(&exp->exp_client_uuid),
obd_export_nid2str(exp), rc);
}
- cfs_memory_pressure_clr();
+ memory_pressure_clr();
RETURN(rc);
}
RETURN(rc);
}
+struct locked_region { /* one range lock taken by lock_region() */
+ cfs_list_t list; /* linkage on the list passed to lock_region() */
+ struct lustre_handle lh; /* handle given to ost_lock_get()/ost_lock_put() */
+};
+
+/*
+ * Take an LCK_PR lock through ost_lock_get() on the range that starts at
+ * "begin" and spans (end - begin), and record it on "locked" so that
+ * unlock_zero_regions() can release it later.
+ *
+ * exp, oa: passed through unchanged to ost_lock_get()
+ * begin:   start of the range; must not exceed "end" (asserted)
+ * end:     end of the range
+ * locked:  list head that receives the new locked_region entry
+ *
+ * Returns 0 on success, -ENOMEM if the tracking entry cannot be allocated,
+ * or the non-zero rc of ost_lock_get(), in which case the entry is freed
+ * and nothing is added to "locked".
+ */
+static int lock_region(struct obd_export *exp, struct obdo *oa,
+		       unsigned long long begin, unsigned long long end,
+		       cfs_list_t *locked)
+{
+	struct locked_region *region = NULL;
+	int rc;
+
+	LASSERT(begin <= end);
+	OBD_ALLOC_PTR(region);
+	if (region == NULL)
+		return -ENOMEM;
+
+	rc = ost_lock_get(exp, oa, begin, end - begin, &region->lh, LCK_PR, 0);
+	if (rc) {
+		/* Nothing was locked, so the entry must not reach the list. */
+		OBD_FREE_PTR(region);
+		return rc;
+	}
+
+	CDEBUG(D_OTHER, "ost lock [%llu,%llu], lh=%p\n",
+	       begin, end, &region->lh);
+	cfs_list_add(&region->list, locked);
+
+	return 0;
+}
+
+/*
+ * Walk the mapped extents in "fiemap" and call lock_region() for every gap
+ * between them inside [fm_start, fm_start + fm_length), including the
+ * tail after the last extent. Each lock taken is recorded on "locked".
+ *
+ * Returns 0 on success or the first non-zero rc from lock_region(). Locks
+ * already recorded on "locked" are left there for the caller to release.
+ */
+static int lock_zero_regions(struct obd_export *exp, struct obdo *oa,
+			     struct ll_user_fiemap *fiemap,
+			     cfs_list_t *locked)
+{
+	__u64 cursor = fiemap->fm_start;
+	__u64 range_end;
+	unsigned int idx;
+	int rc = 0;
+	ENTRY;
+
+	CDEBUG(D_OTHER, "extents count %u\n", fiemap->fm_mapped_extents);
+	for (idx = 0; idx < fiemap->fm_mapped_extents; idx++) {
+		struct ll_fiemap_extent *ext = &fiemap->fm_extents[idx];
+
+		/* Unmapped gap between the cursor and this extent. */
+		if (cursor < ext->fe_logical) {
+			CDEBUG(D_OTHER, "ost lock [%llu,%llu]\n",
+			       cursor, ext->fe_logical);
+			rc = lock_region(exp, oa, cursor, ext->fe_logical,
+					 locked);
+			if (rc != 0)
+				RETURN(rc);
+		}
+
+		/* Continue from the end of this extent. */
+		cursor = ext->fe_logical + ext->fe_length;
+	}
+
+	/* Tail of the requested range after the last extent, if any. */
+	range_end = fiemap->fm_start + fiemap->fm_length;
+	if (cursor < range_end) {
+		CDEBUG(D_OTHER, "ost lock [%llu,%llu]\n",
+		       cursor, range_end);
+		rc = lock_region(exp, oa, cursor, range_end, locked);
+	}
+
+	RETURN(rc);
+}
+
+/*
+ * Release every lock recorded on "locked": put the LCK_PR lock, unlink the
+ * entry from the list and free it.
+ */
+static void unlock_zero_regions(struct obd_export *exp, cfs_list_t *locked)
+{
+	struct locked_region *region;
+	struct locked_region *next;
+
+	/* The _safe iterator is needed: each entry is freed inside the loop. */
+	cfs_list_for_each_entry_safe(region, next, locked, list) {
+		struct lustre_handle *lockh = &region->lh;
+
+		CDEBUG(D_OTHER, "ost unlock lh=%p\n", lockh);
+		ost_lock_put(exp, lockh, LCK_PR);
+		cfs_list_del(&region->list);
+		OBD_FREE_PTR(region);
+	}
+}
+
static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req)
{
void *key, *reply;
int keylen, replylen, rc = 0;
struct req_capsule *pill = &req->rq_pill;
+ cfs_list_t locked = CFS_LIST_HEAD_INIT(locked); /* locks taken by lock_zero_regions() */
+ struct ll_fiemap_info_key *fm_key = NULL; /* assigned only when the key is KEY_FIEMAP */
+ struct ll_user_fiemap *fiemap;
ENTRY;
/* this common part for get_info rpc */
keylen = req_capsule_get_size(pill, &RMF_SETINFO_KEY, RCL_CLIENT);
if (KEY_IS(KEY_FIEMAP)) {
- struct ll_fiemap_info_key *fm_key = key;
- int rc;
-
+ fm_key = key;
rc = ost_validate_obdo(exp, &fm_key->oa, NULL);
if (rc)
RETURN(rc);
- }
+ }
rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
&replylen, NULL, NULL); /* no value buffer on this pass; replylen sizes the reply below */
if (rc)
- RETURN(rc);
+ RETURN(rc);
req_capsule_set_size(pill, &RMF_GENERIC_DATA,
RCL_SERVER, replylen);
rc = req_capsule_server_pack(pill);
if (rc)
- RETURN(rc);
+ RETURN(rc);
reply = req_capsule_server_get(pill, &RMF_GENERIC_DATA);
if (reply == NULL)
- RETURN(-ENOMEM);
+ RETURN(-ENOMEM);
if (KEY_IS(KEY_LAST_FID)) {
void *val;
} else {
CERROR("%s: invalid req val %p vallen %d replylen %d\n",
exp->exp_obd->obd_name, val, vallen, replylen);
- GOTO(out, rc = -EINVAL);
+ RETURN(-EINVAL);
}
}
- /* call again to fill in the reply buffer */
- rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
- &replylen, reply, NULL);
-out:
- lustre_msg_set_status(req->rq_repmsg, 0);
+ /* call again to fill in the reply buffer */
+ rc = obd_get_info(req->rq_svc_thread->t_env, exp, keylen, key,
+ &replylen, reply, NULL);
+
+ /* LU-3219: Lock the sparse areas to make sure dirty data is flushed
+ * back from the client, then call fiemap again.
+ * NOTE(review): rc from the obd_get_info() call just above is
+ * overwritten by lock_zero_regions() in this branch -- confirm that
+ * a failure of that call is not meant to skip the locking pass. */
+ if (KEY_IS(KEY_FIEMAP) && (fm_key->oa.o_valid & OBD_MD_FLFLAGS) &&
+ (fm_key->oa.o_flags & OBD_FL_SRVLOCK)) {
+ fiemap = (struct ll_user_fiemap *)reply;
+ fm_key = key; /* same value already assigned in the KEY_FIEMAP branch earlier */
+
+ rc = lock_zero_regions(exp, &fm_key->oa, fiemap, &locked);
+ if (rc == 0 && !cfs_list_empty(&locked)) /* refill only if some range was actually locked */
+ rc = obd_get_info(req->rq_svc_thread->t_env, exp,
+ keylen, key, &replylen, reply, NULL);
+ unlock_zero_regions(exp, &locked); /* runs on success and failure alike */
+ if (rc)
+ RETURN(rc);
+ }
+
+ lustre_msg_set_status(req->rq_repmsg, 0);
+
RETURN(rc);
}
lustre_msg_get_version(msg),
LUSTRE_OBD_VERSION);
break;
- case SEQ_QUERY:
- /* Note: client always use MDS_VERSION for FID request */
- rc = lustre_msg_check_version(msg, LUSTRE_MDS_VERSION);
- if (rc)
- CERROR("bad opc %u version %08x, expecting %08x\n",
- lustre_msg_get_opc(msg),
- lustre_msg_get_version(msg),
- LUSTRE_MDS_VERSION);
- break;
case OST_CREATE:
case OST_DESTROY:
case OST_GETATTR:
{
LASSERT(lock->l_export == opd->opd_exp);
- if (lock->l_destroyed) /* lock already cancelled */
+ if (lock->l_flags & LDLM_FL_DESTROYED) /* lock already cancelled */
return;
/* XXX: never try to grab resource lock here because we're inside
req_capsule_set(&req->rq_pill, &RQF_OST_GET_INFO_GENERIC);
rc = ost_get_info(req->rq_export, req);
break;
- case SEQ_QUERY:
- CDEBUG(D_INODE, "seq\n");
- rc = seq_handle(req);
- break;
case OST_QUOTACHECK:
CDEBUG(D_INODE, "quotacheck\n");
req_capsule_set(&req->rq_pill, &RQF_OST_QUOTACHECK);
if (rc)
RETURN(rc);
RETURN(ptlrpc_reply(req));
- case OBD_LOG_CANCEL:
- CDEBUG(D_INODE, "log cancel\n");
- req_capsule_set(&req->rq_pill, &RQF_LOG_CANCEL);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET))
- RETURN(0);
- rc = llog_origin_handle_cancel(req);
- if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP))
- RETURN(0);
- req->rq_status = rc;
- rc = req_capsule_server_pack(&req->rq_pill);
- if (rc)
- RETURN(rc);
- RETURN(ptlrpc_reply(req));
case LDLM_ENQUEUE:
CDEBUG(D_INODE, "enqueue\n");
req_capsule_set(&req->rq_pill, &RQF_LDLM_ENQUEUE);
.cc_pattern = oss_cpts,
},
.psc_ops = {
- .so_req_handler = ost_handle,
+ .so_req_handler = tgt_request_handle,
.so_req_printer = target_print_req,
.so_hpreq_handler = NULL,
},
GOTO(out_io, rc);
}
- ping_evictor_start();
+#if 0
+ /* Object update service */
+ memset(&svc_conf, 0, sizeof(svc_conf));
+ svc_conf = (typeof(svc_conf)) {
+ .psc_name = "ost_out",
+ .psc_watchdog_factor = OSS_SERVICE_WATCHDOG_FACTOR,
+ .psc_buf = {
+ .bc_nbufs = OST_NBUFS,
+ .bc_buf_size = OUT_BUFSIZE,
+ .bc_req_max_size = OUT_MAXREQSIZE,
+ .bc_rep_max_size = OUT_MAXREPSIZE,
+ .bc_req_portal = OUT_PORTAL,
+ .bc_rep_portal = OSC_REPLY_PORTAL,
+ },
+ /*
+ * We'd like to have a mechanism to set this on a per-device
+ * basis, but alas...
+ */
+ .psc_thr = {
+ .tc_thr_name = "ll_ost_out",
+ .tc_thr_factor = OSS_CR_THR_FACTOR,
+ .tc_nthrs_init = OSS_CR_NTHRS_INIT,
+ .tc_nthrs_base = OSS_CR_NTHRS_BASE,
+ .tc_nthrs_max = OSS_CR_NTHRS_MAX,
+ .tc_nthrs_user = oss_num_create_threads,
+ .tc_cpu_affinity = 1,
+ .tc_ctx_tags = LCT_DT_THREAD,
+ },
+ .psc_cpt = {
+ .cc_pattern = oss_cpts,
+ },
+ .psc_ops = {
+ .so_req_handler = tgt_request_handle,
+ .so_req_printer = target_print_req,
+ .so_hpreq_handler = NULL,
+ },
+ };
+ ost->ost_out_service = ptlrpc_register_service(&svc_conf,
+ obd->obd_proc_entry);
+ if (IS_ERR(ost->ost_out_service)) {
+ rc = PTR_ERR(ost->ost_out_service);
+ CERROR("failed to start out service: %d\n", rc);
+ ost->ost_out_service = NULL;
+ GOTO(out_seq, rc);
+ }
+#endif
+ ping_evictor_start();
- RETURN(0);
+ RETURN(0);
out_io:
ptlrpc_unregister_service(ost->ost_io_service);
ost->ost_io_service = NULL;
ptlrpc_unregister_service(ost->ost_create_service);
ptlrpc_unregister_service(ost->ost_io_service);
ptlrpc_unregister_service(ost->ost_seq_service);
+#if 0
+ ptlrpc_unregister_service(ost->ost_out_service);
+#endif
ost->ost_service = NULL;
ost->ost_create_service = NULL;
ost->ost_io_service = NULL;
ost->ost_seq_service = NULL;
+ ost->ost_out_service = NULL;
mutex_unlock(&ost->ost_health_mutex);
int rc;
ENTRY;
- ost_page_to_corrupt = cfs_alloc_page(CFS_ALLOC_STD);
+ ost_page_to_corrupt = alloc_page(GFP_IOFS);
lprocfs_ost_init_vars(&lvars);
rc = class_register_type(&ost_obd_ops, NULL, lvars.module_vars,