#include <lustre_param.h>
#include <lustre_fid.h>
#include <obd_class.h>
+#include <obd.h>
+#include <lustre_net.h>
#include "osc_internal.h"
#include "osc_cl_internal.h"
+atomic_t osc_pool_req_count;
+unsigned int osc_reqpool_maxreqcount;
+struct ptlrpc_request_pool *osc_rq_pool;
+
+/* max memory used for request pool, unit is MB */
+static unsigned int osc_reqpool_mem_max = 5;
+module_param(osc_reqpool_mem_max, uint, 0444);
+
struct osc_brw_async_args {
struct obdo *aa_oa;
int aa_requested_nob;
int aa_nio_count;
- obd_count aa_page_count;
+ u32 aa_page_count;
int aa_resends;
struct brw_page **aa_ppga;
struct client_obd *aa_cli;
struct list_head aa_oaps;
struct list_head aa_exts;
- struct obd_capa *aa_ocapa;
struct cl_req *aa_clerq;
};
#define osc_grant_args osc_brw_async_args
-struct osc_async_args {
- struct obd_info *aa_oi;
-};
-
struct osc_setattr_args {
struct obdo *sa_oa;
obd_enqueue_update_f sa_upcall;
};
struct osc_fsync_args {
- struct obd_info *fa_oi;
- obd_enqueue_update_f fa_upcall;
+ struct osc_object *fa_obj;
+ struct obdo *fa_oa;
+ obd_enqueue_update_f fa_upcall;
void *fa_cookie;
};
struct osc_enqueue_args {
struct obd_export *oa_exp;
- ldlm_type_t oa_type;
- ldlm_mode_t oa_mode;
+ enum ldlm_type oa_type;
+ enum ldlm_mode oa_mode;
__u64 *oa_flags;
osc_enqueue_upcall_f oa_upcall;
void *oa_cookie;
unsigned int oa_agl:1;
};
-static void osc_release_ppga(struct brw_page **ppga, obd_count count);
+static void osc_release_ppga(struct brw_page **ppga, size_t count);
static int brw_interpret(const struct lu_env *env, struct ptlrpc_request *req,
void *data, int rc);
-static inline void osc_pack_capa(struct ptlrpc_request *req,
- struct ost_body *body, void *capa)
-{
- struct obd_capa *oc = (struct obd_capa *)capa;
- struct lustre_capa *c;
-
- if (!capa)
- return;
-
- c = req_capsule_client_get(&req->rq_pill, &RMF_CAPA1);
- LASSERT(c);
- capa_cpy(c, oc);
- body->oa.o_valid |= OBD_MD_FLOSSCAPA;
- DEBUG_CAPA(D_SEC, c, "pack");
-}
-
-static inline void osc_pack_req_body(struct ptlrpc_request *req,
- struct obd_info *oinfo)
+void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa)
{
struct ost_body *body;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
-}
-
-static inline void osc_set_capa_size(struct ptlrpc_request *req,
- const struct req_msg_field *field,
- struct obd_capa *oc)
-{
- if (oc == NULL)
- req_capsule_set_size(&req->rq_pill, field, RCL_CLIENT, 0);
- else
- /* it is already calculated as sizeof struct obd_capa */
- ;
-}
-
-static int osc_getattr_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_async_args *aa, int rc)
-{
- struct ost_body *body;
- ENTRY;
-
- if (rc != 0)
- GOTO(out, rc);
-
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (body) {
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data,
- aa->aa_oi->oi_oa, &body->oa);
-
- /* This should really be sent by the OST */
- aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE;
- aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
- } else {
- CDEBUG(D_INFO, "can't unpack ost_body\n");
- rc = -EPROTO;
- aa->aa_oi->oi_oa->o_valid = 0;
- }
-out:
- rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc);
- RETURN(rc);
-}
-
-static int osc_getattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct ptlrpc_request_set *set)
-{
- struct ptlrpc_request *req;
- struct osc_async_args *aa;
- int rc;
- ENTRY;
-
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
- if (req == NULL)
- RETURN(-ENOMEM);
-
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- RETURN(rc);
- }
-
- osc_pack_req_body(req, oinfo);
-
- ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_getattr_interpret;
-
- CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
- aa = ptlrpc_req_async_args(req);
- aa->aa_oi = oinfo;
-
- ptlrpc_set_add_req(set, req);
- RETURN(0);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
}
+/*
+ * Synchronously fetch the attributes of an OST object.
+ *
+ * Allocates an RQF_OST_GETATTR request on the client import of @exp, packs
+ * @oa into the request body, sends it with ptlrpc_queue_wait() and, on
+ * success, copies the obdo from the reply back into @oa.  The block size is
+ * then filled in locally from cli_brw_size() and flagged with
+ * OBD_MD_FLBLKSZ.
+ *
+ * \param[in]     env  execution environment (not referenced in this body)
+ * \param[in]     exp  export the request is sent through
+ * \param[in,out] oa   attributes to send; overwritten with the reply
+ *
+ * \retval 0        reply received and unpacked into @oa
+ * \retval -ENOMEM  the request could not be allocated
+ * \retval -EPROTO  the reply carried no RMF_OST_BODY
+ * \retval other    non-zero code from ptlrpc_request_pack() or
+ *                  ptlrpc_queue_wait()
+ */
static int osc_getattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo)
+ struct obdo *oa)
{
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
- ENTRY;
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ int rc;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
- if (req == NULL)
- RETURN(-ENOMEM);
+ ENTRY;
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- RETURN(rc);
- }
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
- ptlrpc_request_set_replen(req);
+ ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- GOTO(out, rc);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (body == NULL)
- GOTO(out, rc = -EPROTO);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
- &body->oa);
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa);
+
+ /* the block size is set by the client here, not taken from the reply */
+ oa->o_blksize = cli_brw_size(exp->exp_obd);
+ oa->o_valid |= OBD_MD_FLBLKSZ;
- oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd);
- oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ;
+ EXIT;
+out:
+ ptlrpc_req_finished(req);
- EXIT;
- out:
- ptlrpc_req_finished(req);
- return rc;
+ return rc;
}
+/*
+ * Synchronously set the attributes of an OST object.
+ *
+ * Allocates an RQF_OST_SETATTR request on the client import of @exp, packs
+ * @oa into the request body, sends it with ptlrpc_queue_wait() and, on
+ * success, copies the obdo from the reply back into @oa.
+ *
+ * The caller must have OBD_MD_FLGROUP set in @oa->o_valid; this is
+ * asserted on entry.
+ *
+ * \param[in]     env  execution environment (not referenced in this body)
+ * \param[in]     exp  export the request is sent through
+ * \param[in,out] oa   attributes to send; overwritten with the reply
+ *
+ * \retval 0        reply received and unpacked into @oa
+ * \retval -ENOMEM  the request could not be allocated
+ * \retval -EPROTO  the reply carried no RMF_OST_BODY
+ * \retval other    non-zero code from ptlrpc_request_pack() or
+ *                  ptlrpc_queue_wait()
+ */
static int osc_setattr(const struct lu_env *env, struct obd_export *exp,
- struct obd_info *oinfo, struct obd_trans_info *oti)
+ struct obdo *oa)
{
- struct ptlrpc_request *req;
- struct ost_body *body;
- int rc;
- ENTRY;
+ struct ptlrpc_request *req;
+ struct ost_body *body;
+ int rc;
- LASSERT(oinfo->oi_oa->o_valid & OBD_MD_FLGROUP);
+ ENTRY;
+ LASSERT(oa->o_valid & OBD_MD_FLGROUP);
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
- if (req == NULL)
- RETURN(-ENOMEM);
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- RETURN(rc);
- }
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
- ptlrpc_request_set_replen(req);
+ ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- GOTO(out, rc);
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(out, rc);
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (body == NULL)
- GOTO(out, rc = -EPROTO);
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL)
+ GOTO(out, rc = -EPROTO);
- lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa,
- &body->oa);
+ lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa);
- EXIT;
+ EXIT;
out:
- ptlrpc_req_finished(req);
- RETURN(rc);
+ ptlrpc_req_finished(req);
+
+ /*
+ * Every path reaching this point has already been traced, either by
+ * EXIT above or by GOTO(out, ...), so use a plain return rather than
+ * RETURN() to avoid a second exit trace (same shape as osc_getattr()).
+ */
+ return rc;
}
static int osc_setattr_interpret(const struct lu_env *env,
RETURN(rc);
}
-int osc_setattr_async_base(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- obd_enqueue_update_f upcall, void *cookie,
- struct ptlrpc_request_set *rqset)
+/*
+ * Queue an OST_SETATTR request without waiting for the reply.
+ *
+ * How the request is dispatched depends on @rqset:
+ *  - NULL:         handed to ptlrpcd with no interpret callback, so
+ *                  @upcall is never invoked from this path;
+ *  - PTLRPCD_SET:  handed to ptlrpcd with osc_setattr_interpret() installed;
+ *  - anything else: added to the caller's set with
+ *                  osc_setattr_interpret() installed.
+ *
+ * When a callback is installed, @oa, @upcall and @cookie are stored by
+ * pointer in the request's async args, so @oa presumably has to stay valid
+ * until the reply has been interpreted (verify against callers).
+ *
+ * \param[in] exp     export the request is sent through
+ * \param[in] oa      attributes packed into the request body
+ * \param[in] upcall  completion callback saved in the async args
+ * \param[in] cookie  opaque argument saved alongside @upcall
+ * \param[in] rqset   NULL, PTLRPCD_SET, or a request set to add to
+ *
+ * \retval 0        request queued
+ * \retval -ENOMEM  the request could not be allocated
+ * \retval other    non-zero code from ptlrpc_request_pack()
+ */
+int osc_setattr_async(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie,
+ struct ptlrpc_request_set *rqset)
{
- struct ptlrpc_request *req;
- struct osc_setattr_args *sa;
- int rc;
- ENTRY;
+ struct ptlrpc_request *req;
+ struct osc_setattr_args *sa;
+ int rc;
- req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
- if (req == NULL)
- RETURN(-ENOMEM);
+ ENTRY;
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
- if (rc) {
- ptlrpc_request_free(req);
- RETURN(rc);
- }
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_SETATTR);
+ if (req == NULL)
+ RETURN(-ENOMEM);
- if (oti && oinfo->oi_oa->o_valid & OBD_MD_FLCOOKIE)
- oinfo->oi_oa->o_lcookie = *oti->oti_logcookies;
+ rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SETATTR);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
- osc_pack_req_body(req, oinfo);
+ osc_pack_req_body(req, oa);
- ptlrpc_request_set_replen(req);
+ ptlrpc_request_set_replen(req);
- /* do mds to ost setattr asynchronously */
- if (!rqset) {
- /* Do not wait for response. */
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
- } else {
- req->rq_interpret_reply =
- (ptlrpc_interpterer_t)osc_setattr_interpret;
-
- CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
- sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oinfo->oi_oa;
- sa->sa_upcall = upcall;
- sa->sa_cookie = cookie;
-
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
- else
- ptlrpc_set_add_req(rqset, req);
- }
+ /* do mds to ost setattr asynchronously */
+ if (!rqset) {
+ /* Do not wait for response. */
+ ptlrpcd_add_req(req);
+ } else {
+ req->rq_interpret_reply =
+ (ptlrpc_interpterer_t)osc_setattr_interpret;
- RETURN(0);
-}
+ /* the callback state must fit in the request's inline async args */
+ CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
+ sa = ptlrpc_req_async_args(req);
+ sa->sa_oa = oa;
+ sa->sa_upcall = upcall;
+ sa->sa_cookie = cookie;
-static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo,
- struct obd_trans_info *oti,
- struct ptlrpc_request_set *rqset)
-{
- return osc_setattr_async_base(exp, oinfo, oti,
- oinfo->oi_cb_up, oinfo, rqset);
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req);
+ else
+ ptlrpc_set_add_req(rqset, req);
+ }
+
+ RETURN(0);
}
static int osc_create(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct ptlrpc_request *req;
struct ost_body *body;
ptlrpc_request_set_replen(req);
- if ((oa->o_valid & OBD_MD_FLFLAGS) &&
- oa->o_flags == OBD_FL_DELORPHAN) {
- DEBUG_REQ(D_HA, req,
- "delorphan from OST integration");
- /* Don't resend the delorphan req */
- req->rq_no_resend = req->rq_no_delay = 1;
- }
-
rc = ptlrpc_queue_wait(req);
if (rc)
GOTO(out_req, rc);
oa->o_blksize = cli_brw_size(exp->exp_obd);
oa->o_valid |= OBD_MD_FLBLKSZ;
- if (oti != NULL) {
- if (oa->o_valid & OBD_MD_FLCOOKIE) {
- if (oti->oti_logcookies == NULL)
- oti->oti_logcookies = &oti->oti_onecookie;
-
- *oti->oti_logcookies = oa->o_lcookie;
- }
- }
-
- CDEBUG(D_HA, "transno: "LPD64"\n",
- lustre_msg_get_transno(req->rq_repmsg));
+ CDEBUG(D_HA, "transno: "LPD64"\n",
+ lustre_msg_get_transno(req->rq_repmsg));
out_req:
- ptlrpc_req_finished(req);
+ ptlrpc_req_finished(req);
out:
RETURN(rc);
}
-int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_punch_base(struct obd_export *exp, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset)
{
if (req == NULL)
RETURN(-ENOMEM);
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
if (rc) {
ptlrpc_request_free(req);
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
- ptlrpc_request_set_replen(req);
+ ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
- CLASSERT (sizeof(*sa) <= sizeof(req->rq_async_args));
- sa = ptlrpc_req_async_args(req);
- sa->sa_oa = oinfo->oi_oa;
- sa->sa_upcall = upcall;
- sa->sa_cookie = cookie;
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
- else
- ptlrpc_set_add_req(rqset, req);
+ req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
+ CLASSERT(sizeof(*sa) <= sizeof(req->rq_async_args));
+ sa = ptlrpc_req_async_args(req);
+ sa->sa_oa = oa;
+ sa->sa_upcall = upcall;
+ sa->sa_cookie = cookie;
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req);
+ else
+ ptlrpc_set_add_req(rqset, req);
- RETURN(0);
+ RETURN(0);
}
+/*
+ * Reply callback for the OST_SYNC request built by osc_sync_base().
+ *
+ * On a successful reply the returned obdo is copied into the caller's
+ * obdo (fa_oa) and, if the reply marks o_blocks as valid
+ * (OBD_MD_FLBLOCKS), the block count of the osc object (fa_obj) is updated
+ * under the cl_object attribute lock.  The saved upcall is invoked on every
+ * path, including failures, and its return value becomes the result.
+ *
+ * \param[in] env  execution environment; supplies the scratch cl_attr
+ * \param[in] req  the completed request
+ * \param[in] arg  struct osc_fsync_args stored in the request's async args
+ * \param[in] rc   completion status of the request; replaced by -EPROTO
+ *                 if the reply has no RMF_OST_BODY
+ *
+ * \retval whatever fa_upcall(fa_cookie, rc) returns
+ */
static int osc_sync_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
void *arg, int rc)
{
- struct osc_fsync_args *fa = arg;
- struct ost_body *body;
- ENTRY;
+ struct osc_fsync_args *fa = arg;
+ struct ost_body *body;
+ struct cl_attr *attr = &osc_env_info(env)->oti_attr;
+ unsigned long valid = 0;
+ struct cl_object *obj;
+ ENTRY;
- if (rc)
- GOTO(out, rc);
+ if (rc != 0)
+ GOTO(out, rc);
- body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
- if (body == NULL) {
- CERROR ("can't unpack ost_body\n");
- GOTO(out, rc = -EPROTO);
- }
+ body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
+ if (body == NULL) {
+ CERROR("can't unpack ost_body\n");
+ GOTO(out, rc = -EPROTO);
+ }
+
+ *fa->fa_oa = body->oa;
+ obj = osc2cl(fa->fa_obj);
+
+ /* Update osc object's blocks attribute */
+ cl_object_attr_lock(obj);
+ if (body->oa.o_valid & OBD_MD_FLBLOCKS) {
+ attr->cat_blocks = body->oa.o_blocks;
+ valid |= CAT_BLOCKS;
+ }
+
+ if (valid != 0)
+ cl_object_attr_update(env, obj, attr, valid);
+ cl_object_attr_unlock(obj);
- *fa->fa_oi->oi_oa = body->oa;
out:
rc = fa->fa_upcall(fa->fa_cookie, rc);
RETURN(rc);
}
-int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo,
+int osc_sync_base(struct osc_object *obj, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie,
struct ptlrpc_request_set *rqset)
{
+ struct obd_export *exp = osc_export(obj);
struct ptlrpc_request *req;
struct ost_body *body;
struct osc_fsync_args *fa;
if (req == NULL)
RETURN(-ENOMEM);
- osc_set_capa_size(req, &RMF_CAPA1, oinfo->oi_capa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SYNC);
if (rc) {
ptlrpc_request_free(req);
/* overload the size and blocks fields in the oa with start/end */
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
- lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa,
- oinfo->oi_oa);
- osc_pack_capa(req, body, oinfo->oi_capa);
+ lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
- ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = osc_sync_interpret;
+ ptlrpc_request_set_replen(req);
+ req->rq_interpret_reply = osc_sync_interpret;
CLASSERT(sizeof(*fa) <= sizeof(req->rq_async_args));
fa = ptlrpc_req_async_args(req);
- fa->fa_oi = oinfo;
+ fa->fa_obj = obj;
+ fa->fa_oa = oa;
fa->fa_upcall = upcall;
fa->fa_cookie = cookie;
if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpcd_add_req(req);
else
ptlrpc_set_add_req(rqset, req);
* locks added to @cancels list. */
static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa,
struct list_head *cancels,
- ldlm_mode_t mode, __u64 lock_flags)
+ enum ldlm_mode mode, __u64 lock_flags)
{
- struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
- struct ldlm_res_id res_id;
- struct ldlm_resource *res;
- int count;
- ENTRY;
+ struct ldlm_namespace *ns = exp->exp_obd->obd_namespace;
+ struct ldlm_res_id res_id;
+ struct ldlm_resource *res;
+ int count;
+ ENTRY;
/* Return, i.e. cancel nothing, only if ELC is supported (flag in
* export) but disabled through procfs (flag in NS).
return 0;
}
-/* Destroy requests can be async always on the client, and we don't even really
- * care about the return code since the client cannot do anything at all about
- * a destroy failure.
- * When the MDS is unlinking a filename, it saves the file objects into a
- * recovery llog, and these object records are cancelled when the OST reports
- * they were destroyed and sync'd to disk (i.e. transaction committed).
- * If the client dies, or the OST is down when the object should be destroyed,
- * the records are not cancelled, and when the OST reconnects to the MDS next,
- * it will retrieve the llog unlink logs and then sends the log cancellation
- * cookies to the MDS after committing destroy transactions. */
static int osc_destroy(const struct lu_env *env, struct obd_export *exp,
- struct obdo *oa, struct obd_trans_info *oti)
+ struct obdo *oa)
{
struct client_obd *cli = &exp->exp_obd->u.cli;
struct ptlrpc_request *req;
RETURN(-ENOMEM);
}
- osc_set_capa_size(req, &RMF_CAPA1, NULL);
rc = ldlm_prep_elc_req(exp, req, LUSTRE_OST_VERSION, OST_DESTROY,
0, &cancels, count);
if (rc) {
req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */
ptlrpc_at_set_req_timeout(req);
- if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
- oa->o_lcookie = *oti->oti_logcookies;
body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
LASSERT(body);
lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa);
ptlrpc_request_set_replen(req);
- /* If osc_destory is for destroying the unlink orphan,
- * sent from MDT to OST, which should not be blocked here,
- * because the process might be triggered by ptlrpcd, and
- * it is not good to block ptlrpcd thread (b=16006)*/
- if (!(oa->o_flags & OBD_FL_DELORPHAN)) {
- req->rq_interpret_reply = osc_destroy_interpret;
- if (!osc_can_send_destroy(cli)) {
- struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP,
- NULL);
-
- /*
- * Wait until the number of on-going destroy RPCs drops
- * under max_rpc_in_flight
- */
- l_wait_event_exclusive(cli->cl_destroy_waitq,
- osc_can_send_destroy(cli), &lwi);
- }
- }
+ req->rq_interpret_reply = osc_destroy_interpret;
+ if (!osc_can_send_destroy(cli)) {
+ struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
- /* Do not wait for response */
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
- RETURN(0);
+ /*
+ * Wait until the number of on-going destroy RPCs drops
+ * under max_rpc_in_flight
+ */
+ l_wait_event_exclusive(cli->cl_destroy_waitq,
+ osc_can_send_destroy(cli), &lwi);
+ }
+
+ /* Do not wait for response */
+ ptlrpcd_add_req(req);
+ RETURN(0);
}
static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
long writing_bytes)
{
- obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLGRANT;
+ u64 bits = OBD_MD_FLBLOCKS | OBD_MD_FLGRANT;
- LASSERT(!(oa->o_valid & bits));
+ LASSERT(!(oa->o_valid & bits));
oa->o_valid |= bits;
spin_lock(&cli->cl_loi_list_lock);
oa->o_undirty = 0;
} else if (unlikely(atomic_long_read(&obd_dirty_pages) -
atomic_long_read(&obd_dirty_transit_pages) >
- (obd_max_dirty_pages + 1))) {
+ (long)(obd_max_dirty_pages + 1))) {
/* The atomic_read() allowing the atomic_inc() are
* not covered by a lock thus they may safely race and trip
* this CERROR() unless we add in a small fudge factor (+1). */
- CERROR("%s: dirty %ld - %ld > system dirty_max %lu\n",
- cli->cl_import->imp_obd->obd_name,
- atomic_long_read(&obd_dirty_pages),
+ CERROR("%s: dirty %ld - %ld > system dirty_max %ld\n",
+ cli_name(cli), atomic_long_read(&obd_dirty_pages),
atomic_long_read(&obd_dirty_transit_pages),
obd_max_dirty_pages);
oa->o_undirty = 0;
cli->cl_next_shrink_grant);
}
-static void __osc_update_grant(struct client_obd *cli, obd_size grant)
+static void __osc_update_grant(struct client_obd *cli, u64 grant)
{
spin_lock(&cli->cl_loi_list_lock);
cli->cl_avail_grant += grant;
}
static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
- obd_count keylen, void *key, obd_count vallen,
- void *val, struct ptlrpc_request_set *set);
+ u32 keylen, void *key,
+ u32 vallen, void *val,
+ struct ptlrpc_request_set *set);
static int osc_shrink_grant_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
+/*
+ * Register @client for periodic grant-shrink callbacks.
+ *
+ * Adds the client's cl_grant_shrink_list entry to the TIMEOUT_GRANT timeout
+ * list with osc_grant_shrink_grant_cb() as the handler, using the client's
+ * cl_grant_shrink_interval, and then calls osc_update_next_shrink().
+ *
+ * \param[in] client  client obd to register
+ *
+ * \retval 0      registered
+ * \retval other  non-zero code from ptlrpc_add_timeout_client(); logged
+ *                with CERROR and osc_update_next_shrink() is not called
+ */
static int osc_add_shrink_grant(struct client_obd *client)
{
- int rc;
+ int rc;
- rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
- TIMEOUT_GRANT,
- osc_grant_shrink_grant_cb, NULL,
- &client->cl_grant_shrink_list);
- if (rc) {
- CERROR("add grant client %s error %d\n",
- client->cl_import->imp_obd->obd_name, rc);
- return rc;
- }
- CDEBUG(D_CACHE, "add grant client %s \n",
- client->cl_import->imp_obd->obd_name);
- osc_update_next_shrink(client);
- return 0;
+ rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval,
+ TIMEOUT_GRANT,
+ osc_grant_shrink_grant_cb, NULL,
+ &client->cl_grant_shrink_list);
+ if (rc) {
+ CERROR("add grant client %s error %d\n", cli_name(client), rc);
+ return rc;
+ }
+ CDEBUG(D_CACHE, "add grant client %s\n", cli_name(client));
+ osc_update_next_shrink(client);
+ return 0;
}
static int osc_del_shrink_grant(struct client_obd *client)
if (cli->cl_avail_grant < 0) {
CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n",
- cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant,
+ cli_name(cli), cli->cl_avail_grant,
ocd->ocd_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT);
/* workaround for servers which do not have the patch from
* LU-2679 */
spin_unlock(&cli->cl_loi_list_lock);
CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld."
- "chunk bits: %d.\n", cli->cl_import->imp_obd->obd_name,
- cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits);
+ "chunk bits: %d.\n", cli_name(cli), cli->cl_avail_grant,
+ cli->cl_lost_grant, cli->cl_chunkbits);
if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK &&
list_empty(&cli->cl_grant_shrink_list))
* beyond the end of a stripe file; i.e. lustre is reading a sparse file
* via the LOV, and it _knows_ it's reading inside the file, it's just that
* this stripe never got written at or beyond this stripe offset yet. */
-static void handle_short_read(int nob_read, obd_count page_count,
+static void handle_short_read(int nob_read, size_t page_count,
struct brw_page **pga)
{
char *ptr;
if (pga[i]->count > nob_read) {
/* EOF inside this page */
ptr = kmap(pga[i]->pg) +
- (pga[i]->off & ~CFS_PAGE_MASK);
+ (pga[i]->off & ~PAGE_MASK);
memset(ptr + nob_read, 0, pga[i]->count - nob_read);
kunmap(pga[i]->pg);
page_count--;
/* zero remaining pages */
while (page_count-- > 0) {
- ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK);
+ ptr = kmap(pga[i]->pg) + (pga[i]->off & ~PAGE_MASK);
memset(ptr, 0, pga[i]->count);
kunmap(pga[i]->pg);
i++;
}
static int check_write_rcs(struct ptlrpc_request *req,
- int requested_nob, int niocount,
- obd_count page_count, struct brw_page **pga)
+ int requested_nob, int niocount,
+ size_t page_count, struct brw_page **pga)
{
int i;
__u32 *remote_rcs;
return (p1->off + p1->count == p2->off);
}
-static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
- struct brw_page **pga, int opc,
- cksum_type_t cksum_type)
+static u32 osc_checksum_bulk(int nob, size_t pg_count,
+ struct brw_page **pga, int opc,
+ cksum_type_t cksum_type)
{
- __u32 cksum;
+ u32 cksum;
int i = 0;
struct cfs_crypto_hash_desc *hdesc;
unsigned int bufsize;
}
while (nob > 0 && pg_count > 0) {
- int count = pga[i]->count > nob ? nob : pga[i]->count;
+ unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
/* corrupt the data before we compute the checksum, to
* simulate an OST->client data error */
if (i == 0 && opc == OST_READ &&
OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) {
unsigned char *ptr = kmap(pga[i]->pg);
- int off = pga[i]->off & ~CFS_PAGE_MASK;
+ int off = pga[i]->off & ~PAGE_MASK;
- memcpy(ptr + off, "bad1", min(4, nob));
+ memcpy(ptr + off, "bad1", min_t(typeof(nob), 4, nob));
kunmap(pga[i]->pg);
}
cfs_crypto_hash_update_page(hdesc, pga[i]->pg,
- pga[i]->off & ~CFS_PAGE_MASK,
+ pga[i]->off & ~PAGE_MASK,
count);
LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d\n",
- (int)(pga[i]->off & ~CFS_PAGE_MASK));
+ (int)(pga[i]->off & ~PAGE_MASK));
nob -= pga[i]->count;
pg_count--;
return cksum;
}
-static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa,
- struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page **pga,
- struct ptlrpc_request **reqp,
- struct obd_capa *ocapa, int reserve,
- int resend)
+static int
+osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
+ u32 page_count, struct brw_page **pga,
+ struct ptlrpc_request **reqp, int resend)
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ2))
RETURN(-EINVAL); /* Fatal */
- if ((cmd & OBD_BRW_WRITE) != 0) {
- opc = OST_WRITE;
- req = ptlrpc_request_alloc_pool(cli->cl_import,
- cli->cl_import->imp_rq_pool,
- &RQF_OST_BRW_WRITE);
- } else {
- opc = OST_READ;
- req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ);
- }
+ if ((cmd & OBD_BRW_WRITE) != 0) {
+ opc = OST_WRITE;
+ req = ptlrpc_request_alloc_pool(cli->cl_import,
+ osc_rq_pool,
+ &RQF_OST_BRW_WRITE);
+ } else {
+ opc = OST_READ;
+ req = ptlrpc_request_alloc(cli->cl_import, &RQF_OST_BRW_READ);
+ }
if (req == NULL)
RETURN(-ENOMEM);
sizeof(*ioobj));
req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
niocount * sizeof(*niobuf));
- osc_set_capa_size(req, &RMF_CAPA1, ocapa);
rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, opc);
if (rc) {
desc = ptlrpc_prep_bulk_imp(req, page_count,
cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS,
- opc == OST_WRITE ? BULK_GET_SOURCE : BULK_PUT_SINK,
- OST_BULK_PORTAL);
+ (opc == OST_WRITE ? PTLRPC_BULK_GET_SOURCE :
+ PTLRPC_BULK_PUT_SINK) |
+ PTLRPC_BULK_BUF_KIOV,
+ OST_BULK_PORTAL,
+ &ptlrpc_bulk_kiov_pin_ops);
if (desc == NULL)
GOTO(out, rc = -ENOMEM);
* "max - 1" for old client compatibility sending "0", and also so the
* the actual maximum is a power-of-two number, not one less. LU-1431 */
ioobj_max_brw_set(ioobj, desc->bd_md_max_brw);
- osc_pack_capa(req, body, ocapa);
LASSERT(page_count > 0);
pg_prev = pga[0];
for (requested_nob = i = 0; i < page_count; i++, niobuf++) {
struct brw_page *pg = pga[i];
- int poff = pg->off & ~CFS_PAGE_MASK;
+ int poff = pg->off & ~PAGE_MASK;
LASSERT(pg->count > 0);
/* make sure there is no gap in the middle of page array */
LASSERT((pga[0]->flag & OBD_BRW_SRVLOCK) ==
(pg->flag & OBD_BRW_SRVLOCK));
- ptlrpc_prep_bulk_page_pin(desc, pg->pg, poff, pg->count);
+ desc->bd_frag_ops->add_kiov_frag(desc, pg->pg, poff, pg->count);
requested_nob += pg->count;
if (i > 0 && can_merge_pages(pg_prev, pg)) {
aa->aa_ppga = pga;
aa->aa_cli = cli;
INIT_LIST_HEAD(&aa->aa_oaps);
- if (ocapa && reserve)
- aa->aa_ocapa = capa_get(ocapa);
*reqp = req;
niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE);
}
static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer,
- __u32 client_cksum, __u32 server_cksum, int nob,
- obd_count page_count, struct brw_page **pga,
- cksum_type_t client_cksum_type)
+ __u32 client_cksum, __u32 server_cksum, int nob,
+ size_t page_count, struct brw_page **pga,
+ cksum_type_t client_cksum_type)
{
__u32 new_cksum;
char *msg;
&req->rq_import->imp_connection->c_peer;
struct client_obd *cli = aa->aa_cli;
struct ost_body *body;
- __u32 client_cksum = 0;
+ u32 client_cksum = 0;
ENTRY;
if (rc < 0 && rc != -EDQUOT) {
if (body->oa.o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
- __u32 server_cksum = body->oa.o_cksum;
- char *via;
- char *router;
+ u32 server_cksum = body->oa.o_cksum;
+ char *via = "";
+ char *router = "";
cksum_type_t cksum_type;
cksum_type = cksum_type_unpack(body->oa.o_valid &OBD_MD_FLFLAGS?
aa->aa_ppga, OST_READ,
cksum_type);
- if (peer->nid == req->rq_bulk->bd_sender) {
- via = router = "";
- } else {
- via = " via ";
- router = libcfs_nid2str(req->rq_bulk->bd_sender);
- }
+ if (peer->nid != req->rq_bulk->bd_sender) {
+ via = " via ";
+ router = libcfs_nid2str(req->rq_bulk->bd_sender);
+ }
if (server_cksum != client_cksum) {
LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request,
"redo for recoverable error %d", rc);
- rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
- OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ,
- aa->aa_cli, aa->aa_oa,
- NULL /* lsm unused by osc currently */,
- aa->aa_page_count, aa->aa_ppga,
- &new_req, aa->aa_ocapa, 0, 1);
+ rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) ==
+ OST_WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
+ aa->aa_cli, aa->aa_oa, aa->aa_page_count,
+ aa->aa_ppga, &new_req, 1);
if (rc)
RETURN(rc);
}
}
- new_aa->aa_ocapa = aa->aa_ocapa;
- aa->aa_ocapa = NULL;
-
/* XXX: This code will run into problem if we're going to support
* to add a series of BRW RPCs into a self-defined ptlrpc_request_set
* and wait for all of them to be finished. We should inherit request
* set from old request. */
- ptlrpcd_add_req(new_req, PDL_POLICY_SAME, -1);
+ ptlrpcd_add_req(new_req);
DEBUG_REQ(D_INFO, new_req, "new request");
RETURN(0);
} while (stride > 1);
}
-static void osc_release_ppga(struct brw_page **ppga, obd_count count)
+static void osc_release_ppga(struct brw_page **ppga, size_t count)
{
LASSERT(ppga != NULL);
OBD_FREE(ppga, sizeof(*ppga) * count);
rc = -EIO;
}
- if (aa->aa_ocapa) {
- capa_put(aa->aa_ocapa);
- aa->aa_ocapa = NULL;
- }
-
if (rc == 0) {
struct obdo *oa = aa->aa_oa;
struct cl_attr *attr = &osc_env_info(env)->oti_attr;
}
if (valid != 0)
- cl_object_attr_set(env, obj, attr, valid);
+ cl_object_attr_update(env, obj, attr, valid);
cl_object_attr_unlock(obj);
}
OBDO_FREE(aa->aa_oa);
osc_wake_cache_waiters(cli);
spin_unlock(&cli->cl_loi_list_lock);
- osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ osc_io_unplug(env, cli, NULL);
RETURN(rc);
}
* Extents in the list must be in OES_RPC state.
*/
int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
- struct list_head *ext_list, int cmd, pdl_policy_t pol)
+ struct list_head *ext_list, int cmd)
{
struct ptlrpc_request *req = NULL;
struct osc_extent *ext;
enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE :
CRT_READ;
struct cl_req_attr *crattr = NULL;
- obd_off starting_offset = OBD_OBJECT_EOF;
- obd_off ending_offset = 0;
+ loff_t starting_offset = OBD_OBJECT_EOF;
+ loff_t ending_offset = 0;
int mpflag = 0;
int mem_tight = 0;
int page_count = 0;
list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) {
++page_count;
list_add_tail(&oap->oap_rpc_item, &rpc_list);
- if (starting_offset > oap->oap_obj_off)
+ if (starting_offset == OBD_OBJECT_EOF ||
+ starting_offset > oap->oap_obj_off)
starting_offset = oap->oap_obj_off;
else
LASSERT(oap->oap_page_off == 0);
}
sort_brw_pages(pga, page_count);
- rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count,
- pga, &req, crattr->cra_capa, 1, 0);
+ rc = osc_brw_prep_request(cmd, cli, oa, page_count, pga, &req, 0);
if (rc != 0) {
CERROR("prep_req failed: %d\n", rc);
GOTO(out, rc);
page_count, aa, cli->cl_r_in_flight,
cli->cl_w_in_flight);
- /* XXX: Maybe the caller can check the RPC bulk descriptor to
- * see which CPU/NUMA node the majority of pages were allocated
- * on, and try to assign the async RPC to the CPU core
- * (PDL_POLICY_PREFERRED) to reduce cross-CPU memory traffic.
- *
- * But on the other hand, we expect that multiple ptlrpcd
- * threads and the initial write sponsor can run in parallel,
- * especially when data checksum is enabled, which is CPU-bound
- * operation and single ptlrpcd thread cannot process in time.
- * So more ptlrpcd threads sharing BRW load
- * (with PDL_POLICY_ROUND) seems better.
- */
- ptlrpcd_add_req(req, pol, -1);
+ ptlrpcd_add_req(req);
rc = 0;
EXIT;
if (mem_tight != 0)
cfs_memory_pressure_restore(mpflag);
- if (crattr != NULL) {
- capa_put(crattr->cra_capa);
+ if (crattr != NULL)
OBD_FREE(crattr, sizeof(*crattr));
- }
if (rc != 0) {
LASSERT(req == NULL);
return set;
}
-static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
- ldlm_iterator_t replace, void *data)
-{
- struct ldlm_res_id res_id;
- struct obd_device *obd = class_exp2obd(exp);
-
- ostid_build_res_name(&lsm->lsm_oi, &res_id);
- ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
- return 0;
-}
-
-/* find any ldlm lock of the inode in osc
- * return 0 not find
- * 1 find one
- * < 0 error */
-static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
- ldlm_iterator_t replace, void *data)
-{
- struct ldlm_res_id res_id;
- struct obd_device *obd = class_exp2obd(exp);
- int rc = 0;
-
- ostid_build_res_name(&lsm->lsm_oi, &res_id);
- rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
- if (rc == LDLM_ITER_STOP)
- return(1);
- if (rc == LDLM_ITER_CONTINUE)
- return(0);
- return(rc);
-}
-
static int osc_enqueue_fini(struct ptlrpc_request *req,
osc_enqueue_upcall_f upcall, void *cookie,
- struct lustre_handle *lockh, ldlm_mode_t mode,
+ struct lustre_handle *lockh, enum ldlm_mode mode,
__u64 *flags, int agl, int errcode)
{
bool intent = *flags & LDLM_FL_HAS_INTENT;
}
static int osc_enqueue_interpret(const struct lu_env *env,
- struct ptlrpc_request *req,
- struct osc_enqueue_args *aa, int rc)
+ struct ptlrpc_request *req,
+ struct osc_enqueue_args *aa, int rc)
{
struct ldlm_lock *lock;
struct lustre_handle *lockh = &aa->oa_lockh;
- ldlm_mode_t mode = aa->oa_mode;
+ enum ldlm_mode mode = aa->oa_mode;
struct ost_lvb *lvb = aa->oa_lvb;
__u32 lvb_len = sizeof(*lvb);
__u64 flags = 0;
* is evicted from the cluster -- such scenarios make life difficult, so
* release locks just after they are obtained. */
int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u64 *flags, ldlm_policy_data_t *policy,
+ __u64 *flags, union ldlm_policy_data *policy,
struct ost_lvb *lvb, int kms_valid,
osc_enqueue_upcall_f upcall, void *cookie,
struct ldlm_enqueue_info *einfo,
struct ptlrpc_request *req = NULL;
int intent = *flags & LDLM_FL_HAS_INTENT;
__u64 match_lvb = agl ? 0 : LDLM_FL_LVB_READY;
- ldlm_mode_t mode;
+ enum ldlm_mode mode;
int rc;
ENTRY;
/* Filesystem lock extents are extended to page boundaries so that
* dealing with the page cache is a little smoother. */
- policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
- policy->l_extent.end |= ~CFS_PAGE_MASK;
+ policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+ policy->l_extent.end |= ~PAGE_MASK;
/*
* kms is not valid when either object is completely fresh (so that no
if (req == NULL)
RETURN(-ENOMEM);
- rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE);
- if (rc < 0) {
+ rc = ldlm_prep_enqueue_req(exp, req, NULL, 0);
+ if (rc) {
ptlrpc_request_free(req);
RETURN(rc);
}
aa->oa_flags = NULL;
}
- req->rq_interpret_reply =
- (ptlrpc_interpterer_t)osc_enqueue_interpret;
- if (rqset == PTLRPCD_SET)
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
- else
- ptlrpc_set_add_req(rqset, req);
- } else if (intent) {
- ptlrpc_req_finished(req);
- }
- RETURN(rc);
- }
+ req->rq_interpret_reply =
+ (ptlrpc_interpterer_t)osc_enqueue_interpret;
+ if (rqset == PTLRPCD_SET)
+ ptlrpcd_add_req(req);
+ else
+ ptlrpc_set_add_req(rqset, req);
+ } else if (intent) {
+ ptlrpc_req_finished(req);
+ }
+ RETURN(rc);
+ }
rc = osc_enqueue_fini(req, upcall, cookie, &lockh, einfo->ei_mode,
flags, agl, rc);
}
int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id,
- __u32 type, ldlm_policy_data_t *policy, __u32 mode,
- __u64 *flags, void *data, struct lustre_handle *lockh,
- int unref)
+ enum ldlm_type type, union ldlm_policy_data *policy,
+ enum ldlm_mode mode, __u64 *flags, void *data,
+ struct lustre_handle *lockh, int unref)
{
struct obd_device *obd = exp->exp_obd;
__u64 lflags = *flags;
- ldlm_mode_t rc;
+ enum ldlm_mode rc;
ENTRY;
- if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
- RETURN(-EIO);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH))
+ RETURN(-EIO);
- /* Filesystem lock extents are extended to page boundaries so that
- * dealing with the page cache is a little smoother */
- policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
- policy->l_extent.end |= ~CFS_PAGE_MASK;
+ /* Filesystem lock extents are extended to page boundaries so that
+ * dealing with the page cache is a little smoother */
+ policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
+ policy->l_extent.end |= ~PAGE_MASK;
/* Next, search for already existing extent locks that will cover us */
/* If we're trying to read, we also search for an existing PW lock. The
RETURN(rc);
}
-int osc_cancel_base(struct lustre_handle *lockh, __u32 mode)
-{
- ENTRY;
-
- if (unlikely(mode == LCK_GROUP))
- ldlm_lock_decref_and_cancel(lockh, mode);
- else
- ldlm_lock_decref(lockh, mode);
-
- RETURN(0);
-}
-
static int osc_statfs_interpret(const struct lu_env *env,
struct ptlrpc_request *req,
struct osc_async_args *aa, int rc)
err = ptlrpc_set_import_active(obd->u.cli.cl_import,
data->ioc_offset);
GOTO(out, err);
- case OBD_IOC_POLL_QUOTACHECK:
- err = osc_quota_poll_check(exp, (struct if_quotacheck *)karg);
- GOTO(out, err);
case OBD_IOC_PING_TARGET:
err = ptlrpc_obd_ping(obd);
GOTO(out, err);
return err;
}
-static int osc_get_info(const struct lu_env *env, struct obd_export *exp,
- obd_count keylen, void *key, __u32 *vallen, void *val,
- struct lov_stripe_md *lsm)
-{
- ENTRY;
- if (!vallen || !val)
- RETURN(-EFAULT);
-
- if (KEY_IS(KEY_FIEMAP)) {
- struct ll_fiemap_info_key *fm_key =
- (struct ll_fiemap_info_key *)key;
- struct ldlm_res_id res_id;
- ldlm_policy_data_t policy;
- struct lustre_handle lockh;
- ldlm_mode_t mode = 0;
- struct ptlrpc_request *req;
- struct ll_user_fiemap *reply;
- char *tmp;
- int rc;
-
- if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC))
- goto skip_locking;
-
- policy.l_extent.start = fm_key->fiemap.fm_start &
- CFS_PAGE_MASK;
-
- if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <=
- fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1)
- policy.l_extent.end = OBD_OBJECT_EOF;
- else
- policy.l_extent.end = (fm_key->fiemap.fm_start +
- fm_key->fiemap.fm_length +
- PAGE_CACHE_SIZE - 1) & CFS_PAGE_MASK;
-
- ostid_build_res_name(&fm_key->oa.o_oi, &res_id);
- mode = ldlm_lock_match(exp->exp_obd->obd_namespace,
- LDLM_FL_BLOCK_GRANTED |
- LDLM_FL_LVB_READY,
- &res_id, LDLM_EXTENT, &policy,
- LCK_PR | LCK_PW, &lockh, 0);
- if (mode) { /* lock is cached on client */
- if (mode != LCK_PR) {
- ldlm_lock_addref(&lockh, LCK_PR);
- ldlm_lock_decref(&lockh, LCK_PW);
- }
- } else { /* no cached lock, needs acquire lock on server side */
- fm_key->oa.o_valid |= OBD_MD_FLFLAGS;
- fm_key->oa.o_flags |= OBD_FL_SRVLOCK;
- }
-
-skip_locking:
- req = ptlrpc_request_alloc(class_exp2cliimp(exp),
- &RQF_OST_GET_INFO_FIEMAP);
- if (req == NULL)
- GOTO(drop_lock, rc = -ENOMEM);
-
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY,
- RCL_CLIENT, keylen);
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
- RCL_CLIENT, *vallen);
- req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL,
- RCL_SERVER, *vallen);
-
- rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- GOTO(drop_lock, rc);
- }
-
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- memcpy(tmp, val, *vallen);
-
- ptlrpc_request_set_replen(req);
- rc = ptlrpc_queue_wait(req);
- if (rc)
- GOTO(fini_req, rc);
-
- reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
- if (reply == NULL)
- GOTO(fini_req, rc = -EPROTO);
-
- memcpy(val, reply, *vallen);
-fini_req:
- ptlrpc_req_finished(req);
-drop_lock:
- if (mode)
- ldlm_lock_decref(&lockh, LCK_PR);
- RETURN(rc);
- }
-
- RETURN(-EINVAL);
-}
-
static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp,
- obd_count keylen, void *key, obd_count vallen,
- void *val, struct ptlrpc_request_set *set)
+ u32 keylen, void *key,
+ u32 vallen, void *val,
+ struct ptlrpc_request_set *set)
{
struct ptlrpc_request *req;
struct obd_device *obd = exp->exp_obd;
LASSERT(cli->cl_cache == NULL); /* only once */
cli->cl_cache = (struct cl_client_cache *)val;
- atomic_inc(&cli->cl_cache->ccc_users);
+ cl_cache_incref(cli->cl_cache);
cli->cl_lru_left = &cli->cl_cache->ccc_lru_left;
/* add this osc into entity list */
req->rq_interpret_reply = osc_shrink_grant_interpret;
}
- ptlrpc_request_set_replen(req);
- if (!KEY_IS(KEY_GRANT_SHRINK)) {
- LASSERT(set != NULL);
- ptlrpc_set_add_req(set, req);
- ptlrpc_check_set(NULL, set);
- } else
- ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+ ptlrpc_request_set_replen(req);
+ if (!KEY_IS(KEY_GRANT_SHRINK)) {
+ LASSERT(set != NULL);
+ ptlrpc_set_add_req(set, req);
+ ptlrpc_check_set(NULL, set);
+ } else {
+ ptlrpcd_add_req(req);
+ }
- RETURN(0);
+ RETURN(0);
}
static int osc_reconnect(const struct lu_env *env,
return rc;
}
+/*
+ * Per-resource hash iteration callback: detach the granted locks of one
+ * LDLM resource from their osc object and invalidate that object.
+ *
+ * \param hs    hash table being walked; used to map \a hnode to its resource
+ * \param bd    hash bucket descriptor (not used by this callback)
+ * \param hnode hash node of the resource being visited
+ * \param arg   the caller's struct lu_env, passed through the iterator
+ *
+ * \retval 0 always
+ */
+static int osc_ldlm_resource_invalidate(struct cfs_hash *hs,
+ struct cfs_hash_bd *bd, struct hlist_node *hnode, void *arg)
+{
+ struct lu_env *env = arg;
+ struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+ struct ldlm_lock *lock;
+ struct osc_object *osc = NULL;
+ ENTRY;
+
+ /* Walk the granted list with the resource locked. */
+ lock_res(res);
+ list_for_each_entry(lock, &res->lr_granted, l_res_link) {
+ /* Remember only the first osc object found, and take a
+ * reference on it so it can be used after unlock_res(). */
+ if (lock->l_ast_data != NULL && osc == NULL) {
+ osc = lock->l_ast_data;
+ cl_object_get(osc2cl(osc));
+ }
+ /* Every granted lock loses its back-pointer, not just the first. */
+ lock->l_ast_data = NULL;
+ }
+ unlock_res(res);
+
+ /* The invalidation itself runs after the resource lock is dropped;
+ * the reference taken above is released once it is done. */
+ if (osc != NULL) {
+ osc_object_invalidate(env, osc);
+ cl_object_put(env, osc2cl(osc));
+ }
+
+ RETURN(0);
+}
+
static int osc_import_event(struct obd_device *obd,
struct obd_import *imp,
enum obd_import_event event)
struct lu_env *env;
int refcheck;
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
+
env = cl_env_get(&refcheck);
if (!IS_ERR(env)) {
- /* Reset grants */
- cli = &obd->u.cli;
- /* all pages go to failing rpcs due to the invalid
- * import */
- osc_io_unplug(env, cli, NULL, PDL_POLICY_ROUND);
-
- ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
- cl_env_put(env, &refcheck);
+ osc_io_unplug(env, &obd->u.cli, NULL);
+
+ cfs_hash_for_each_nolock(ns->ns_rs_hash,
+ osc_ldlm_resource_invalidate,
+ env, 0);
+ cl_env_put(env, &refcheck);
+
+ ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
} else
rc = PTR_ERR(env);
break;
CDEBUG(D_CACHE, "Run writeback work for client obd %p.\n", cli);
- osc_io_unplug(env, cli, NULL, PDL_POLICY_SAME);
+ osc_io_unplug(env, cli, NULL);
RETURN(0);
}
struct obd_type *type;
void *handler;
int rc;
+ int adding;
+ int added;
+ int req_count;
ENTRY;
rc = ptlrpcd_addref();
cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
-#ifdef LPROCFS
+#ifdef CONFIG_PROC_FS
obd->obd_vars = lprocfs_osc_obd_vars;
#endif
/* If this is true then both client (osc) and server (osp) are on the
* tree to type->typ_procsym instead of obd->obd_type->typ_procroot. */
type = class_search_type(LUSTRE_OSP_NAME);
if (type && type->typ_procsym) {
- obd->obd_proc_entry = lprocfs_seq_register(obd->obd_name,
- type->typ_procsym,
- obd->obd_vars, obd);
+ obd->obd_proc_entry = lprocfs_register(obd->obd_name,
+ type->typ_procsym,
+ obd->obd_vars, obd);
if (IS_ERR(obd->obd_proc_entry)) {
rc = PTR_ERR(obd->obd_proc_entry);
CERROR("error %d setting up lprocfs for %s\n", rc,
ptlrpc_lprocfs_register_obd(obd);
}
- /* We need to allocate a few requests more, because
- * brw_interpret tries to create new requests before freeing
- * previous ones, Ideally we want to have 2x max_rpcs_in_flight
- * reserved, but I'm afraid that might be too much wasted RAM
- * in fact, so 2 is just my guess and still should work. */
- cli->cl_import->imp_rq_pool =
- ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2,
- OST_MAXREQSIZE,
- ptlrpc_add_rqs_to_pool);
+ /*
+ * We try to control the total number of requests with an upper limit
+ * osc_reqpool_maxreqcount. There might be some race which will cause
+ * over-limit allocation, but it is fine.
+ */
+ req_count = atomic_read(&osc_pool_req_count);
+ if (req_count < osc_reqpool_maxreqcount) {
+ adding = cli->cl_max_rpcs_in_flight + 2;
+ if (req_count + adding > osc_reqpool_maxreqcount)
+ adding = osc_reqpool_maxreqcount - req_count;
+
+ added = ptlrpc_add_rqs_to_pool(osc_rq_pool, adding);
+ atomic_add(added, &osc_pool_req_count);
+ }
INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
list_del_init(&cli->cl_lru_osc);
spin_unlock(&cli->cl_cache->ccc_lru_lock);
cli->cl_lru_left = NULL;
- atomic_dec(&cli->cl_cache->ccc_users);
+ cl_cache_decref(cli->cl_cache);
cli->cl_cache = NULL;
}
/* free memory of osc quota cache */
- osc_quota_cleanup(obd);
+ osc_quota_cleanup(obd);
- rc = client_obd_cleanup(obd);
+ rc = client_obd_cleanup(obd);
- ptlrpcd_decref();
- RETURN(rc);
+ ptlrpcd_decref();
+ RETURN(rc);
}
int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg)
.o_create = osc_create,
.o_destroy = osc_destroy,
.o_getattr = osc_getattr,
- .o_getattr_async = osc_getattr_async,
.o_setattr = osc_setattr,
- .o_setattr_async = osc_setattr_async,
- .o_change_cbdata = osc_change_cbdata,
- .o_find_cbdata = osc_find_cbdata,
.o_iocontrol = osc_iocontrol,
- .o_get_info = osc_get_info,
.o_set_info_async = osc_set_info_async,
.o_import_event = osc_import_event,
.o_process_config = osc_process_config,
.o_quotactl = osc_quotactl,
- .o_quotacheck = osc_quotacheck,
};
static int __init osc_init(void)
{
bool enable_proc = true;
struct obd_type *type;
+ unsigned int reqpool_size;
+ unsigned int reqsize;
int rc;
+
ENTRY;
/* print an address of _any_ initialized kernel symbol from this
rc = class_register_type(&osc_obd_ops, NULL, enable_proc, NULL,
LUSTRE_OSC_NAME, &osc_device_type);
- if (rc) {
- lu_kmem_fini(osc_caches);
- RETURN(rc);
- }
+ if (rc)
+ GOTO(out_kmem, rc);
+
+ /* This is obviously too much memory, only prevent overflow here */
+ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0)
+ GOTO(out_type, rc = -EINVAL);
+
+ reqpool_size = osc_reqpool_mem_max << 20;
+ reqsize = 1;
+ while (reqsize < OST_IO_MAXREQSIZE)
+ reqsize = reqsize << 1;
+
+ /*
+ * We don't enlarge the request count in OSC pool according to
+ * cl_max_rpcs_in_flight. The allocation from the pool will only be
+ * tried after normal allocation failed. So a small OSC pool won't
+ * cause much performance regression in most cases.
+ */
+ osc_reqpool_maxreqcount = reqpool_size / reqsize;
+
+ atomic_set(&osc_pool_req_count, 0);
+ osc_rq_pool = ptlrpc_init_rq_pool(0, OST_IO_MAXREQSIZE,
+ ptlrpc_add_rqs_to_pool);
+
+ if (osc_rq_pool != NULL)
+ GOTO(out, rc);
+ rc = -ENOMEM;
+out_type:
+ class_unregister_type(LUSTRE_OSC_NAME);
+out_kmem:
+ lu_kmem_fini(osc_caches);
+out:
RETURN(rc);
}
{
class_unregister_type(LUSTRE_OSC_NAME);
lu_kmem_fini(osc_caches);
+ ptlrpc_free_rq_pool(osc_rq_pool);
}
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
MODULE_LICENSE("GPL");
-cfs_module(osc, LUSTRE_VERSION_STRING, osc_init, osc_exit);
+module_init(osc_init);
+module_exit(osc_exit);