X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosc%2Fosc_request.c;h=93567bc54bf439f04f25a77c772f2352a8144331;hp=357b8935f0f84b7e5968828bc8ce65e25a6263f3;hb=d6f104f056a5ec6e82e19f12f6faefa0d3ca10a9;hpb=929ec628e6fef5609e55d519a1eb9e2cbbf1f1e8 diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 357b893..93567bc 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -27,7 +27,7 @@ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Whamcloud, Inc. + * Copyright (c) 2011, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -46,8 +46,6 @@ #include #include #include -#include -#include #ifdef __CYGWIN__ # include @@ -55,110 +53,121 @@ #include #include +#include #include #include #include +#include #include "osc_internal.h" #include "osc_cl_internal.h" -static void osc_release_ppga(struct brw_page **ppga, obd_count count); -static int brw_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *data, int rc); -int osc_cleanup(struct obd_device *obd); +struct osc_brw_async_args { + struct obdo *aa_oa; + int aa_requested_nob; + int aa_nio_count; + obd_count aa_page_count; + int aa_resends; + struct brw_page **aa_ppga; + struct client_obd *aa_cli; + struct list_head aa_oaps; + struct list_head aa_exts; + struct obd_capa *aa_ocapa; + struct cl_req *aa_clerq; +}; -/* Pack OSC object metadata for disk storage (LE byte order). */ -static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) -{ - int lmm_size; - ENTRY; +#define osc_grant_args osc_brw_async_args - lmm_size = sizeof(**lmmp); - if (!lmmp) - RETURN(lmm_size); +struct osc_async_args { + struct obd_info *aa_oi; +}; - if (*lmmp && !lsm) { - OBD_FREE(*lmmp, lmm_size); - *lmmp = NULL; - RETURN(0); - } +struct osc_setattr_args { + struct obdo *sa_oa; + obd_enqueue_update_f sa_upcall; + void *sa_cookie; +}; - if (!*lmmp) { - OBD_ALLOC(*lmmp, lmm_size); - if (!*lmmp) - RETURN(-ENOMEM); - } +struct osc_fsync_args { + struct obd_info *fa_oi; + obd_enqueue_update_f fa_upcall; + void *fa_cookie; +}; - if (lsm) { - LASSERT(lsm->lsm_object_id); - LASSERT_SEQ_IS_MDT(lsm->lsm_object_seq); - (*lmmp)->lmm_object_id = cpu_to_le64(lsm->lsm_object_id); - (*lmmp)->lmm_object_seq = cpu_to_le64(lsm->lsm_object_seq); - } +struct osc_enqueue_args { + struct obd_export *oa_exp; + __u64 *oa_flags; + obd_enqueue_update_f oa_upcall; + void *oa_cookie; + struct ost_lvb *oa_lvb; + struct lustre_handle *oa_lockh; + struct ldlm_enqueue_info *oa_ei; + unsigned int oa_agl:1; +}; - RETURN(lmm_size); -} +static void osc_release_ppga(struct brw_page **ppga, obd_count count); +static int brw_interpret(const struct lu_env *env, struct ptlrpc_request *req, + void *data, int rc); /* Unpack OSC object metadata from disk storage (LE byte order). */ static int osc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes) + struct lov_mds_md *lmm, int lmm_bytes) { - int lsm_size; - struct obd_import *imp = class_exp2cliimp(exp); - ENTRY; + int lsm_size; + struct obd_import *imp = class_exp2cliimp(exp); + ENTRY; - if (lmm != NULL) { - if (lmm_bytes < sizeof (*lmm)) { - CERROR("lov_mds_md too small: %d, need %d\n", - lmm_bytes, (int)sizeof(*lmm)); - RETURN(-EINVAL); - } - /* XXX LOV_MAGIC etc check? */ + if (lmm != NULL) { + if (lmm_bytes < sizeof(*lmm)) { + CERROR("%s: lov_mds_md too small: %d, need %d\n", + exp->exp_obd->obd_name, lmm_bytes, + (int)sizeof(*lmm)); + RETURN(-EINVAL); + } + /* XXX LOV_MAGIC etc check? */ - if (lmm->lmm_object_id == 0) { - CERROR("lov_mds_md: zero lmm_object_id\n"); - RETURN(-EINVAL); - } - } + if (unlikely(ostid_id(&lmm->lmm_oi) == 0)) { + CERROR("%s: zero lmm_object_id: rc = %d\n", + exp->exp_obd->obd_name, -EINVAL); + RETURN(-EINVAL); + } + } - lsm_size = lov_stripe_md_size(1); - if (lsmp == NULL) - RETURN(lsm_size); + lsm_size = lov_stripe_md_size(1); + if (lsmp == NULL) + RETURN(lsm_size); - if (*lsmp != NULL && lmm == NULL) { - OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo)); - OBD_FREE(*lsmp, lsm_size); - *lsmp = NULL; - RETURN(0); - } + if (*lsmp != NULL && lmm == NULL) { + OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo)); + OBD_FREE(*lsmp, lsm_size); + *lsmp = NULL; + RETURN(0); + } - if (*lsmp == NULL) { - OBD_ALLOC(*lsmp, lsm_size); - if (*lsmp == NULL) - RETURN(-ENOMEM); - OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo)); - if ((*lsmp)->lsm_oinfo[0] == NULL) { - OBD_FREE(*lsmp, lsm_size); - RETURN(-ENOMEM); - } - loi_init((*lsmp)->lsm_oinfo[0]); - } + if (*lsmp == NULL) { + OBD_ALLOC(*lsmp, lsm_size); + if (unlikely(*lsmp == NULL)) + RETURN(-ENOMEM); + OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo)); + if (unlikely((*lsmp)->lsm_oinfo[0] == NULL)) { + OBD_FREE(*lsmp, lsm_size); + RETURN(-ENOMEM); + } + loi_init((*lsmp)->lsm_oinfo[0]); + } else if (unlikely(ostid_id(&(*lsmp)->lsm_oi) == 0)) { + RETURN(-EBADF); + } - if (lmm != NULL) { - /* XXX zero *lsmp? */ - (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); - (*lsmp)->lsm_object_seq = le64_to_cpu (lmm->lmm_object_seq); - LASSERT((*lsmp)->lsm_object_id); - LASSERT_SEQ_IS_MDT((*lsmp)->lsm_object_seq); - } + if (lmm != NULL) + /* XXX zero *lsmp? */ + ostid_le_to_cpu(&lmm->lmm_oi, &(*lsmp)->lsm_oi); - if (imp != NULL && - (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES)) - (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes; - else - (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; + if (imp != NULL && + (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_MAXBYTES)) + (*lsmp)->lsm_maxbytes = imp->imp_connect_data.ocd_maxbytes; + else + (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; - RETURN(lsm_size); + RETURN(lsm_size); } static inline void osc_pack_capa(struct ptlrpc_request *req, @@ -180,13 +189,14 @@ static inline void osc_pack_capa(struct ptlrpc_request *req, static inline void osc_pack_req_body(struct ptlrpc_request *req, struct obd_info *oinfo) { - struct ost_body *body; + struct ost_body *body; - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); - osc_pack_capa(req, body, oinfo->oi_capa); + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, + oinfo->oi_oa); + osc_pack_capa(req, body, oinfo->oi_capa); } static inline void osc_set_capa_size(struct ptlrpc_request *req, @@ -212,12 +222,13 @@ static int osc_getattr_interpret(const struct lu_env *env, body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); if (body) { - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa); + CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, + aa->aa_oi->oi_oa, &body->oa); - /* This should really be sent by the OST */ - aa->aa_oi->oi_oa->o_blksize = PTLRPC_MAX_BRW_SIZE; - aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ; + /* This should really be sent by the OST */ + aa->aa_oi->oi_oa->o_blksize = DT_MAX_BRW_SIZE; + aa->aa_oi->oi_oa->o_valid |= OBD_MD_FLBLKSZ; } else { CDEBUG(D_INFO, "can't unpack ost_body\n"); rc = -EPROTO; @@ -291,12 +302,12 @@ static int osc_getattr(const struct lu_env *env, struct obd_export *exp, if (body == NULL) GOTO(out, rc = -EPROTO); - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - lustre_get_wire_obdo(oinfo->oi_oa, &body->oa); + CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa, + &body->oa); - /* This should really be sent by the OST */ - oinfo->oi_oa->o_blksize = PTLRPC_MAX_BRW_SIZE; - oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ; + oinfo->oi_oa->o_blksize = cli_brw_size(exp->exp_obd); + oinfo->oi_oa->o_valid |= OBD_MD_FLBLKSZ; EXIT; out: @@ -337,7 +348,8 @@ static int osc_setattr(const struct lu_env *env, struct obd_export *exp, if (body == NULL) GOTO(out, rc = -EPROTO); - lustre_get_wire_obdo(oinfo->oi_oa, &body->oa); + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oinfo->oi_oa, + &body->oa); EXIT; out: @@ -359,7 +371,8 @@ static int osc_setattr_interpret(const struct lu_env *env, if (body == NULL) GOTO(out, rc = -EPROTO); - lustre_get_wire_obdo(sa->sa_oa, &body->oa); + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, sa->sa_oa, + &body->oa); out: rc = sa->sa_upcall(sa->sa_cookie, rc); RETURN(rc); @@ -455,7 +468,8 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); LASSERT(body); - lustre_set_wire_obdo(&body->oa, oa); + + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); ptlrpc_request_set_replen(req); @@ -475,26 +489,24 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, if (body == NULL) GOTO(out_req, rc = -EPROTO); - lustre_get_wire_obdo(oa, &body->oa); + CDEBUG(D_INFO, "oa flags %x\n", oa->o_flags); + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, oa, &body->oa); - /* This should really be sent by the OST */ - oa->o_blksize = PTLRPC_MAX_BRW_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; + oa->o_blksize = cli_brw_size(exp->exp_obd); + oa->o_valid |= OBD_MD_FLBLKSZ; - /* XXX LOV STACKING: the lsm that is passed to us from LOV does not - * have valid lsm_oinfo data structs, so don't go touching that. - * This needs to be fixed in a big way. - */ - lsm->lsm_object_id = oa->o_id; - lsm->lsm_object_seq = oa->o_seq; - *ea = lsm; + /* XXX LOV STACKING: the lsm that is passed to us from LOV does not + * have valid lsm_oinfo data structs, so don't go touching that. + * This needs to be fixed in a big way. + */ + lsm->lsm_oi = oa->o_oi; + *ea = lsm; if (oti != NULL) { - oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg); - if (oa->o_valid & OBD_MD_FLCOOKIE) { - if (!oti->oti_logcookies) - oti_alloc_cookies(oti, 1); + if (oti->oti_logcookies == NULL) + oti->oti_logcookies = &oti->oti_onecookie; + *oti->oti_logcookies = oa->o_lcookie; } } @@ -532,10 +544,11 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ ptlrpc_at_set_req_timeout(req); - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); - osc_pack_capa(req, body, oinfo->oi_capa); + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, + oinfo->oi_oa); + osc_pack_capa(req, body, oinfo->oi_capa); ptlrpc_request_set_replen(req); @@ -553,17 +566,6 @@ int osc_punch_base(struct obd_export *exp, struct obd_info *oinfo, RETURN(0); } -static int osc_punch(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo, struct obd_trans_info *oti, - struct ptlrpc_request_set *rqset) -{ - oinfo->oi_oa->o_size = oinfo->oi_policy.l_extent.start; - oinfo->oi_oa->o_blocks = oinfo->oi_policy.l_extent.end; - oinfo->oi_oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - return osc_punch_base(exp, oinfo, - oinfo->oi_cb_up, oinfo, rqset); -} - static int osc_sync_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *arg, int rc) @@ -608,11 +610,12 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, RETURN(rc); } - /* overload the size and blocks fields in the oa with start/end */ - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); - osc_pack_capa(req, body, oinfo->oi_capa); + /* overload the size and blocks fields in the oa with start/end */ + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, + oinfo->oi_oa); + osc_pack_capa(req, body, oinfo->oi_capa); ptlrpc_request_set_replen(req); req->rq_interpret_reply = osc_sync_interpret; @@ -631,30 +634,12 @@ int osc_sync_base(struct obd_export *exp, struct obd_info *oinfo, RETURN (0); } -static int osc_sync(const struct lu_env *env, struct obd_export *exp, - struct obd_info *oinfo, obd_size start, obd_size end, - struct ptlrpc_request_set *set) -{ - ENTRY; - - if (!oinfo->oi_oa) { - CDEBUG(D_INFO, "oa NULL\n"); - RETURN(-EINVAL); - } - - oinfo->oi_oa->o_size = start; - oinfo->oi_oa->o_blocks = end; - oinfo->oi_oa->o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - - RETURN(osc_sync_base(exp, oinfo, oinfo->oi_cb_up, oinfo, set)); -} - /* Find and cancel locally locks matched by @mode in the resource found by * @objid. Found locks are added into @cancel list. Returns the amount of * locks added to @cancels list. */ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa, - cfs_list_t *cancels, - ldlm_mode_t mode, int lock_flags) + struct list_head *cancels, + ldlm_mode_t mode, __u64 lock_flags) { struct ldlm_namespace *ns = exp->exp_obd->obd_namespace; struct ldlm_res_id res_id; @@ -671,10 +656,10 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa, if (exp_connect_cancelset(exp) && !ns_connect_cancelset(ns)) RETURN(0); - osc_build_res_name(oa->o_id, oa->o_seq, &res_id); - res = ldlm_resource_get(ns, NULL, &res_id, 0, 0); - if (res == NULL) - RETURN(0); + ostid_build_res_name(&oa->o_oi, &res_id); + res = ldlm_resource_get(ns, NULL, &res_id, 0, 0); + if (IS_ERR(res)) + RETURN(0); LDLM_RESOURCE_ADDREF(res); count = ldlm_cancel_resource_local(res, cancels, NULL, mode, @@ -685,32 +670,32 @@ static int osc_resource_get_unused(struct obd_export *exp, struct obdo *oa, } static int osc_destroy_interpret(const struct lu_env *env, - struct ptlrpc_request *req, void *data, - int rc) + struct ptlrpc_request *req, void *data, + int rc) { - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - cfs_atomic_dec(&cli->cl_destroy_in_flight); - cfs_waitq_signal(&cli->cl_destroy_waitq); - return 0; + atomic_dec(&cli->cl_destroy_in_flight); + wake_up(&cli->cl_destroy_waitq); + return 0; } static int osc_can_send_destroy(struct client_obd *cli) { - if (cfs_atomic_inc_return(&cli->cl_destroy_in_flight) <= - cli->cl_max_rpcs_in_flight) { - /* The destroy request can be sent */ - return 1; - } - if (cfs_atomic_dec_return(&cli->cl_destroy_in_flight) < - cli->cl_max_rpcs_in_flight) { - /* - * The counter has been modified between the two atomic - * operations. - */ - cfs_waitq_signal(&cli->cl_destroy_waitq); - } - return 0; + if (atomic_inc_return(&cli->cl_destroy_in_flight) <= + cli->cl_max_rpcs_in_flight) { + /* The destroy request can be sent */ + return 1; + } + if (atomic_dec_return(&cli->cl_destroy_in_flight) < + cli->cl_max_rpcs_in_flight) { + /* + * The counter has been modified between the two atomic + * operations. + */ + wake_up(&cli->cl_destroy_waitq); + } + return 0; } int osc_create(const struct lu_env *env, struct obd_export *exp, @@ -729,7 +714,7 @@ int osc_create(const struct lu_env *env, struct obd_export *exp, RETURN(osc_real_create(exp, oa, ea, oti)); } - if (!fid_seq_is_mdt(oa->o_seq)) + if (!fid_seq_is_mdt(ostid_seq(&oa->o_oi))) RETURN(osc_real_create(exp, oa, ea, oti)); /* we should not get here anymore */ @@ -756,7 +741,7 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp, struct client_obd *cli = &exp->exp_obd->u.cli; struct ptlrpc_request *req; struct ost_body *body; - CFS_LIST_HEAD(cancels); + struct list_head cancels = LIST_HEAD_INIT(cancels); int rc, count; ENTRY; @@ -785,11 +770,11 @@ static int osc_destroy(const struct lu_env *env, struct obd_export *exp, req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ ptlrpc_at_set_req_timeout(req); - if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) - oa->o_lcookie = *oti->oti_logcookies; - body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); - LASSERT(body); - lustre_set_wire_obdo(&body->oa, oa); + if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) + oa->o_lcookie = *oti->oti_logcookies; + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); osc_pack_capa(req, body, (struct obd_capa *)capa); ptlrpc_request_set_replen(req); @@ -825,32 +810,38 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, LASSERT(!(oa->o_valid & bits)); - oa->o_valid |= bits; - client_obd_list_lock(&cli->cl_loi_list_lock); - oa->o_dirty = cli->cl_dirty; - if (cli->cl_dirty - cli->cl_dirty_transit > cli->cl_dirty_max) { - CERROR("dirty %lu - %lu > dirty_max %lu\n", - cli->cl_dirty, cli->cl_dirty_transit, cli->cl_dirty_max); - oa->o_undirty = 0; - } else if (cfs_atomic_read(&obd_dirty_pages) - - cfs_atomic_read(&obd_dirty_transit_pages) > - obd_max_dirty_pages + 1){ - /* The cfs_atomic_read() allowing the cfs_atomic_inc() are - * not covered by a lock thus they may safely race and trip - * this CERROR() unless we add in a small fudge factor (+1). */ - CERROR("dirty %d - %d > system dirty_max %d\n", - cfs_atomic_read(&obd_dirty_pages), - cfs_atomic_read(&obd_dirty_transit_pages), - obd_max_dirty_pages); - oa->o_undirty = 0; - } else if (cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff) { - CERROR("dirty %lu - dirty_max %lu too big???\n", - cli->cl_dirty, cli->cl_dirty_max); - oa->o_undirty = 0; - } else { - long max_in_flight = (cli->cl_max_pages_per_rpc << CFS_PAGE_SHIFT)* - (cli->cl_max_rpcs_in_flight + 1); - oa->o_undirty = max(cli->cl_dirty_max, max_in_flight); + oa->o_valid |= bits; + client_obd_list_lock(&cli->cl_loi_list_lock); + oa->o_dirty = cli->cl_dirty_pages << PAGE_CACHE_SHIFT; + if (unlikely(cli->cl_dirty_pages - cli->cl_dirty_transit > + cli->cl_dirty_max_pages)) { + CERROR("dirty %lu - %lu > dirty_max %lu\n", + cli->cl_dirty_pages, cli->cl_dirty_transit, + cli->cl_dirty_max_pages); + oa->o_undirty = 0; + } else if (unlikely(atomic_read(&obd_dirty_pages) - + atomic_read(&obd_dirty_transit_pages) > + (long)(obd_max_dirty_pages + 1))) { + /* The atomic_read() allowing the atomic_inc() are + * not covered by a lock thus they may safely race and trip + * this CERROR() unless we add in a small fudge factor (+1). */ + CERROR("%s: dirty %d - %d > system dirty_max %d\n", + cli->cl_import->imp_obd->obd_name, + atomic_read(&obd_dirty_pages), + atomic_read(&obd_dirty_transit_pages), + obd_max_dirty_pages); + oa->o_undirty = 0; + } else if (unlikely(cli->cl_dirty_max_pages - cli->cl_dirty_pages > + 0x7fffffff)) { + CERROR("dirty %lu - dirty_max %lu too big???\n", + cli->cl_dirty_pages, cli->cl_dirty_max_pages); + oa->o_undirty = 0; + } else { + long max_in_flight = (cli->cl_max_pages_per_rpc << + PAGE_CACHE_SHIFT) * + (cli->cl_max_rpcs_in_flight + 1); + oa->o_undirty = max(cli->cl_dirty_max_pages << PAGE_CACHE_SHIFT, + max_in_flight); } oa->o_grant = cli->cl_avail_grant + cli->cl_reserved_grant; oa->o_dropped = cli->cl_lost_grant; @@ -929,45 +920,45 @@ static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) * needed, and avoids shrinking the grant piecemeal. */ static int osc_shrink_grant(struct client_obd *cli) { - long target = (cli->cl_max_rpcs_in_flight + 1) * - cli->cl_max_pages_per_rpc; + __u64 target_bytes = (cli->cl_max_rpcs_in_flight + 1) * + (cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT); - client_obd_list_lock(&cli->cl_loi_list_lock); - if (cli->cl_avail_grant <= target) - target = cli->cl_max_pages_per_rpc; - client_obd_list_unlock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cli->cl_avail_grant <= target_bytes) + target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + client_obd_list_unlock(&cli->cl_loi_list_lock); - return osc_shrink_grant_to_target(cli, target); + return osc_shrink_grant_to_target(cli, target_bytes); } -int osc_shrink_grant_to_target(struct client_obd *cli, long target) +int osc_shrink_grant_to_target(struct client_obd *cli, __u64 target_bytes) { - int rc = 0; - struct ost_body *body; - ENTRY; - - client_obd_list_lock(&cli->cl_loi_list_lock); - /* Don't shrink if we are already above or below the desired limit - * We don't want to shrink below a single RPC, as that will negatively - * impact block allocation and long-term performance. */ - if (target < cli->cl_max_pages_per_rpc) - target = cli->cl_max_pages_per_rpc; + int rc = 0; + struct ost_body *body; + ENTRY; - if (target >= cli->cl_avail_grant) { - client_obd_list_unlock(&cli->cl_loi_list_lock); - RETURN(0); - } - client_obd_list_unlock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); + /* Don't shrink if we are already above or below the desired limit + * We don't want to shrink below a single RPC, as that will negatively + * impact block allocation and long-term performance. */ + if (target_bytes < cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT) + target_bytes = cli->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + + if (target_bytes >= cli->cl_avail_grant) { + client_obd_list_unlock(&cli->cl_loi_list_lock); + RETURN(0); + } + client_obd_list_unlock(&cli->cl_loi_list_lock); - OBD_ALLOC_PTR(body); - if (!body) - RETURN(-ENOMEM); + OBD_ALLOC_PTR(body); + if (!body) + RETURN(-ENOMEM); - osc_announce_cached(cli, &body->oa, 0); + osc_announce_cached(cli, &body->oa, 0); - client_obd_list_lock(&cli->cl_loi_list_lock); - body->oa.o_grant = cli->cl_avail_grant - target; - cli->cl_avail_grant = target; + client_obd_list_lock(&cli->cl_loi_list_lock); + body->oa.o_grant = cli->cl_avail_grant - target_bytes; + cli->cl_avail_grant = target_bytes; client_obd_list_unlock(&cli->cl_loi_list_lock); if (!(body->oa.o_valid & OBD_MD_FLFLAGS)) { body->oa.o_valid |= OBD_MD_FLFLAGS; @@ -985,7 +976,6 @@ int osc_shrink_grant_to_target(struct client_obd *cli, long target) RETURN(rc); } -#define GRANT_SHRINK_LIMIT PTLRPC_MAX_BRW_SIZE static int osc_should_shrink_grant(struct client_obd *client) { cfs_time_t time = cfs_time_current(); @@ -995,26 +985,30 @@ static int osc_should_shrink_grant(struct client_obd *client) OBD_CONNECT_GRANT_SHRINK) == 0) return 0; - if (cfs_time_aftereq(time, next_shrink - 5 * CFS_TICK)) { - if (client->cl_import->imp_state == LUSTRE_IMP_FULL && - client->cl_avail_grant > GRANT_SHRINK_LIMIT) - return 1; - else - osc_update_next_shrink(client); - } + if (cfs_time_aftereq(time, next_shrink - 5 * CFS_TICK)) { + /* Get the current RPC size directly, instead of going via: + * cli_brw_size(obd->u.cli.cl_import->imp_obd->obd_self_export) + * Keep comment here so that it can be found by searching. */ + int brw_size = client->cl_max_pages_per_rpc << PAGE_CACHE_SHIFT; + + if (client->cl_import->imp_state == LUSTRE_IMP_FULL && + client->cl_avail_grant > brw_size) + return 1; + else + osc_update_next_shrink(client); + } return 0; } static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data) { - struct client_obd *client; + struct client_obd *client; - cfs_list_for_each_entry(client, &item->ti_obd_list, - cl_grant_shrink_list) { - if (osc_should_shrink_grant(client)) - osc_shrink_grant(client); - } - return 0; + list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) { + if (osc_should_shrink_grant(client)) + osc_shrink_grant(client); + } + return 0; } static int osc_add_shrink_grant(struct client_obd *client) @@ -1044,31 +1038,33 @@ static int osc_del_shrink_grant(struct client_obd *client) static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) { - /* - * ocd_grant is the total grant amount we're expect to hold: if we've - * been evicted, it's the new avail_grant amount, cl_dirty will drop - * to 0 as inflight RPCs fail out; otherwise, it's avail_grant + dirty. - * - * race is tolerable here: if we're evicted, but imp_state already - * left EVICTED state, then cl_dirty must be 0 already. - */ - client_obd_list_lock(&cli->cl_loi_list_lock); - if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) - cli->cl_avail_grant = ocd->ocd_grant; - else - cli->cl_avail_grant = ocd->ocd_grant - cli->cl_dirty; + /* + * ocd_grant is the total grant amount we're expect to hold: if we've + * been evicted, it's the new avail_grant amount, cl_dirty_pages will + * drop to 0 as inflight RPCs fail out; otherwise, it's avail_grant + + * dirty. + * + * race is tolerable here: if we're evicted, but imp_state already + * left EVICTED state, then cl_dirty_pages must be 0 already. + */ + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cli->cl_import->imp_state == LUSTRE_IMP_EVICTED) + cli->cl_avail_grant = ocd->ocd_grant; + else + cli->cl_avail_grant = ocd->ocd_grant - + (cli->cl_dirty_pages << PAGE_CACHE_SHIFT); if (cli->cl_avail_grant < 0) { - CWARN("%s: available grant < 0, the OSS is probably not running" - " with patch from bug20278 (%ld) \n", - cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant); - /* workaround for 1.6 servers which do not have - * the patch from bug20278 */ - cli->cl_avail_grant = ocd->ocd_grant; + CWARN("%s: available grant < 0: avail/ocd/dirty %ld/%u/%ld\n", + cli->cl_import->imp_obd->obd_name, cli->cl_avail_grant, + ocd->ocd_grant, cli->cl_dirty_pages << PAGE_CACHE_SHIFT); + /* workaround for servers which do not have the patch from + * LU-2679 */ + cli->cl_avail_grant = ocd->ocd_grant; } /* determine the appropriate chunk size used by osc_extent. */ - cli->cl_chunkbits = max_t(int, CFS_PAGE_SHIFT, ocd->ocd_blocksize); + cli->cl_chunkbits = max_t(int, PAGE_CACHE_SHIFT, ocd->ocd_blocksize); client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "%s, setting cl_avail_grant: %ld cl_lost_grant: %ld." @@ -1076,7 +1072,7 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) cli->cl_avail_grant, cli->cl_lost_grant, cli->cl_chunkbits); if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK && - cfs_list_empty(&cli->cl_grant_shrink_list)) + list_empty(&cli->cl_grant_shrink_list)) osc_add_shrink_grant(cli); } @@ -1094,29 +1090,29 @@ static void handle_short_read(int nob_read, obd_count page_count, while (nob_read > 0) { LASSERT (page_count > 0); - if (pga[i]->count > nob_read) { - /* EOF inside this page */ - ptr = cfs_kmap(pga[i]->pg) + - (pga[i]->off & ~CFS_PAGE_MASK); - memset(ptr + nob_read, 0, pga[i]->count - nob_read); - cfs_kunmap(pga[i]->pg); - page_count--; - i++; - break; - } + if (pga[i]->count > nob_read) { + /* EOF inside this page */ + ptr = kmap(pga[i]->pg) + + (pga[i]->off & ~CFS_PAGE_MASK); + memset(ptr + nob_read, 0, pga[i]->count - nob_read); + kunmap(pga[i]->pg); + page_count--; + i++; + break; + } nob_read -= pga[i]->count; page_count--; i++; } - /* zero remaining pages */ - while (page_count-- > 0) { - ptr = cfs_kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); - memset(ptr, 0, pga[i]->count); - cfs_kunmap(pga[i]->pg); - i++; - } + /* zero remaining pages */ + while (page_count-- > 0) { + ptr = kmap(pga[i]->pg) + (pga[i]->off & ~CFS_PAGE_MASK); + memset(ptr, 0, pga[i]->count); + kunmap(pga[i]->pg); + i++; + } } static int check_write_rcs(struct ptlrpc_request *req, @@ -1158,14 +1154,15 @@ static int check_write_rcs(struct ptlrpc_request *req, static inline int can_merge_pages(struct brw_page *p1, struct brw_page *p2) { if (p1->flag != p2->flag) { - unsigned mask = ~(OBD_BRW_FROM_GRANT| OBD_BRW_NOCACHE| - OBD_BRW_SYNC|OBD_BRW_ASYNC|OBD_BRW_NOQUOTA); + unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE | + OBD_BRW_SYNC | OBD_BRW_ASYNC | + OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC); /* warn if we try to combine flags that we don't know to be * safe to combine */ if (unlikely((p1->flag & mask) != (p2->flag & mask))) { CWARN("Saw flags 0x%x and 0x%x in the same brw, please " - "report this at http://bugs.whamcloud.com/\n", + "report this at https://jira.hpdd.intel.com/\n", p1->flag, p2->flag); } return 0; @@ -1201,28 +1198,26 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, * simulate an OST->client data error */ if (i == 0 && opc == OST_READ && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_RECEIVE)) { - unsigned char *ptr = cfs_kmap(pga[i]->pg); + unsigned char *ptr = kmap(pga[i]->pg); int off = pga[i]->off & ~CFS_PAGE_MASK; + memcpy(ptr + off, "bad1", min(4, nob)); - cfs_kunmap(pga[i]->pg); + kunmap(pga[i]->pg); } cfs_crypto_hash_update_page(hdesc, pga[i]->pg, - pga[i]->off & ~CFS_PAGE_MASK, - count); - LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d checksum %x\n", - (int)(pga[i]->off & ~CFS_PAGE_MASK), cksum); + pga[i]->off & ~CFS_PAGE_MASK, + count); + LL_CDEBUG_PAGE(D_PAGE, pga[i]->pg, "off %d\n", + (int)(pga[i]->off & ~CFS_PAGE_MASK)); nob -= pga[i]->count; pg_count--; i++; } - bufsize = 4; + bufsize = sizeof(cksum); err = cfs_crypto_hash_final(hdesc, (unsigned char *)&cksum, &bufsize); - if (err) - cfs_crypto_hash_final(hdesc, NULL, NULL); - /* For sending we only compute the wrong checksum instead * of corrupting the data so it is still correct on a redo */ if (opc == OST_WRITE && OBD_FAIL_CHECK(OBD_FAIL_OSC_CHECKSUM_SEND)) @@ -1289,12 +1284,10 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, * retry logic */ req->rq_no_retry_einprogress = 1; - if (opc == OST_WRITE) - desc = ptlrpc_prep_bulk_imp(req, page_count, - BULK_GET_SOURCE, OST_BULK_PORTAL); - else - desc = ptlrpc_prep_bulk_imp(req, page_count, - BULK_PUT_SINK, OST_BULK_PORTAL); + desc = ptlrpc_prep_bulk_imp(req, page_count, + cli->cl_import->imp_connect_data.ocd_brw_size >> LNET_MTU_BITS, + opc == OST_WRITE ? BULK_GET_SOURCE : BULK_PUT_SINK, + OST_BULK_PORTAL); if (desc == NULL) GOTO(out, rc = -ENOMEM); @@ -1305,26 +1298,32 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, niobuf = req_capsule_client_get(pill, &RMF_NIOBUF_REMOTE); LASSERT(body != NULL && ioobj != NULL && niobuf != NULL); - lustre_set_wire_obdo(&body->oa, oa); - - obdo_to_ioobj(oa, ioobj); - ioobj->ioo_bufcnt = niocount; - osc_pack_capa(req, body, ocapa); - LASSERT (page_count > 0); - pg_prev = pga[0]; + lustre_set_wire_obdo(&req->rq_import->imp_connect_data, &body->oa, oa); + + obdo_to_ioobj(oa, ioobj); + ioobj->ioo_bufcnt = niocount; + /* The high bits of ioo_max_brw tells server _maximum_ number of bulks + * that might be send for this request. The actual number is decided + * when the RPC is finally sent in ptlrpc_register_bulk(). It sends + * "max - 1" for old client compatibility sending "0", and also so the + * the actual maximum is a power-of-two number, not one less. LU-1431 */ + ioobj_max_brw_set(ioobj, desc->bd_md_max_brw); + osc_pack_capa(req, body, ocapa); + LASSERT(page_count > 0); + pg_prev = pga[0]; for (requested_nob = i = 0; i < page_count; i++, niobuf++) { struct brw_page *pg = pga[i]; int poff = pg->off & ~CFS_PAGE_MASK; LASSERT(pg->count > 0); /* make sure there is no gap in the middle of page array */ - LASSERTF(page_count == 1 || - (ergo(i == 0, poff + pg->count == CFS_PAGE_SIZE) && - ergo(i > 0 && i < page_count - 1, - poff == 0 && pg->count == CFS_PAGE_SIZE) && - ergo(i == page_count - 1, poff == 0)), - "i: %d/%d pg: %p off: "LPU64", count: %u\n", - i, page_count, pg, pg->off, pg->count); + LASSERTF(page_count == 1 || + (ergo(i == 0, poff + pg->count == PAGE_CACHE_SIZE) && + ergo(i > 0 && i < page_count - 1, + poff == 0 && pg->count == PAGE_CACHE_SIZE) && + ergo(i == page_count - 1, poff == 0)), + "i: %d/%d pg: %p off: "LPU64", count: %u\n", + i, page_count, pg, pg->off, pg->count); #ifdef __linux__ LASSERTF(i == 0 || pg->off > pg_prev->off, "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64 @@ -1345,11 +1344,11 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, if (i > 0 && can_merge_pages(pg_prev, pg)) { niobuf--; - niobuf->len += pg->count; - } else { - niobuf->offset = pg->off; - niobuf->len = pg->count; - niobuf->flags = pg->flag; + niobuf->rnb_len += pg->count; + } else { + niobuf->rnb_offset = pg->off; + niobuf->rnb_len = pg->count; + niobuf->rnb_flags = pg->flag; } pg_prev = pg; } @@ -1423,7 +1422,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, aa->aa_resends = 0; aa->aa_ppga = pga; aa->aa_cli = cli; - CFS_INIT_LIST_HEAD(&aa->aa_oaps); + INIT_LIST_HEAD(&aa->aa_oaps); if (ocapa && reserve) aa->aa_ocapa = capa_get(ocapa); @@ -1466,20 +1465,18 @@ static int check_write_checksum(struct obdo *oa, const lnet_process_id_t *peer, msg = "changed in transit AND doesn't match the original - " "likely false positive due to mmap IO (bug 11742)"; - LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode "DFID - " object "LPU64"/"LPU64" extent ["LPU64"-"LPU64"]\n", - msg, libcfs_nid2str(peer->nid), - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0, - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0, - oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0, - oa->o_id, - oa->o_valid & OBD_MD_FLGROUP ? oa->o_seq : (__u64)0, - pga[0]->off, - pga[page_count-1]->off + pga[page_count-1]->count - 1); - CERROR("original client csum %x (type %x), server csum %x (type %x), " - "client csum now %x\n", client_cksum, client_cksum_type, - server_cksum, cksum_type, new_cksum); - return 1; + LCONSOLE_ERROR_MSG(0x132, "BAD WRITE CHECKSUM: %s: from %s inode "DFID + " object "DOSTID" extent ["LPU64"-"LPU64"]\n", + msg, libcfs_nid2str(peer->nid), + oa->o_valid & OBD_MD_FLFID ? oa->o_parent_seq : (__u64)0, + oa->o_valid & OBD_MD_FLFID ? oa->o_parent_oid : 0, + oa->o_valid & OBD_MD_FLFID ? oa->o_parent_ver : 0, + POSTID(&oa->o_oi), pga[0]->off, + pga[page_count-1]->off + pga[page_count-1]->count - 1); + CERROR("original client csum %x (type %x), server csum %x (type %x), " + "client csum now %x\n", client_cksum, client_cksum_type, + server_cksum, cksum_type, new_cksum); + return 1; } /* Note rc enters this function as number of bytes transferred */ @@ -1588,42 +1585,34 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) router = libcfs_nid2str(req->rq_bulk->bd_sender); } - if (server_cksum == ~0 && rc > 0) { - CERROR("Protocol error: server %s set the 'checksum' " - "bit, but didn't send a checksum. Not fatal, " - "but please notify on http://bugs.whamcloud.com/\n", - libcfs_nid2str(peer->nid)); - } else if (server_cksum != client_cksum) { - LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from " - "%s%s%s inode "DFID" object " - LPU64"/"LPU64" extent " - "["LPU64"-"LPU64"]\n", - req->rq_import->imp_obd->obd_name, - libcfs_nid2str(peer->nid), - via, router, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_seq : (__u64)0, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_oid : 0, - body->oa.o_valid & OBD_MD_FLFID ? - body->oa.o_parent_ver : 0, - body->oa.o_id, - body->oa.o_valid & OBD_MD_FLGROUP ? - body->oa.o_seq : (__u64)0, - aa->aa_ppga[0]->off, - aa->aa_ppga[aa->aa_page_count-1]->off + - aa->aa_ppga[aa->aa_page_count-1]->count - - 1); - CERROR("client %x, server %x, cksum_type %x\n", - client_cksum, server_cksum, cksum_type); - cksum_counter = 0; - aa->aa_oa->o_cksum = client_cksum; - rc = -EAGAIN; - } else { - cksum_counter++; - CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum); - rc = 0; - } + if (server_cksum != client_cksum) { + LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from " + "%s%s%s inode "DFID" object "DOSTID + " extent ["LPU64"-"LPU64"]\n", + req->rq_import->imp_obd->obd_name, + libcfs_nid2str(peer->nid), + via, router, + body->oa.o_valid & OBD_MD_FLFID ? + body->oa.o_parent_seq : (__u64)0, + body->oa.o_valid & OBD_MD_FLFID ? + body->oa.o_parent_oid : 0, + body->oa.o_valid & OBD_MD_FLFID ? + body->oa.o_parent_ver : 0, + POSTID(&body->oa.o_oi), + aa->aa_ppga[0]->off, + aa->aa_ppga[aa->aa_page_count-1]->off + + aa->aa_ppga[aa->aa_page_count-1]->count - + 1); + CERROR("client %x, server %x, cksum_type %x\n", + client_cksum, server_cksum, cksum_type); + cksum_counter = 0; + aa->aa_oa->o_cksum = client_cksum; + rc = -EAGAIN; + } else { + cksum_counter++; + CDEBUG(D_PAGE, "checksum %x confirmed\n", client_cksum); + rc = 0; + } } else if (unlikely(client_cksum)) { static int cksum_missed; @@ -1635,82 +1624,13 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc) rc = 0; } out: - if (rc >= 0) - lustre_get_wire_obdo(aa->aa_oa, &body->oa); + if (rc >= 0) + lustre_get_wire_obdo(&req->rq_import->imp_connect_data, + aa->aa_oa, &body->oa); RETURN(rc); } -static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page **pga, - struct obd_capa *ocapa) -{ - struct ptlrpc_request *req; - int rc; - cfs_waitq_t waitq; - int generation, resends = 0; - struct l_wait_info lwi; - - ENTRY; - - cfs_waitq_init(&waitq); - generation = exp->exp_obd->u.cli.cl_import->imp_generation; - -restart_bulk: - rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, - page_count, pga, &req, ocapa, 0, resends); - if (rc != 0) - return (rc); - - if (resends) { - req->rq_generation_set = 1; - req->rq_import_generation = generation; - req->rq_sent = cfs_time_current_sec() + resends; - } - - rc = ptlrpc_queue_wait(req); - - if (rc == -ETIMEDOUT && req->rq_resend) { - DEBUG_REQ(D_HA, req, "BULK TIMEOUT"); - ptlrpc_req_finished(req); - goto restart_bulk; - } - - rc = osc_brw_fini_request(req, rc); - - ptlrpc_req_finished(req); - /* When server return -EINPROGRESS, client should always retry - * regardless of the number of times the bulk was resent already.*/ - if (osc_recoverable_error(rc)) { - resends++; - if (rc != -EINPROGRESS && - !client_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("%s: too many resend retries for object: " - ""LPU64":"LPU64", rc = %d.\n", - exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); - goto out; - } - if (generation != - exp->exp_obd->u.cli.cl_import->imp_generation) { - CDEBUG(D_HA, "%s: resend cross eviction for object: " - ""LPU64":"LPU64", rc = %d.\n", - exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); - goto out; - } - - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, - NULL); - l_wait_event(waitq, 0, &lwi); - - goto restart_bulk; - } -out: - if (rc == -EAGAIN || rc == -EINPROGRESS) - rc = -EIO; - RETURN (rc); -} - static int osc_brw_redo_request(struct ptlrpc_request *request, struct osc_brw_async_args *aa, int rc) { @@ -1731,7 +1651,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, if (rc) RETURN(rc); - cfs_list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { + list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { if (oap->oap_request != NULL) { LASSERTF(request == oap->oap_request, "request %p != oap_request %p\n", @@ -1747,6 +1667,7 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, aa->aa_resends++; new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; + new_req->rq_commit_cb = request->rq_commit_cb; /* cap resend delay to the current request timeout, this is similar to * what ptlrpc does (see after_reply()) */ if (aa->aa_resends > new_req->rq_timeout) @@ -1758,12 +1679,13 @@ static int osc_brw_redo_request(struct ptlrpc_request *request, new_aa = ptlrpc_req_async_args(new_req); - CFS_INIT_LIST_HEAD(&new_aa->aa_oaps); - cfs_list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps); - CFS_INIT_LIST_HEAD(&new_aa->aa_exts); - cfs_list_splice_init(&aa->aa_exts, &new_aa->aa_exts); + INIT_LIST_HEAD(&new_aa->aa_oaps); + list_splice_init(&aa->aa_oaps, &new_aa->aa_oaps); + INIT_LIST_HEAD(&new_aa->aa_exts); + list_splice_init(&aa->aa_exts, &new_aa->aa_exts); + new_aa->aa_resends = aa->aa_resends; - cfs_list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) { + list_for_each_entry(oap, &new_aa->aa_oaps, oap_rpc_item) { if (oap->oap_request) { ptlrpc_req_finished(oap->oap_request); oap->oap_request = ptlrpc_request_addref(new_req); @@ -1814,133 +1736,18 @@ static void sort_brw_pages(struct brw_page **array, int num) } while (stride > 1); } -static obd_count max_unfragmented_pages(struct brw_page **pg, obd_count pages) -{ - int count = 1; - int offset; - int i = 0; - - LASSERT (pages > 0); - offset = pg[i]->off & ~CFS_PAGE_MASK; - - for (;;) { - pages--; - if (pages == 0) /* that's all */ - return count; - - if (offset + pg[i]->count < CFS_PAGE_SIZE) - return count; /* doesn't end on page boundary */ - - i++; - offset = pg[i]->off & ~CFS_PAGE_MASK; - if (offset != 0) /* doesn't start on page boundary */ - return count; - - count++; - } -} - -static struct brw_page **osc_build_ppga(struct brw_page *pga, obd_count count) -{ - struct brw_page **ppga; - int i; - - OBD_ALLOC(ppga, sizeof(*ppga) * count); - if (ppga == NULL) - return NULL; - - for (i = 0; i < count; i++) - ppga[i] = pga + i; - return ppga; -} - static void osc_release_ppga(struct brw_page **ppga, obd_count count) { LASSERT(ppga != NULL); OBD_FREE(ppga, sizeof(*ppga) * count); } -static int osc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo, - obd_count page_count, struct brw_page *pga, - struct obd_trans_info *oti) -{ - struct obdo *saved_oa = NULL; - struct brw_page **ppga, **orig; - struct obd_import *imp = class_exp2cliimp(exp); - struct client_obd *cli; - int rc, page_count_orig; - ENTRY; - - LASSERT((imp != NULL) && (imp->imp_obd != NULL)); - cli = &imp->imp_obd->u.cli; - - if (cmd & OBD_BRW_CHECK) { - /* The caller just wants to know if there's a chance that this - * I/O can succeed */ - - if (imp->imp_invalid) - RETURN(-EIO); - RETURN(0); - } - - /* test_brw with a failed create can trip this, maybe others. */ - LASSERT(cli->cl_max_pages_per_rpc); - - rc = 0; - - orig = ppga = osc_build_ppga(pga, page_count); - if (ppga == NULL) - RETURN(-ENOMEM); - page_count_orig = page_count; - - sort_brw_pages(ppga, page_count); - while (page_count) { - obd_count pages_per_brw; - - if (page_count > cli->cl_max_pages_per_rpc) - pages_per_brw = cli->cl_max_pages_per_rpc; - else - pages_per_brw = page_count; - - pages_per_brw = max_unfragmented_pages(ppga, pages_per_brw); - - if (saved_oa != NULL) { - /* restore previously saved oa */ - *oinfo->oi_oa = *saved_oa; - } else if (page_count > pages_per_brw) { - /* save a copy of oa (brw will clobber it) */ - OBDO_ALLOC(saved_oa); - if (saved_oa == NULL) - GOTO(out, rc = -ENOMEM); - *saved_oa = *oinfo->oi_oa; - } - - rc = osc_brw_internal(cmd, exp, oinfo->oi_oa, oinfo->oi_md, - pages_per_brw, ppga, oinfo->oi_capa); - - if (rc != 0) - break; - - page_count -= pages_per_brw; - ppga += pages_per_brw; - } - -out: - osc_release_ppga(orig, page_count_orig); - - if (saved_oa != NULL) - OBDO_FREE(saved_oa); - - RETURN(rc); -} - static int brw_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *data, int rc) { struct osc_brw_async_args *aa = data; struct osc_extent *ext; struct osc_extent *tmp; - struct cl_object *obj = NULL; struct client_obd *cli = aa->aa_cli; ENTRY; @@ -1948,52 +1755,45 @@ static int brw_interpret(const struct lu_env *env, CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); /* When server return -EINPROGRESS, client should always retry * regardless of the number of times the bulk was resent already. */ - if (osc_recoverable_error(rc)) { - if (req->rq_import_generation != - req->rq_import->imp_generation) { - CDEBUG(D_HA, "%s: resend cross eviction for object: " - ""LPU64":"LPU64", rc = %d.\n", - req->rq_import->imp_obd->obd_name, - aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); - } else if (rc == -EINPROGRESS || - client_should_resend(aa->aa_resends, aa->aa_cli)) { - rc = osc_brw_redo_request(req, aa, rc); - } else { - CERROR("%s: too many resent retries for object: " - ""LPU64":"LPU64", rc = %d.\n", - req->rq_import->imp_obd->obd_name, - aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); - } + if (osc_recoverable_error(rc)) { + if (req->rq_import_generation != + req->rq_import->imp_generation) { + CDEBUG(D_HA, "%s: resend cross eviction for object: " + ""DOSTID", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + POSTID(&aa->aa_oa->o_oi), rc); + } else if (rc == -EINPROGRESS || + client_should_resend(aa->aa_resends, aa->aa_cli)) { + rc = osc_brw_redo_request(req, aa, rc); + } else { + CERROR("%s: too many resent retries for object: " + ""LPU64":"LPU64", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + POSTID(&aa->aa_oa->o_oi), rc); + } - if (rc == 0) - RETURN(0); - else if (rc == -EAGAIN || rc == -EINPROGRESS) - rc = -EIO; - } + if (rc == 0) + RETURN(0); + else if (rc == -EAGAIN || rc == -EINPROGRESS) + rc = -EIO; + } if (aa->aa_ocapa) { capa_put(aa->aa_ocapa); aa->aa_ocapa = NULL; } - cfs_list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { - if (obj == NULL && rc == 0) { - obj = osc2cl(ext->oe_obj); - cl_object_get(obj); - } - - cfs_list_del_init(&ext->oe_link); - osc_extent_finish(env, ext, 1, rc); - } - LASSERT(cfs_list_empty(&aa->aa_exts)); - LASSERT(cfs_list_empty(&aa->aa_oaps)); - - if (obj != NULL) { + if (rc == 0) { struct obdo *oa = aa->aa_oa; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; unsigned long valid = 0; + struct cl_object *obj; + struct osc_async_page *last; + + last = brw_page2oap(aa->aa_ppga[aa->aa_page_count - 1]); + obj = osc2cl(last->oap_obj); - LASSERT(rc == 0); + cl_object_attr_lock(obj); if (oa->o_valid & OBD_MD_FLBLOCKS) { attr->cat_blocks = oa->o_blocks; valid |= CAT_BLOCKS; @@ -2010,15 +1810,41 @@ static int brw_interpret(const struct lu_env *env, attr->cat_ctime = oa->o_ctime; valid |= CAT_CTIME; } - if (valid != 0) { - cl_object_attr_lock(obj); - cl_object_attr_set(env, obj, attr, valid); - cl_object_attr_unlock(obj); + + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE) { + struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; + loff_t last_off = last->oap_count + last->oap_obj_off; + + /* Change file size if this is an out of quota or + * direct IO write and it extends the file size */ + if (loi->loi_lvb.lvb_size < last_off) { + attr->cat_size = last_off; + valid |= CAT_SIZE; + } + /* Extend KMS if it's not a lockless write */ + if (loi->loi_kms < last_off && + oap2osc_page(last)->ops_srvlock == 0) { + attr->cat_kms = last_off; + valid |= CAT_KMS; + } } - cl_object_put(env, obj); + + if (valid != 0) + cl_object_attr_set(env, obj, attr, valid); + cl_object_attr_unlock(obj); } OBDO_FREE(aa->aa_oa); + if (lustre_msg_get_opc(req->rq_reqmsg) == OST_WRITE && rc == 0) + osc_inc_unstable_pages(req); + + list_for_each_entry_safe(ext, tmp, &aa->aa_exts, oe_link) { + list_del_init(&ext->oe_link); + osc_extent_finish(env, ext, 1, rc); + } + LASSERT(list_empty(&aa->aa_exts)); + LASSERT(list_empty(&aa->aa_oaps)); + cl_req_completion(env, aa->aa_clerq, rc < 0 ? rc : req->rq_bulk->bd_nob_transferred); osc_release_ppga(aa->aa_ppga, aa->aa_page_count); @@ -2039,40 +1865,63 @@ static int brw_interpret(const struct lu_env *env, RETURN(rc); } +static void brw_commit(struct ptlrpc_request *req) +{ + /* If osc_inc_unstable_pages (via osc_extent_finish) races with + * this called via the rq_commit_cb, I need to ensure + * osc_dec_unstable_pages is still called. Otherwise unstable + * pages may be leaked. */ + spin_lock(&req->rq_lock); + if (likely(req->rq_unstable)) { + req->rq_unstable = 0; + spin_unlock(&req->rq_lock); + + osc_dec_unstable_pages(req); + } else { + req->rq_committed = 1; + spin_unlock(&req->rq_lock); + } +} + /** * Build an RPC by the list of extent @ext_list. The caller must ensure * that the total pages in this list are NOT over max pages per RPC. * Extents in the list must be in OES_RPC state. */ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, - cfs_list_t *ext_list, int cmd, pdl_policy_t pol) -{ - struct ptlrpc_request *req = NULL; - struct osc_extent *ext; - CFS_LIST_HEAD(rpc_list); - struct brw_page **pga = NULL; - struct osc_brw_async_args *aa = NULL; - struct obdo *oa = NULL; - struct osc_async_page *oap; - struct osc_async_page *tmp; - struct cl_req *clerq = NULL; - enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : CRT_READ; - struct ldlm_lock *lock = NULL; - struct cl_req_attr crattr; - obd_off starting_offset = OBD_OBJECT_EOF; - obd_off ending_offset = 0; - int i, rc, mpflag = 0, mem_tight = 0, page_count = 0; + struct list_head *ext_list, int cmd, pdl_policy_t pol) +{ + struct ptlrpc_request *req = NULL; + struct osc_extent *ext; + struct brw_page **pga = NULL; + struct osc_brw_async_args *aa = NULL; + struct obdo *oa = NULL; + struct osc_async_page *oap; + struct osc_async_page *tmp; + struct cl_req *clerq = NULL; + enum cl_req_type crt = (cmd & OBD_BRW_WRITE) ? CRT_WRITE : + CRT_READ; + struct cl_req_attr *crattr = NULL; + obd_off starting_offset = OBD_OBJECT_EOF; + obd_off ending_offset = 0; + int mpflag = 0; + int mem_tight = 0; + int page_count = 0; + bool soft_sync = false; + int i; + int rc; + struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); ENTRY; - LASSERT(!cfs_list_empty(ext_list)); + LASSERT(!list_empty(ext_list)); /* add pages into rpc_list to build BRW rpc */ - cfs_list_for_each_entry(ext, ext_list, oe_link) { + list_for_each_entry(ext, ext_list, oe_link) { LASSERT(ext->oe_state == OES_RPC); mem_tight |= ext->oe_memalloc; - cfs_list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { + list_for_each_entry(oap, &ext->oe_pages, oap_pending_item) { ++page_count; - cfs_list_add_tail(&oap->oap_rpc_item, &rpc_list); + list_add_tail(&oap->oap_rpc_item, &rpc_list); if (starting_offset > oap->oap_obj_off) starting_offset = oap->oap_obj_off; else @@ -2082,14 +1931,18 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, oap->oap_count; else LASSERT(oap->oap_page_off + oap->oap_count == - CFS_PAGE_SIZE); + PAGE_CACHE_SIZE); } } + soft_sync = osc_over_unstable_soft_limit(cli); if (mem_tight) mpflag = cfs_memory_pressure_get_and_set(); - memset(&crattr, 0, sizeof crattr); + OBD_ALLOC(crattr, sizeof(*crattr)); + if (crattr == NULL) + GOTO(out, rc = -ENOMEM); + OBD_ALLOC(pga, sizeof(*pga) * page_count); if (pga == NULL) GOTO(out, rc = -ENOMEM); @@ -2099,77 +1952,74 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, GOTO(out, rc = -ENOMEM); i = 0; - cfs_list_for_each_entry(oap, &rpc_list, oap_rpc_item) { + list_for_each_entry(oap, &rpc_list, oap_rpc_item) { struct cl_page *page = oap2cl_page(oap); if (clerq == NULL) { clerq = cl_req_alloc(env, page, crt, - 1 /* only 1-object rpcs for - * now */); + 1 /* only 1-object rpcs for now */); if (IS_ERR(clerq)) GOTO(out, rc = PTR_ERR(clerq)); - lock = oap->oap_ldlm_lock; } if (mem_tight) oap->oap_brw_flags |= OBD_BRW_MEMALLOC; - pga[i] = &oap->oap_brw_page; - pga[i]->off = oap->oap_obj_off + oap->oap_page_off; - CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i]->pg, cfs_page_index(oap->oap_page), oap, pga[i]->flag); - i++; - cl_req_page_add(env, clerq, page); - } + if (soft_sync) + oap->oap_brw_flags |= OBD_BRW_SOFT_SYNC; + pga[i] = &oap->oap_brw_page; + pga[i]->off = oap->oap_obj_off + oap->oap_page_off; + CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", + pga[i]->pg, page_index(oap->oap_page), oap, + pga[i]->flag); + i++; + cl_req_page_add(env, clerq, page); + } - /* always get the data for the obdo for the rpc */ + /* always get the data for the obdo for the rpc */ LASSERT(clerq != NULL); - crattr.cra_oa = oa; - crattr.cra_capa = NULL; - memset(crattr.cra_jobid, 0, JOBSTATS_JOBID_SIZE); - cl_req_attr_set(env, clerq, &crattr, ~0ULL); - if (lock) { - oa->o_handle = lock->l_remote_handle; - oa->o_valid |= OBD_MD_FLHANDLE; - } + crattr->cra_oa = oa; + cl_req_attr_set(env, clerq, crattr, ~0ULL); - rc = cl_req_prep(env, clerq); - if (rc != 0) { - CERROR("cl_req_prep failed: %d\n", rc); + rc = cl_req_prep(env, clerq); + if (rc != 0) { + CERROR("cl_req_prep failed: %d\n", rc); GOTO(out, rc); } sort_brw_pages(pga, page_count); rc = osc_brw_prep_request(cmd, cli, oa, NULL, page_count, - pga, &req, crattr.cra_capa, 1, 0); + pga, &req, crattr->cra_capa, 1, 0); if (rc != 0) { CERROR("prep_req failed: %d\n", rc); GOTO(out, rc); } + req->rq_commit_cb = brw_commit; req->rq_interpret_reply = brw_interpret; + if (mem_tight != 0) - req->rq_memalloc = 1; + req->rq_memalloc = 1; - /* Need to update the timestamps after the request is built in case - * we race with setattr (locally or in queue at OST). If OST gets - * later setattr before earlier BRW (as determined by the request xid), - * the OST will not use BRW timestamps. Sadly, there is no obvious - * way to do this in a single call. bug 10150 */ - cl_req_attr_set(env, clerq, &crattr, - OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME); + /* Need to update the timestamps after the request is built in case + * we race with setattr (locally or in queue at OST). If OST gets + * later setattr before earlier BRW (as determined by the request xid), + * the OST will not use BRW timestamps. Sadly, there is no obvious + * way to do this in a single call. bug 10150 */ + cl_req_attr_set(env, clerq, crattr, + OBD_MD_FLMTIME|OBD_MD_FLCTIME|OBD_MD_FLATIME); - lustre_msg_set_jobid(req->rq_reqmsg, crattr.cra_jobid); + lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); aa = ptlrpc_req_async_args(req); - CFS_INIT_LIST_HEAD(&aa->aa_oaps); - cfs_list_splice_init(&rpc_list, &aa->aa_oaps); - CFS_INIT_LIST_HEAD(&aa->aa_exts); - cfs_list_splice_init(ext_list, &aa->aa_exts); + INIT_LIST_HEAD(&aa->aa_oaps); + list_splice_init(&rpc_list, &aa->aa_oaps); + INIT_LIST_HEAD(&aa->aa_exts); + list_splice_init(ext_list, &aa->aa_exts); aa->aa_clerq = clerq; /* queued sync pages can be torn down while the pages * were between the pending list and the rpc */ tmp = NULL; - cfs_list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { + list_for_each_entry(oap, &aa->aa_oaps, oap_rpc_item) { /* only one oap gets a request reference */ if (tmp == NULL) tmp = oap; @@ -2183,7 +2033,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, tmp->oap_request = ptlrpc_request_addref(req); client_obd_list_lock(&cli->cl_loi_list_lock); - starting_offset >>= CFS_PAGE_SHIFT; + starting_offset >>= PAGE_CACHE_SHIFT; if (cmd == OBD_BRW_READ) { cli->cl_r_in_flight++; lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); @@ -2223,20 +2073,24 @@ out: if (mem_tight != 0) cfs_memory_pressure_restore(mpflag); - capa_put(crattr.cra_capa); + if (crattr != NULL) { + capa_put(crattr->cra_capa); + OBD_FREE(crattr, sizeof(*crattr)); + } + if (rc != 0) { LASSERT(req == NULL); - if (oa) - OBDO_FREE(oa); - if (pga) - OBD_FREE(pga, sizeof(*pga) * page_count); - /* this should happen rarely and is pretty bad, it makes the - * pending list not follow the dirty order */ - while (!cfs_list_empty(ext_list)) { - ext = cfs_list_entry(ext_list->next, struct osc_extent, - oe_link); - cfs_list_del_init(&ext->oe_link); + if (oa) + OBDO_FREE(oa); + if (pga) + OBD_FREE(pga, sizeof(*pga) * page_count); + /* this should happen rarely and is pretty bad, it makes the + * pending list not follow the dirty order */ + while (!list_empty(ext_list)) { + ext = list_entry(ext_list->next, struct osc_extent, + oe_link); + list_del_init(&ext->oe_link); osc_extent_finish(env, ext, 0, rc); } if (clerq && !IS_ERR(clerq)) @@ -2292,7 +2146,7 @@ static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, struct ldlm_res_id res_id; struct obd_device *obd = class_exp2obd(exp); - osc_build_res_name(lsm->lsm_object_id, lsm->lsm_object_seq, &res_id); + ostid_build_res_name(&lsm->lsm_oi, &res_id); ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data); return 0; } @@ -2308,7 +2162,7 @@ static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, struct obd_device *obd = class_exp2obd(exp); int rc = 0; - osc_build_res_name(lsm->lsm_object_id, lsm->lsm_object_seq, &res_id); + ostid_build_res_name(&lsm->lsm_oi, &res_id); rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data); if (rc == LDLM_ITER_STOP) return(1); @@ -2332,6 +2186,8 @@ static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb, &RMF_DLM_REP); LASSERT(rep != NULL); + rep->lock_policy_res1 = + ptlrpc_status_ntoh(rep->lock_policy_res1); if (rep->lock_policy_res1) rc = rep->lock_policy_res1; } @@ -2375,6 +2231,9 @@ static int osc_enqueue_interpret(const struct lu_env *env, * osc_enqueue_fini(). */ ldlm_lock_addref(&handle, mode); + /* Let cl_lock_state_wait fail with -ERESTARTSYS to unuse sublocks. */ + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_HANG, 2); + /* Let CP AST to grant the lock first. */ OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_CP_ENQ_RACE, 1); @@ -2411,51 +2270,6 @@ static int osc_enqueue_interpret(const struct lu_env *env, return rc; } -void osc_update_enqueue(struct lustre_handle *lov_lockhp, - struct lov_oinfo *loi, int flags, - struct ost_lvb *lvb, __u32 mode, int rc) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp); - - if (rc == ELDLM_OK) { - __u64 tmp; - - LASSERT(lock != NULL); - loi->loi_lvb = *lvb; - tmp = loi->loi_lvb.lvb_size; - /* Extend KMS up to the end of this lock and no further - * A lock on [x,y] means a KMS of up to y + 1 bytes! */ - if (tmp > lock->l_policy_data.l_extent.end) - tmp = lock->l_policy_data.l_extent.end + 1; - if (tmp >= loi->loi_kms) { - LDLM_DEBUG(lock, "lock acquired, setting rss="LPU64 - ", kms="LPU64, loi->loi_lvb.lvb_size, tmp); - loi_kms_set(loi, tmp); - } else { - LDLM_DEBUG(lock, "lock acquired, setting rss=" - LPU64"; leaving kms="LPU64", end="LPU64, - loi->loi_lvb.lvb_size, loi->loi_kms, - lock->l_policy_data.l_extent.end); - } - ldlm_lock_allow_match(lock); - } else if (rc == ELDLM_LOCK_ABORTED && (flags & LDLM_FL_HAS_INTENT)) { - LASSERT(lock != NULL); - loi->loi_lvb = *lvb; - ldlm_lock_allow_match(lock); - CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving" - " kms="LPU64"\n", loi->loi_lvb.lvb_size, loi->loi_kms); - rc = ELDLM_OK; - } - - if (lock != NULL) { - if (rc != ELDLM_OK) - ldlm_lock_fail_match(lock); - - LDLM_LOCK_PUT(lock); - } -} -EXPORT_SYMBOL(osc_update_enqueue); - struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; /* When enqueuing asynchronously, locks are not ordered, we can obtain a lock @@ -2467,19 +2281,19 @@ struct ptlrpc_request_set *PTLRPCD_SET = (void *)1; * release locks just after they are obtained. */ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, __u64 *flags, ldlm_policy_data_t *policy, - struct ost_lvb *lvb, int kms_valid, - obd_enqueue_update_f upcall, void *cookie, - struct ldlm_enqueue_info *einfo, - struct lustre_handle *lockh, - struct ptlrpc_request_set *rqset, int async, int agl) + struct ost_lvb *lvb, int kms_valid, + obd_enqueue_update_f upcall, void *cookie, + struct ldlm_enqueue_info *einfo, + struct lustre_handle *lockh, + struct ptlrpc_request_set *rqset, int async, int agl) { - struct obd_device *obd = exp->exp_obd; - struct ptlrpc_request *req = NULL; - int intent = *flags & LDLM_FL_HAS_INTENT; - int match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY); - ldlm_mode_t mode; - int rc; - ENTRY; + struct obd_device *obd = exp->exp_obd; + struct ptlrpc_request *req = NULL; + int intent = *flags & LDLM_FL_HAS_INTENT; + __u64 match_lvb = (agl != 0 ? 0 : LDLM_FL_LVB_READY); + ldlm_mode_t mode; + int rc; + ENTRY; /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother. */ @@ -2515,7 +2329,7 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, if (mode) { struct ldlm_lock *matched = ldlm_handle2lock(lockh); - if ((agl != 0) && !(matched->l_flags & LDLM_FL_LVB_READY)) { + if ((agl != 0) && !ldlm_is_lvb_ready(matched)) { /* For AGL, if enqueue RPC is sent but the lock is not * granted, then skip to process this strpe. * Return -ECANCELED to tell the caller. */ @@ -2555,14 +2369,13 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, no_match: if (intent) { - CFS_LIST_HEAD(cancels); - req = ptlrpc_request_alloc(class_exp2cliimp(exp), - &RQF_LDLM_ENQUEUE_LVB); - if (req == NULL) - RETURN(-ENOMEM); + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_LDLM_ENQUEUE_LVB); + if (req == NULL) + RETURN(-ENOMEM); - rc = ldlm_prep_enqueue_req(exp, req, &cancels, 0); - if (rc) { + rc = ptlrpc_request_pack(req, LUSTRE_DLM_VERSION, LDLM_ENQUEUE); + if (rc < 0) { ptlrpc_request_free(req); RETURN(rc); } @@ -2610,34 +2423,15 @@ int osc_enqueue_base(struct obd_export *exp, struct ldlm_res_id *res_id, RETURN(rc); } -static int osc_enqueue(struct obd_export *exp, struct obd_info *oinfo, - struct ldlm_enqueue_info *einfo, - struct ptlrpc_request_set *rqset) -{ - struct ldlm_res_id res_id; - int rc; - ENTRY; - - osc_build_res_name(oinfo->oi_md->lsm_object_id, - oinfo->oi_md->lsm_object_seq, &res_id); - - rc = osc_enqueue_base(exp, &res_id, &oinfo->oi_flags, &oinfo->oi_policy, - &oinfo->oi_md->lsm_oinfo[0]->loi_lvb, - oinfo->oi_md->lsm_oinfo[0]->loi_kms_valid, - oinfo->oi_cb_up, oinfo, einfo, oinfo->oi_lockh, - rqset, rqset != NULL, 0); - RETURN(rc); -} - int osc_match_base(struct obd_export *exp, struct ldlm_res_id *res_id, - __u32 type, ldlm_policy_data_t *policy, __u32 mode, - int *flags, void *data, struct lustre_handle *lockh, - int unref) + __u32 type, ldlm_policy_data_t *policy, __u32 mode, + __u64 *flags, void *data, struct lustre_handle *lockh, + int unref) { - struct obd_device *obd = exp->exp_obd; - int lflags = *flags; - ldlm_mode_t rc; - ENTRY; + struct obd_device *obd = exp->exp_obd; + __u64 lflags = *flags; + ldlm_mode_t rc; + ENTRY; if (OBD_FAIL_CHECK(OBD_FAIL_OSC_MATCH)) RETURN(-EIO); @@ -2685,29 +2479,6 @@ int osc_cancel_base(struct lustre_handle *lockh, __u32 mode) RETURN(0); } -static int osc_cancel(struct obd_export *exp, struct lov_stripe_md *md, - __u32 mode, struct lustre_handle *lockh) -{ - ENTRY; - RETURN(osc_cancel_base(lockh, mode)); -} - -static int osc_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *lsm, - ldlm_cancel_flags_t flags, - void *opaque) -{ - struct obd_device *obd = class_exp2obd(exp); - struct ldlm_res_id res_id, *resp = NULL; - - if (lsm != NULL) { - resp = osc_build_res_name(lsm->lsm_object_id, - lsm->lsm_object_seq, &res_id); - } - - return ldlm_cli_cancel_unused(obd->obd_namespace, resp, flags, opaque); -} - static int osc_statfs_interpret(const struct lu_env *env, struct ptlrpc_request *req, struct osc_async_args *aa, int rc) @@ -2849,70 +2620,6 @@ static int osc_statfs(const struct lu_env *env, struct obd_export *exp, return rc; } -/* Retrieve object striping information. - * - * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating - * the maximum number of OST indices which will fit in the user buffer. - * lmm_magic must be LOV_MAGIC (we only use 1 slot here). - */ -static int osc_getstripe(struct lov_stripe_md *lsm, struct lov_user_md *lump) -{ - /* we use lov_user_md_v3 because it is larger than lov_user_md_v1 */ - struct lov_user_md_v3 lum, *lumk; - struct lov_user_ost_data_v1 *lmm_objects; - int rc = 0, lum_size; - ENTRY; - - if (!lsm) - RETURN(-ENODATA); - - /* we only need the header part from user space to get lmm_magic and - * lmm_stripe_count, (the header part is common to v1 and v3) */ - lum_size = sizeof(struct lov_user_md_v1); - if (cfs_copy_from_user(&lum, lump, lum_size)) - RETURN(-EFAULT); - - if ((lum.lmm_magic != LOV_USER_MAGIC_V1) && - (lum.lmm_magic != LOV_USER_MAGIC_V3)) - RETURN(-EINVAL); - - /* lov_user_md_vX and lov_mds_md_vX must have the same size */ - LASSERT(sizeof(struct lov_user_md_v1) == sizeof(struct lov_mds_md_v1)); - LASSERT(sizeof(struct lov_user_md_v3) == sizeof(struct lov_mds_md_v3)); - LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lumk->lmm_objects[0])); - - /* we can use lov_mds_md_size() to compute lum_size - * because lov_user_md_vX and lov_mds_md_vX have the same size */ - if (lum.lmm_stripe_count > 0) { - lum_size = lov_mds_md_size(lum.lmm_stripe_count, lum.lmm_magic); - OBD_ALLOC(lumk, lum_size); - if (!lumk) - RETURN(-ENOMEM); - - if (lum.lmm_magic == LOV_USER_MAGIC_V1) - lmm_objects = &(((struct lov_user_md_v1 *)lumk)->lmm_objects[0]); - else - lmm_objects = &(lumk->lmm_objects[0]); - lmm_objects->l_object_id = lsm->lsm_object_id; - } else { - lum_size = lov_mds_md_size(0, lum.lmm_magic); - lumk = &lum; - } - - lumk->lmm_object_id = lsm->lsm_object_id; - lumk->lmm_object_seq = lsm->lsm_object_seq; - lumk->lmm_stripe_count = 1; - - if (cfs_copy_to_user(lump, lumk, lum_size)) - rc = -EFAULT; - - if (lumk != &lum) - OBD_FREE(lumk, lum_size); - - RETURN(rc); -} - - static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -2921,58 +2628,12 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, int err = 0; ENTRY; - if (!cfs_try_module_get(THIS_MODULE)) { - CERROR("Can't get module. Is it alive?"); - return -EINVAL; - } + if (!try_module_get(THIS_MODULE)) { + CERROR("%s: cannot get module '%s'\n", obd->obd_name, + module_name(THIS_MODULE)); + return -EINVAL; + } switch (cmd) { - case OBD_IOC_LOV_GET_CONFIG: { - char *buf; - struct lov_desc *desc; - struct obd_uuid uuid; - - buf = NULL; - len = 0; - if (obd_ioctl_getdata(&buf, &len, (void *)uarg)) - GOTO(out, err = -EINVAL); - - data = (struct obd_ioctl_data *)buf; - - if (sizeof(*desc) > data->ioc_inllen1) { - obd_ioctl_freedata(buf, len); - GOTO(out, err = -EINVAL); - } - - if (data->ioc_inllen2 < sizeof(uuid)) { - obd_ioctl_freedata(buf, len); - GOTO(out, err = -EINVAL); - } - - desc = (struct lov_desc *)data->ioc_inlbuf1; - desc->ld_tgt_count = 1; - desc->ld_active_tgt_count = 1; - desc->ld_default_stripe_count = 1; - desc->ld_default_stripe_size = 0; - desc->ld_default_stripe_offset = 0; - desc->ld_pattern = 0; - memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid)); - - memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid)); - - err = cfs_copy_to_user((void *)uarg, buf, len); - if (err) - err = -EFAULT; - obd_ioctl_freedata(buf, len); - GOTO(out, err); - } - case LL_IOC_LOV_SETSTRIPE: - err = obd_alloc_memmd(exp, karg); - if (err > 0) - err = 0; - GOTO(out, err); - case LL_IOC_LOV_GETSTRIPE: - err = osc_getstripe(karg, uarg); - GOTO(out, err); case OBD_IOC_CLIENT_RECOVER: err = ptlrpc_recover_import(obd->u.cli.cl_import, data->ioc_inlbuf1, 0); @@ -2989,14 +2650,14 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_PING_TARGET: err = ptlrpc_obd_ping(obd); GOTO(out, err); - default: - CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", - cmd, cfs_curproc_comm()); - GOTO(out, err = -ENOTTY); - } + default: + CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", + cmd, current_comm()); + GOTO(out, err = -ENOTTY); + } out: - cfs_module_put(THIS_MODULE); - return err; + module_put(THIS_MODULE); + return err; } static int osc_get_info(const struct lu_env *env, struct obd_export *exp, @@ -3049,15 +2710,52 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp, ptlrpc_req_finished(req); RETURN(rc); } else if (KEY_IS(KEY_FIEMAP)) { - struct ptlrpc_request *req; - struct ll_user_fiemap *reply; - char *tmp; - int rc; + struct ll_fiemap_info_key *fm_key = + (struct ll_fiemap_info_key *)key; + struct ldlm_res_id res_id; + ldlm_policy_data_t policy; + struct lustre_handle lockh; + ldlm_mode_t mode = 0; + struct ptlrpc_request *req; + struct ll_user_fiemap *reply; + char *tmp; + int rc; + + if (!(fm_key->fiemap.fm_flags & FIEMAP_FLAG_SYNC)) + goto skip_locking; + + policy.l_extent.start = fm_key->fiemap.fm_start & + CFS_PAGE_MASK; + + if (OBD_OBJECT_EOF - fm_key->fiemap.fm_length <= + fm_key->fiemap.fm_start + PAGE_CACHE_SIZE - 1) + policy.l_extent.end = OBD_OBJECT_EOF; + else + policy.l_extent.end = (fm_key->fiemap.fm_start + + fm_key->fiemap.fm_length + + PAGE_CACHE_SIZE - 1) & CFS_PAGE_MASK; + + ostid_build_res_name(&fm_key->oa.o_oi, &res_id); + mode = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED | + LDLM_FL_LVB_READY, + &res_id, LDLM_EXTENT, &policy, + LCK_PR | LCK_PW, &lockh, 0); + if (mode) { /* lock is cached on client */ + if (mode != LCK_PR) { + ldlm_lock_addref(&lockh, LCK_PR); + ldlm_lock_decref(&lockh, LCK_PW); + } + } else { /* no cached lock, needs acquire lock on server side */ + fm_key->oa.o_valid |= OBD_MD_FLFLAGS; + fm_key->oa.o_flags |= OBD_FL_SRVLOCK; + } +skip_locking: req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_GET_INFO_FIEMAP); if (req == NULL) - RETURN(-ENOMEM); + GOTO(drop_lock, rc = -ENOMEM); req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT, keylen); @@ -3069,7 +2767,7 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp, rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_GET_INFO); if (rc) { ptlrpc_request_free(req); - RETURN(rc); + GOTO(drop_lock, rc); } tmp = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY); @@ -3080,16 +2778,18 @@ static int osc_get_info(const struct lu_env *env, struct obd_export *exp, ptlrpc_request_set_replen(req); rc = ptlrpc_queue_wait(req); if (rc) - GOTO(out1, rc); + GOTO(fini_req, rc); reply = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL); if (reply == NULL) - GOTO(out1, rc = -EPROTO); + GOTO(fini_req, rc = -EPROTO); memcpy(val, reply, *vallen); - out1: +fini_req: ptlrpc_req_finished(req); - +drop_lock: + if (mode) + ldlm_lock_decref(&lockh, LCK_PR); RETURN(rc); } @@ -3131,13 +2831,13 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, LASSERT(cli->cl_cache == NULL); /* only once */ cli->cl_cache = (struct cl_client_cache *)val; - cfs_atomic_inc(&cli->cl_cache->ccc_users); + atomic_inc(&cli->cl_cache->ccc_users); cli->cl_lru_left = &cli->cl_cache->ccc_lru_left; /* add this osc into entity list */ - LASSERT(cfs_list_empty(&cli->cl_lru_osc)); + LASSERT(list_empty(&cli->cl_lru_osc)); spin_lock(&cli->cl_cache->ccc_lru_lock); - cfs_list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru); + list_add(&cli->cl_lru_osc, &cli->cl_cache->ccc_lru); spin_unlock(&cli->cl_cache->ccc_lru_lock); RETURN(0); @@ -3145,10 +2845,10 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, if (KEY_IS(KEY_CACHE_LRU_SHRINK)) { struct client_obd *cli = &obd->u.cli; - int nr = cfs_atomic_read(&cli->cl_lru_in_list) >> 1; + int nr = atomic_read(&cli->cl_lru_in_list) >> 1; int target = *(int *)val; - nr = osc_lru_shrink(cli, min(nr, target)); + nr = osc_lru_shrink(env, cli, min(nr, target), true); *(int *)val -= nr; RETURN(0); } @@ -3163,27 +2863,28 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, Even if something bad goes through, we'd get a -EINVAL from OST anyway. */ - if (KEY_IS(KEY_GRANT_SHRINK)) - req = ptlrpc_request_alloc(imp, &RQF_OST_SET_GRANT_INFO); - else - req = ptlrpc_request_alloc(imp, &RQF_OBD_SET_INFO); - - if (req == NULL) - RETURN(-ENOMEM); - - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, - RCL_CLIENT, keylen); - req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL, - RCL_CLIENT, vallen); - rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO); - if (rc) { - ptlrpc_request_free(req); - RETURN(rc); - } + req = ptlrpc_request_alloc(imp, KEY_IS(KEY_GRANT_SHRINK) ? + &RQF_OST_SET_GRANT_INFO : + &RQF_OBD_SET_INFO); + if (req == NULL) + RETURN(-ENOMEM); + + req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, + RCL_CLIENT, keylen); + if (!KEY_IS(KEY_GRANT_SHRINK)) + req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_VAL, + RCL_CLIENT, vallen); + rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_SET_INFO); + if (rc) { + ptlrpc_request_free(req); + RETURN(rc); + } - tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); - memcpy(tmp, key, keylen); - tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_VAL); + tmp = req_capsule_client_get(&req->rq_pill, &RMF_SETINFO_KEY); + memcpy(tmp, key, keylen); + tmp = req_capsule_client_get(&req->rq_pill, KEY_IS(KEY_GRANT_SHRINK) ? + &RMF_OST_BODY : + &RMF_SETINFO_VAL); memcpy(tmp, val, vallen); if (KEY_IS(KEY_GRANT_SHRINK)) { @@ -3213,34 +2914,6 @@ static int osc_set_info_async(const struct lu_env *env, struct obd_export *exp, RETURN(0); } - -static int osc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, - struct obd_device *disk_obd, int *index) -{ - /* this code is not supposed to be used with LOD/OSP - * to be removed soon */ - LBUG(); - return 0; -} - -static int osc_llog_finish(struct obd_device *obd, int count) -{ - struct llog_ctxt *ctxt; - - ENTRY; - - ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT); - if (ctxt) { - llog_cat_close(NULL, ctxt->loc_handle); - llog_cleanup(NULL, ctxt); - } - - ctxt = llog_get_context(obd, LLOG_SIZE_REPL_CTXT); - if (ctxt) - llog_cleanup(NULL, ctxt); - RETURN(0); -} - static int osc_reconnect(const struct lu_env *env, struct obd_export *exp, struct obd_device *obd, struct obd_uuid *cluuid, @@ -3252,9 +2925,10 @@ static int osc_reconnect(const struct lu_env *env, if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { long lost_grant; - client_obd_list_lock(&cli->cl_loi_list_lock); - data->ocd_grant = (cli->cl_avail_grant + cli->cl_dirty) ?: - 2 * cli->cl_max_pages_per_rpc << CFS_PAGE_SHIFT; + client_obd_list_lock(&cli->cl_loi_list_lock); + data->ocd_grant = (cli->cl_avail_grant + + (cli->cl_dirty_pages << PAGE_CACHE_SHIFT)) ?: + 2 * cli_brw_size(obd); lost_grant = cli->cl_lost_grant; cli->cl_lost_grant = 0; client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -3390,23 +3064,17 @@ static int osc_import_event(struct obd_device *obd, * \retval zero the lock can't be canceled * \retval other ok to cancel */ -static int osc_cancel_for_recovery(struct ldlm_lock *lock) +static int osc_cancel_weight(struct ldlm_lock *lock) { - check_res_locked(lock->l_resource); - - /* - * Cancel all unused extent lock in granted mode LCK_PR or LCK_CR. - * - * XXX as a future improvement, we can also cancel unused write lock - * if it doesn't have dirty data and active mmaps. - */ - if (lock->l_resource->lr_type == LDLM_EXTENT && - (lock->l_granted_mode == LCK_PR || - lock->l_granted_mode == LCK_CR) && - (osc_dlm_lock_pageref(lock) == 0)) - RETURN(1); + /* + * Cancel all unused and granted extent lock. + */ + if (lock->l_resource->lr_type == LDLM_EXTENT && + lock->l_granted_mode == lock->l_req_mode && + osc_ldlm_weigh_ast(lock) == 0) + RETURN(1); - RETURN(0); + RETURN(0); } static int brw_queue_work(const struct lu_env *env, void *data) @@ -3421,10 +3089,10 @@ static int brw_queue_work(const struct lu_env *env, void *data) int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) { - struct lprocfs_static_vars lvars = { 0 }; - struct client_obd *cli = &obd->u.cli; - void *handler; - int rc; + struct client_obd *cli = &obd->u.cli; + struct obd_type *type; + void *handler; + int rc; ENTRY; rc = ptlrpcd_addref(); @@ -3440,13 +3108,42 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) GOTO(out_client_setup, rc = PTR_ERR(handler)); cli->cl_writeback_work = handler; + handler = ptlrpcd_alloc_work(cli->cl_import, lru_queue_work, cli); + if (IS_ERR(handler)) + GOTO(out_ptlrpcd_work, rc = PTR_ERR(handler)); + cli->cl_lru_work = handler; + rc = osc_quota_setup(obd); if (rc) GOTO(out_ptlrpcd_work, rc); cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL; - lprocfs_osc_init_vars(&lvars); - if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { + +#ifdef LPROCFS + obd->obd_vars = lprocfs_osc_obd_vars; +#endif + /* If this is true then both client (osc) and server (osp) are on the + * same node. The osp layer if loaded first will register the osc proc + * directory. In that case this obd_device will be attached its proc + * tree to type->typ_procsym instead of obd->obd_type->typ_procroot. */ + type = class_search_type(LUSTRE_OSP_NAME); + if (type && type->typ_procsym) { + obd->obd_proc_entry = lprocfs_seq_register(obd->obd_name, + type->typ_procsym, + obd->obd_vars, obd); + if (IS_ERR(obd->obd_proc_entry)) { + rc = PTR_ERR(obd->obd_proc_entry); + CERROR("error %d setting up lprocfs for %s\n", rc, + obd->obd_name); + obd->obd_proc_entry = NULL; + } + } else { + rc = lprocfs_seq_obd_setup(obd); + } + + /* If the basic OSC proc tree construction succeeded then + * lets do the rest. */ + if (rc == 0) { lproc_osc_attach_seqstat(obd); sptlrpc_lprocfs_cliobd_attach(obd); ptlrpc_lprocfs_register_obd(obd); @@ -3462,12 +3159,19 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) OST_MAXREQSIZE, ptlrpc_add_rqs_to_pool); - CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list); - ns_register_cancel(obd->obd_namespace, osc_cancel_for_recovery); - RETURN(rc); + INIT_LIST_HEAD(&cli->cl_grant_shrink_list); + ns_register_cancel(obd->obd_namespace, osc_cancel_weight); + RETURN(0); out_ptlrpcd_work: - ptlrpcd_destroy_work(handler); + if (cli->cl_writeback_work != NULL) { + ptlrpcd_destroy_work(cli->cl_writeback_work); + cli->cl_writeback_work = NULL; + } + if (cli->cl_lru_work != NULL) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } out_client_setup: client_obd_cleanup(obd); out_ptlrpcd: @@ -3508,6 +3212,10 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage) ptlrpcd_destroy_work(cli->cl_writeback_work); cli->cl_writeback_work = NULL; } + if (cli->cl_lru_work) { + ptlrpcd_destroy_work(cli->cl_lru_work); + cli->cl_lru_work = NULL; + } obd_cleanup_client_import(obd); ptlrpc_lprocfs_unregister_obd(obd); lprocfs_obd_cleanup(obd); @@ -3529,12 +3237,12 @@ int osc_cleanup(struct obd_device *obd) /* lru cleanup */ if (cli->cl_cache != NULL) { - LASSERT(cfs_atomic_read(&cli->cl_cache->ccc_users) > 0); + LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0); spin_lock(&cli->cl_cache->ccc_lru_lock); - cfs_list_del_init(&cli->cl_lru_osc); + list_del_init(&cli->cl_lru_osc); spin_unlock(&cli->cl_cache->ccc_lru_lock); cli->cl_lru_left = NULL; - cfs_atomic_dec(&cli->cl_cache->ccc_users); + atomic_dec(&cli->cl_cache->ccc_users); cli->cl_cache = NULL; } @@ -3549,21 +3257,9 @@ int osc_cleanup(struct obd_device *obd) int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg) { - struct lprocfs_static_vars lvars = { 0 }; - int rc = 0; - - lprocfs_osc_init_vars(&lvars); - - switch (lcfg->lcfg_command) { - default: - rc = class_process_proc_param(PARAM_OSC, lvars.obd_vars, - lcfg, obd); - if (rc > 0) - rc = 0; - break; - } - - return(rc); + int rc = class_process_proc_seq_param(PARAM_OSC, obd->obd_vars, + lcfg, obd); + return rc > 0 ? 0: rc; } static int osc_process_config(struct obd_device *obd, obd_count len, void *buf) @@ -3583,7 +3279,6 @@ struct obd_ops osc_obd_ops = { .o_disconnect = osc_disconnect, .o_statfs = osc_statfs, .o_statfs_async = osc_statfs_async, - .o_packmd = osc_packmd, .o_unpackmd = osc_unpackmd, .o_create = osc_create, .o_destroy = osc_destroy, @@ -3591,20 +3286,12 @@ struct obd_ops osc_obd_ops = { .o_getattr_async = osc_getattr_async, .o_setattr = osc_setattr, .o_setattr_async = osc_setattr_async, - .o_brw = osc_brw, - .o_punch = osc_punch, - .o_sync = osc_sync, - .o_enqueue = osc_enqueue, .o_change_cbdata = osc_change_cbdata, .o_find_cbdata = osc_find_cbdata, - .o_cancel = osc_cancel, - .o_cancel_unused = osc_cancel_unused, .o_iocontrol = osc_iocontrol, .o_get_info = osc_get_info, .o_set_info_async = osc_set_info_async, .o_import_event = osc_import_event, - .o_llog_init = osc_llog_init, - .o_llog_finish = osc_llog_finish, .o_process_config = osc_process_config, .o_quotactl = osc_quotactl, .o_quotacheck = osc_quotacheck, @@ -3616,9 +3303,10 @@ extern struct lock_class_key osc_ast_guard_class; int __init osc_init(void) { - struct lprocfs_static_vars lvars = { 0 }; - int rc; - ENTRY; + bool enable_proc = true; + struct obd_type *type; + int rc; + ENTRY; /* print an address of _any_ initialized kernel symbol from this * module, to allow debugging with gdb that doesn't support data @@ -3626,11 +3314,18 @@ int __init osc_init(void) CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches); rc = lu_kmem_init(osc_caches); + if (rc) + RETURN(rc); - lprocfs_osc_init_vars(&lvars); + type = class_search_type(LUSTRE_OSP_NAME); + if (type != NULL && type->typ_procsym != NULL) + enable_proc = false; - rc = class_register_type(&osc_obd_ops, NULL, lvars.module_vars, - LUSTRE_OSC_NAME, &osc_device_type); + rc = class_register_type(&osc_obd_ops, NULL, enable_proc, NULL, +#ifndef HAVE_ONLY_PROCFS_SEQ + NULL, +#endif + LUSTRE_OSC_NAME, &osc_device_type); if (rc) { lu_kmem_fini(osc_caches); RETURN(rc);