From 027bea1c2bfd602b6b5e29619bb8bec5296e483b Mon Sep 17 00:00:00 2001 From: bobijam Date: Wed, 6 May 2009 02:31:02 +0000 Subject: [PATCH] Branch b_release_1_8_1 b=12069 i=adilger i=tom.wang (wangdi) Enable adjusting grant_shrink_interval and grant target value via /proc. --- lustre/include/obd.h | 15 +++--- lustre/include/obd_support.h | 3 +- lustre/osc/lproc_osc.c | 69 +++++++++++++++++++++++++- lustre/osc/osc_internal.h | 9 ++-- lustre/osc/osc_request.c | 113 ++++++++++++++++++++++++++++--------------- 5 files changed, 157 insertions(+), 52 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index cfeb383..a906c12 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -428,6 +428,7 @@ struct client_obd { cfs_time_t cl_next_shrink_grant; /* jiffies */ struct list_head cl_grant_shrink_list; /* Timeout event list */ struct semaphore cl_grant_sem; /*grant shrink list semaphore*/ + int cl_grant_shrink_interval; /* seconds */ /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -542,7 +543,7 @@ struct mds_obd { char *mds_profile; struct obd_export *mds_osc_exp; /* XXX lov_exp */ struct lov_desc mds_lov_desc; - + /* mark pages dirty for write. */ bitmap_t *mds_lov_page_dirty; /* array for store pages with obd_id */ @@ -1063,7 +1064,7 @@ enum obd_cleanup_stage { #define KEY_ASYNC "async" #define KEY_CAPA_KEY "capa_key" #define KEY_GRANT_SHRINK "grant_shrink" -#define KEY_OFF_RPCSIZE "off_rpcsize" +#define KEY_OFF_RPCSIZE "off_rpcsize" struct obd_ops { struct module *o_owner; @@ -1316,14 +1317,14 @@ static inline struct lsm_operations *lsm_op_find(int magic) int lvfs_check_io_health(struct obd_device *obd, struct file *file); /* Requests for obd_extent_calc() */ -#define OBD_CALC_STRIPE_START 0x0001 -#define OBD_CALC_STRIPE_END 0x0010 -#define OBD_CALC_STRIPE_RPC_ALIGN 0x0100 +#define OBD_CALC_STRIPE_START 0x0001 +#define OBD_CALC_STRIPE_END 0x0010 +#define OBD_CALC_STRIPE_RPC_ALIGN 0x0100 #define OBD_CALC_STRIPE_RPC_START_ALIGN (OBD_CALC_STRIPE_START | \ - OBD_CALC_STRIPE_RPC_ALIGN) + OBD_CALC_STRIPE_RPC_ALIGN) #define OBD_CALC_STRIPE_RPC_END_ALIGN (OBD_CALC_STRIPE_START | \ - OBD_CALC_STRIPE_RPC_ALIGN) + OBD_CALC_STRIPE_RPC_ALIGN) static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, struct obd_export *exp, int error) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 81c8c0a..ad8ec2a 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -121,8 +121,7 @@ extern unsigned int obd_alloc_fail_rate; * Time interval of shrink, if the client is "idle" more than this interval, * then the ll_grant thread will return the requested grant space to filter */ -#define GRANT_SHRINK_INTERVAL 360/*6 minutes*/ - +#define GRANT_SHRINK_INTERVAL 1200/*20 minutes*/ #define OBD_FAIL_MDS 0x100 #define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 8c42b04..37d2ec8 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -223,6 +223,70 @@ static int osc_rd_cur_grant_bytes(char *page, char **start, off_t off, return rc; } +static int osc_wr_cur_grant_bytes(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct client_obd *cli = &obd->u.cli; + int rc; + __u64 val; + + if (obd == NULL) + return 0; + + rc = lprocfs_write_u64_helper(buffer, count, &val); + if (rc) + return rc; + + /* this is only for shrinking grant */ + client_obd_list_lock(&cli->cl_loi_list_lock); + if (val >= cli->cl_avail_grant) { + client_obd_list_unlock(&cli->cl_loi_list_lock); + return 0; + } + client_obd_list_unlock(&cli->cl_loi_list_lock); + + LPROCFS_CLIMP_CHECK(obd); + if (cli->cl_import->imp_state == LUSTRE_IMP_FULL) + rc = osc_shrink_grant_to_target(cli, val); + LPROCFS_CLIMP_EXIT(obd); + if (rc) + return rc; + return count; +} + +static int osc_rd_grant_shrink_interval(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + + if (obd == NULL) + return 0; + return snprintf(page, count, "%d\n", + obd->u.cli.cl_grant_shrink_interval); +} + +static int osc_wr_grant_shrink_interval(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + if (obd == NULL) + return 0; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 0) + return -ERANGE; + + obd->u.cli.cl_grant_shrink_interval = val; + + return count; +} + static int osc_rd_create_count(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -465,7 +529,10 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = { osc_wr_max_rpcs_in_flight, 0 }, { "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 }, { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 }, - { "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 }, + { "cur_grant_bytes", osc_rd_cur_grant_bytes, + osc_wr_cur_grant_bytes, 0 }, + { "grant_shrink_interval", osc_rd_grant_shrink_interval, + osc_wr_grant_shrink_interval, 0 }, { "create_count", osc_rd_create_count, osc_wr_create_count, 0 }, { "max_create_count", osc_rd_max_create_count, osc_wr_max_create_count, 0}, diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 393e6d9..3a31967 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -91,11 +91,12 @@ struct osc_cache_waiter { int osc_precreate(struct obd_export *exp); int osc_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); + struct lov_stripe_md **ea, struct obd_trans_info *oti); int osc_real_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); + struct lov_stripe_md **ea, struct obd_trans_info *oti); void oscc_init(struct obd_device *obd); void osc_wake_cache_waiters(struct client_obd *cli); +int osc_shrink_grant_to_target(struct client_obd *cli, long target); #ifdef LPROCFS int lproc_osc_attach_seqstat(struct obd_device *dev); @@ -121,8 +122,8 @@ static inline int osc_recoverable_error(int rc) /* return 1 if osc should be resend request */ static inline int osc_should_resend(int resend, struct client_obd *cli) { - return atomic_read(&cli->cl_resends) ? - atomic_read(&cli->cl_resends) > resend : 1; + return atomic_read(&cli->cl_resends) ? + atomic_read(&cli->cl_resends) > resend : 1; } static inline int osc_exp_is_2_0_server(struct obd_export *exp) { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index df93f5e..0a71674 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -233,7 +233,7 @@ static int osc_getattr(struct obd_export *exp, struct obd_info *oinfo) RETURN(-ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); + lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); ptlrpc_req_set_repsize(req, 2, size); @@ -278,7 +278,7 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo, RETURN(-ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); + lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); ptlrpc_req_set_repsize(req, 2, size); @@ -291,7 +291,7 @@ static int osc_setattr(struct obd_export *exp, struct obd_info *oinfo, if (body == NULL) GOTO(out, rc = -EPROTO); - lustre_get_wire_obdo(oinfo->oi_oa, &body->oa); + lustre_get_wire_obdo(oinfo->oi_oa, &body->oa); EXIT; out: @@ -315,7 +315,7 @@ static int osc_setattr_interpret(struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa); + lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa); out: rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); RETURN(rc); @@ -348,7 +348,7 @@ static int osc_setattr_async(struct obd_export *exp, struct obd_info *oinfo, oinfo->oi_oa->o_lcookie = *oti->oti_logcookies; } - lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); + lustre_set_wire_obdo(&body->oa, oinfo->oi_oa); ptlrpc_req_set_repsize(req, 2, size); /* do mds to ost setattr asynchronouly */ if (!rqset) { @@ -393,7 +393,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body)); - lustre_set_wire_obdo(&body->oa, oa); + lustre_set_wire_obdo(&body->oa, oa); ptlrpc_req_set_repsize(req, 2, size); if ((oa->o_valid & OBD_MD_FLFLAGS) && @@ -415,7 +415,7 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa, GOTO (out_req, rc = -EPROTO); } - lustre_get_wire_obdo(oa, &body->oa); + lustre_get_wire_obdo(oa, &body->oa); /* This should really be sent by the OST */ oa->o_blksize = PTLRPC_MAX_BRW_SIZE; @@ -464,7 +464,7 @@ static int osc_punch_interpret(struct ptlrpc_request *req, GOTO(out, rc = -EPROTO); } - lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa); + lustre_get_wire_obdo(aa->aa_oi->oi_oa, &body->oa); out: rc = aa->aa_oi->oi_cb_up(aa->aa_oi, rc); RETURN(rc); @@ -734,8 +734,8 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, static void osc_update_next_shrink(struct client_obd *cli) { - int time = GRANT_SHRINK_INTERVAL; - cli->cl_next_shrink_grant = cfs_time_shift(time); + cli->cl_next_shrink_grant = + cfs_time_shift(cli->cl_grant_shrink_interval); CDEBUG(D_CACHE, "next time %ld to shrink grant \n", cli->cl_next_shrink_grant); } @@ -860,7 +860,7 @@ static int osc_shrink_grant_interpret(struct ptlrpc_request *req, struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct obdo *oa = aa->aa_oa; struct ost_body *body; - + if (rc != 0) { client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant += oa->o_grant; @@ -872,31 +872,68 @@ static int osc_shrink_grant_interpret(struct ptlrpc_request *req, osc_update_grant(cli, body); out: OBD_FREE_PTR(oa); - return rc; + return rc; } static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) { client_obd_list_lock(&cli->cl_loi_list_lock); oa->o_grant = cli->cl_avail_grant / 4; - cli->cl_avail_grant -= oa->o_grant; + cli->cl_avail_grant -= oa->o_grant; client_obd_list_unlock(&cli->cl_loi_list_lock); oa->o_flags |= OBD_FL_SHRINK_GRANT; osc_update_next_shrink(cli); } +/* Shrink the current grant, either from some large amount to enough for a + * full set of in-flight RPCs, or if we have already shrunk to that limit + * then to enough for a single RPC. This avoids keeping more grant than + * needed, and avoids shrinking the grant piecemeal. */ static int osc_shrink_grant(struct client_obd *cli) { + long target = (cli->cl_max_rpcs_in_flight + 1) * + cli->cl_max_pages_per_rpc; + + client_obd_list_lock(&cli->cl_loi_list_lock); + if (cli->cl_avail_grant <= target) + target = cli->cl_max_pages_per_rpc; + client_obd_list_unlock(&cli->cl_loi_list_lock); + + return osc_shrink_grant_to_target(cli, target); +} + +int osc_shrink_grant_to_target(struct client_obd *cli, long target) +{ int rc = 0; struct ost_body *body; ENTRY; + client_obd_list_lock(&cli->cl_loi_list_lock); + /* Don't shrink if we are already above or below the desired limit + * We don't want to shrink below a single RPC, as that will negatively + * impact block allocation and long-term performance. */ + if (target < cli->cl_max_pages_per_rpc) + target = cli->cl_max_pages_per_rpc; + + if (target >= cli->cl_avail_grant) { + client_obd_list_unlock(&cli->cl_loi_list_lock); + RETURN(0); + } + client_obd_list_unlock(&cli->cl_loi_list_lock); + OBD_ALLOC_PTR(body); if (!body) RETURN(-ENOMEM); osc_announce_cached(cli, &body->oa, 0); - osc_shrink_grant_local(cli, &body->oa); + + client_obd_list_lock(&cli->cl_loi_list_lock); + body->oa.o_grant = cli->cl_avail_grant - target; + cli->cl_avail_grant = target; + client_obd_list_unlock(&cli->cl_loi_list_lock); + body->oa.o_flags |= OBD_FL_SHRINK_GRANT; + osc_update_next_shrink(cli); + rc = osc_set_info_async(cli->cl_import->imp_obd->obd_self_export, sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK, sizeof(*body), body, NULL); @@ -905,8 +942,7 @@ static int osc_shrink_grant(struct client_obd *cli) cli->cl_avail_grant += body->oa.o_grant; client_obd_list_unlock(&cli->cl_loi_list_lock); } - if (body) - OBD_FREE_PTR(body); + OBD_FREE_PTR(body); RETURN(rc); } @@ -940,24 +976,24 @@ static int osc_add_shrink_grant(struct client_obd *client) { int rc; - rc = ptlrpc_add_timeout_client(GRANT_SHRINK_INTERVAL, - TIMEOUT_GRANT, - osc_grant_shrink_grant_cb, NULL, - &client->cl_grant_shrink_list); + rc = ptlrpc_add_timeout_client(client->cl_grant_shrink_interval, + TIMEOUT_GRANT, + osc_grant_shrink_grant_cb, NULL, + &client->cl_grant_shrink_list); if (rc) { - CERROR("add grant client %s error %d\n", + CERROR("add grant client %s error %d\n", client->cl_import->imp_obd->obd_name, rc); return rc; } - CDEBUG(D_CACHE, "add grant client %s \n", + CDEBUG(D_CACHE, "add grant client %s \n", client->cl_import->imp_obd->obd_name); osc_update_next_shrink(client); - return 0; + return 0; } static int osc_del_shrink_grant(struct client_obd *client) { - return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list, + return ptlrpc_del_timeout_client(&client->cl_grant_shrink_list, TIMEOUT_GRANT); } @@ -1210,7 +1246,7 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0); if (osc_should_shrink_grant(cli)) - osc_shrink_grant_local(cli, &body->oa); + osc_shrink_grant_local(cli, &body->oa); /* size[REQ_REC_OFF] still sizeof (*body) */ if (opc == OST_WRITE) { @@ -1838,7 +1874,7 @@ static int osc_brw_async(int cmd, struct obd_export *exp, if (copy == NULL) GOTO(out, rc = -ENOMEM); memcpy(copy, ppga, pages_per_brw * sizeof(*copy)); - + OBDO_ALLOC(oa); if (oa == NULL) { OBD_FREE(copy, pages_per_brw * sizeof(*copy)); @@ -2182,7 +2218,7 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) obd_count i; for (i = 0; i < aa->aa_page_count; i++) osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1); - + if (aa->aa_oa->o_flags & OBD_FL_TEMPORARY) OBDO_FREE(aa->aa_oa); } @@ -2396,7 +2432,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, #if defined(__KERNEL__) && defined(__linux__) if(!(PageLocked(oap->oap_page) && (CheckWriteback(oap->oap_page, cmd) || oap->oap_oig !=NULL))) { - CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n", + CDEBUG(D_PAGE, "page %p lost wb %lx/%x\n", oap->oap_page, (long)oap->oap_page->flags, oap->oap_async_flags); LBUG(); } @@ -3787,12 +3823,12 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen, *stripe = 0; RETURN(0); } else if (KEY_IS(KEY_OFF_RPCSIZE)) { - struct client_obd *cli = &exp->exp_obd->u.cli; - __u64 *rpcsize = val; - LASSERT(*vallen == sizeof(__u64)); - *rpcsize = (__u64)cli->cl_max_pages_per_rpc; - RETURN(0); - } else if (KEY_IS(KEY_LAST_ID)) { + struct client_obd *cli = &exp->exp_obd->u.cli; + __u64 *rpcsize = val; + LASSERT(*vallen == sizeof(__u64)); + *rpcsize = (__u64)cli->cl_max_pages_per_rpc; + RETURN(0); + } else if (KEY_IS(KEY_LAST_ID)) { struct ptlrpc_request *req; obd_id *reply; char *bufs[2] = { NULL, key }; @@ -3969,7 +4005,7 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, } *oa = ((struct ost_body *)val)->oa; aa->aa_oa = oa; - + size[1] = vallen; ptlrpc_req_set_repsize(req, 2, size); ptlrpcd_add_req(req); @@ -4103,17 +4139,17 @@ static int osc_disconnect(struct obd_export *exp) * causes the following problem if setup (connect) and cleanup * (disconnect) are tangled together. * connect p1 disconnect p2 - * ptlrpc_connect_import + * ptlrpc_connect_import * ............... class_manual_cleanup * osc_disconnect * del_shrink_grant * ptlrpc_connect_interrupt * init_grant_shrink - * add this client to shrink list + * add this client to shrink list * cleanup_osc * Bang! pinger trigger the shrink. * So the osc should be disconnected from the shrink list, after we - * are sure the import has been destroyed. BUG18662 + * are sure the import has been destroyed. BUG18662 */ if (obd->u.cli.cl_import == NULL) osc_del_shrink_grant(&obd->u.cli); @@ -4218,6 +4254,7 @@ int osc_setup(struct obd_device *obd, obd_count len, void *buf) struct lprocfs_static_vars lvars = { 0 }; struct client_obd *cli = &obd->u.cli; + cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL; lprocfs_osc_init_vars(&lvars); if (lprocfs_obd_setup(obd, lvars.obd_vars) == 0) { lproc_osc_attach_seqstat(obd); -- 1.8.3.1