From 860404143dafbf696b4d38bb9135eabfd471cc36 Mon Sep 17 00:00:00 2001 From: Rahul Deshmukh Date: Tue, 14 Dec 2010 11:49:09 +0530 Subject: [PATCH] b=22176 Add .sync_fs super block handler i=oleg i=ericm This patch adds .sync_fs super block handler i.e. on issuing sync command, the dirty loi's are marked urgent and synced. --- lustre/include/obd.h | 14 +++- lustre/include/obd_class.h | 14 ++++ lustre/ldlm/ldlm_lib.c | 1 + lustre/llite/llite_internal.h | 1 + lustre/llite/llite_lib.c | 13 ++++ lustre/llite/super25.c | 1 + lustre/lov/lov_internal.h | 4 ++ lustre/lov/lov_obd.c | 34 +++++++++ lustre/lov/lov_request.c | 67 +++++++++++++++++ lustre/obdclass/lprocfs_status.c | 1 + lustre/osc/osc_internal.h | 1 + lustre/osc/osc_request.c | 152 +++++++++++++++++++++++++++++++++++---- 12 files changed, 287 insertions(+), 16 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 5491b3c..e04224b 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -96,7 +96,7 @@ struct lov_oinfo { /* per-stripe data structure */ cfs_list_t loi_hp_ready_item; cfs_list_t loi_write_item; cfs_list_t loi_read_item; - + cfs_list_t loi_sync_fs_item; unsigned long loi_kms_valid:1; __u64 loi_kms; /* known minimum size */ struct ost_lvb loi_lvb; @@ -123,6 +123,7 @@ static inline void loi_init(struct lov_oinfo *loi) CFS_INIT_LIST_HEAD(&loi->loi_hp_ready_item); CFS_INIT_LIST_HEAD(&loi->loi_write_item); CFS_INIT_LIST_HEAD(&loi->loi_read_item); + CFS_INIT_LIST_HEAD(&loi->loi_sync_fs_item); } struct lov_stripe_md { @@ -159,6 +160,12 @@ struct obd_info; typedef int (*obd_enqueue_update_f)(void *cookie, int rc); +struct osc_sync_fs_wait { + struct obd_info *sfw_oi; + obd_enqueue_update_f sfw_upcall; + int started; +}; + /* obd info for a particular level (lov, osc). */ struct obd_info { /* Lock policy. 
It keeps an extent which is specific for a particular @@ -440,6 +447,7 @@ struct client_obd { cfs_list_t cl_loi_hp_ready_list; cfs_list_t cl_loi_write_list; cfs_list_t cl_loi_read_list; + cfs_list_t cl_loi_sync_fs_list; int cl_r_in_flight; int cl_w_in_flight; /* just a sum of the loi/lop pending numbers to be exported by /proc */ @@ -487,7 +495,9 @@ struct client_obd { struct lu_client_seq *cl_seq; cfs_atomic_t cl_resends; /* resend count */ + struct osc_sync_fs_wait cl_sf_wait; }; + #define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) #define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ @@ -1416,6 +1426,8 @@ struct obd_ops { char *ostname); void (*o_getref)(struct obd_device *obd); void (*o_putref)(struct obd_device *obd); + int (*o_sync_fs)(struct obd_export *exp, struct obd_info *oinfo, + int wait); /* * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index fb6f702..63d4095 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1266,6 +1266,20 @@ static inline int obd_sync(struct obd_export *exp, struct obdo *oa, RETURN(rc); } +static inline int obd_sync_fs(struct obd_export *exp, struct obd_info *oinfo, + int wait) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, sync_fs, -EOPNOTSUPP); + EXP_COUNTER_INCREMENT(exp, sync_fs); /* count under sync_fs, not obd_sync's "sync" slot */ + + rc = OBP(exp->exp_obd, sync_fs)(exp, oinfo, wait); + + RETURN(rc); +} + static inline int obd_punch_rqset(struct obd_export *exp, struct obd_info *oinfo, struct obd_trans_info *oti) diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 775a297..fb20bad 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -289,6 +289,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) CFS_INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list); 
CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list); + CFS_INIT_LIST_HEAD(&cli->cl_loi_sync_fs_list); client_obd_list_lock_init(&cli->cl_loi_list_lock); cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index f5d36c5..ba1a9a95 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -728,6 +728,7 @@ void ll_umount_begin(struct super_block *sb); #endif int ll_remount_fs(struct super_block *sb, int *flags, char *data); int ll_show_options(struct seq_file *seq, struct vfsmount *vfs); +int ll_sync_fs(struct super_block *sb, int wait); int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, struct super_block *); void lustre_dump_dentry(struct dentry *, int recur); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 73170fc..0bb1315 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2116,3 +2116,16 @@ int ll_show_options(struct seq_file *seq, struct vfsmount *vfs) RETURN(0); } + +int ll_sync_fs(struct super_block *sb, int wait) +{ + struct ll_sb_info *sbi = ll_s2sbi(sb); + int rc = 0; + ENTRY; + + rc = obd_sync_fs(sbi->ll_dt_exp, NULL, wait); + if (rc) + CERROR("sync_fs fails: rc = %d\n", rc); + + RETURN(rc); +} diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 358b769..0bf72df 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -99,6 +99,7 @@ struct super_operations lustre_super_operations = .umount_begin = ll_umount_begin, .remount_fs = ll_remount_fs, .show_options = ll_show_options, + .sync_fs = ll_sync_fs, }; diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 9d899fc..5247dd5 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -258,6 +258,10 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs, int lov_fini_statfs_set(struct lov_request_set *set); int lov_statfs_interpret(struct ptlrpc_request_set *rqset, void *data, int rc); +int 
cb_sync_fs_update(void *cookie, int rc); +int lov_fini_sync_fs_set(struct lov_request_set *set); +int lov_prep_sync_fs_set(struct obd_export *exp, struct obd_info *oinfo, + struct lov_request_set **reqset); /* lov_obd.c */ void lov_fix_desc(struct lov_desc *desc); void lov_fix_desc_stripe_size(__u64 *val); diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index c13ccaf..9c0c93b 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1592,6 +1592,39 @@ static int lov_sync(struct obd_export *exp, struct obdo *oa, RETURN(rc); } +static int lov_sync_fs(struct obd_export *exp, struct obd_info *dummy, + int wait) +{ + struct lov_obd *lov; + struct obd_info oinfo = { { { 0 } } }; + struct lov_request *req; + struct lov_request_set *set; + struct l_wait_info lwi = { 0 }; + cfs_list_t *pos; + int rc = 0; + ENTRY; + + lov = &exp->exp_obd->u.lov; + rc = lov_prep_sync_fs_set(exp, &oinfo, &set); + if (rc) + RETURN(rc); + + cfs_list_for_each(pos, &set->set_list) { + req = cfs_list_entry(pos, struct lov_request, rq_link); + if (rc == 0) /* stop dispatching after the first failure */ + rc = obd_sync_fs(lov->lov_tgts[req->rq_idx]->ltd_exp, + &req->rq_oi, wait); + if (rc) /* never dispatched: complete it here so the set can finish */ + lov_update_set(set, req, rc); + } + /* every request is now in flight or completed, so this wait can end */ + if (wait) + l_wait_event(set->set_waitq, lov_finished_set(set), &lwi); + + lov_fini_sync_fs_set(set); /* always returns 0; keep the dispatch error in rc */ + RETURN(rc); +} + static int lov_brw_check(struct lov_obd *lov, struct obd_info *lov_oinfo, obd_count oa_bufs, struct brw_page *pga) { @@ -2827,6 +2860,7 @@ struct obd_ops lov_obd_ops = { .o_pool_del = lov_pool_del, .o_getref = lov_getref, .o_putref = lov_putref, + .o_sync_fs = lov_sync_fs, }; static quota_interface_t *quota_interface; diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 79d2943..9e50791 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -1524,6 +1524,17 @@ int lov_fini_statfs_set(struct lov_request_set *set) RETURN(rc); } +int lov_fini_sync_fs_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + if (set == NULL) + 
RETURN(rc); + lov_put_reqset(set); + RETURN(rc); +} + void lov_update_statfs(struct obd_statfs *osfs, struct obd_statfs *lov_sfs, int success) { @@ -1703,3 +1714,59 @@ out_set: lov_fini_statfs_set(set); RETURN(rc); } + +int cb_sync_fs_update(void *cookie, int rc) +{ + struct obd_info *oinfo = cookie; + struct lov_request *lovreq; + ENTRY; + + lovreq = container_of(oinfo, struct lov_request, rq_oi); + lov_update_set(lovreq->rq_rqset, lovreq, rc); + + RETURN(rc); +} + +int lov_prep_sync_fs_set(struct obd_export *exp, struct obd_info *oinfo, + struct lov_request_set **request) +{ + struct lov_request_set *set; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0; + int i; + + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); /* negative errno, consistent with -EIO below */ + lov_init_set(set); + set->set_obd = class_exp2obd(exp); + set->set_oi = oinfo; + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + struct lov_request *req; + + if (!lov->lov_tgts[i] || !lov->lov_tgts[i]->ltd_active || + !lov->lov_tgts[i]->ltd_exp) { + CDEBUG(D_INFO, "lov idx %d inactive or disabled\n", i); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out, rc = -ENOMEM); /* negative errno; set is released at out */ + + req->rq_idx = i; + req->rq_oi.oi_cb_up = cb_sync_fs_update; + + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out, rc = -EIO); + *request = set; + RETURN(rc); +out: + lov_fini_sync_fs_set(set); + RETURN(rc); +} diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index a6981db..7e0b7cb 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1508,6 +1508,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, pool_del); LPROCFS_OBD_OP_INIT(num_private_stats, stats, getref); LPROCFS_OBD_OP_INIT(num_private_stats, stats, putref); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync_fs); } int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) 
diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 9928169..b30fc9b 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -49,6 +49,7 @@ enum async_flags { to give the caller a chance to update or cancel the size of the io */ ASYNC_HP = 0x10, + ASYNC_SYNCFS = 0x20, }; struct obd_async_page_ops { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 8ede767..2b5f8a8 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -864,6 +864,17 @@ static unsigned long rpcs_in_flight(struct client_obd *cli) return cli->cl_r_in_flight + cli->cl_w_in_flight; } +int osc_wake_sync_fs(struct client_obd *cli) +{ + ENTRY; + if (cfs_list_empty(&cli->cl_loi_sync_fs_list) && + cli->cl_sf_wait.started) { + cli->cl_sf_wait.sfw_upcall(cli->cl_sf_wait.sfw_oi, 0); + cli->cl_sf_wait.started = 0; + } + RETURN(0); +} + /* caller must hold loi_list_lock */ void osc_wake_cache_waiters(struct client_obd *cli) { @@ -1932,6 +1943,24 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, osc_release_write_grant(cli, &oap->oap_brw_page, sent); } +static int lop_makes_syncfs_rpc(struct loi_oap_pages *lop) +{ + struct osc_async_page *oap; + ENTRY; + + if (cfs_list_empty(&lop->lop_urgent)) + RETURN(0); + + oap = cfs_list_entry(lop->lop_urgent.next, + struct osc_async_page, oap_urgent_item); + + if (oap->oap_async_flags & ASYNC_SYNCFS) { + CDEBUG(D_CACHE, "syncfs request forcing RPC\n"); + RETURN(1); + } + + RETURN(0); +} /* This maintains the lists of pending pages to read/write for a given object * (lop). 
This is used by osc_check_rpcs->osc_next_loi() and loi_list_maint() @@ -2020,10 +2049,19 @@ void loi_list_maint(struct client_obd *cli, struct lov_oinfo *loi) on_list(&loi->loi_ready_item, &cli->cl_loi_ready_list, 0); on_list(&loi->loi_hp_ready_item, &cli->cl_loi_hp_ready_list, 1); } else { - on_list(&loi->loi_hp_ready_item, &cli->cl_loi_hp_ready_list, 0); - on_list(&loi->loi_ready_item, &cli->cl_loi_ready_list, - lop_makes_rpc(cli, &loi->loi_write_lop, OBD_BRW_WRITE)|| - lop_makes_rpc(cli, &loi->loi_read_lop, OBD_BRW_READ)); + if (lop_makes_syncfs_rpc(&loi->loi_write_lop)) { + on_list(&loi->loi_sync_fs_item, + &cli->cl_loi_sync_fs_list, + loi->loi_write_lop.lop_num_pending); + } else { + on_list(&loi->loi_hp_ready_item, + &cli->cl_loi_hp_ready_list, 0); + on_list(&loi->loi_ready_item, &cli->cl_loi_ready_list, + lop_makes_rpc(cli, &loi->loi_write_lop, + OBD_BRW_WRITE)|| + lop_makes_rpc(cli, &loi->loi_read_lop, + OBD_BRW_READ)); + } } on_list(&loi->loi_write_item, &cli->cl_loi_write_list, @@ -2111,6 +2149,34 @@ static void osc_process_ar(struct osc_async_rc *ar, __u64 xid, ar->ar_force_sync = 0; } +static int osc_add_to_lop_urgent(struct loi_oap_pages *lop, + struct osc_async_page *oap, + obd_flag async_flags) +{ + + /* If true, then already present in lop urgent */ + if (!cfs_list_empty(&oap->oap_urgent_item)) { + CWARN("Request to add duplicate oap_urgent for flag = %d\n", + oap->oap_async_flags); + return 1; + } + + /* item from sync_fs, to avoid duplicates check the existing flags */ + if (async_flags & ASYNC_SYNCFS) { + cfs_list_add_tail(&oap->oap_urgent_item, + &lop->lop_urgent); + return 0; + } + + if (oap->oap_async_flags & ASYNC_HP) + cfs_list_add(&oap->oap_urgent_item, &lop->lop_urgent); + else if (oap->oap_async_flags & ASYNC_URGENT || + async_flags & ASYNC_URGENT) + cfs_list_add_tail(&oap->oap_urgent_item, &lop->lop_urgent); + + return 0; +} + void osc_oap_to_pending(struct osc_async_page *oap) { struct loi_oap_pages *lop; @@ -2120,10 +2186,7 @@ void 
osc_oap_to_pending(struct osc_async_page *oap) else lop = &oap->oap_loi->loi_read_lop; - if (oap->oap_async_flags & ASYNC_HP) - cfs_list_add(&oap->oap_urgent_item, &lop->lop_urgent); - else if (oap->oap_async_flags & ASYNC_URGENT) - cfs_list_add_tail(&oap->oap_urgent_item, &lop->lop_urgent); + osc_add_to_lop_urgent(lop, oap, 0); cfs_list_add_tail(&oap->oap_pending_item, &lop->lop_pending); lop_update_pending(oap->oap_cli, lop, oap->oap_cmd, 1); } @@ -2239,6 +2302,7 @@ static int brw_interpret(const struct lu_env *env, osc_release_write_grant(aa->aa_cli, aa->aa_ppga[i], 1); } osc_wake_cache_waiters(cli); + osc_wake_sync_fs(cli); osc_check_rpcs(env, cli); client_obd_list_unlock(&cli->cl_loi_list_lock); if (!async) @@ -2571,7 +2635,7 @@ osc_send_oap_rpc(const struct lu_env *env, struct client_obd *cli, } osc_wake_cache_waiters(cli); - + osc_wake_sync_fs(cli); loi_list_maint(cli, loi); client_obd_list_unlock(&cli->cl_loi_list_lock); @@ -2664,6 +2728,9 @@ struct lov_oinfo *osc_next_loi(struct client_obd *cli) if (!cfs_list_empty(&cli->cl_loi_ready_list)) RETURN(cfs_list_entry(cli->cl_loi_ready_list.next, struct lov_oinfo, loi_ready_item)); + if (!cfs_list_empty(&cli->cl_loi_sync_fs_list)) + RETURN(cfs_list_entry(cli->cl_loi_sync_fs_list.next, + struct lov_oinfo, loi_sync_fs_item)); /* then if we have cache waiters, return all objects with queued * writes. 
This is especially important when many small files @@ -2779,6 +2846,8 @@ void osc_check_rpcs(const struct lu_env *env, struct client_obd *cli) cfs_list_del_init(&loi->loi_write_item); if (!cfs_list_empty(&loi->loi_read_item)) cfs_list_del_init(&loi->loi_read_item); + if (!cfs_list_empty(&loi->loi_sync_fs_item)) + cfs_list_del_init(&loi->loi_sync_fs_item); loi_list_maint(cli, loi); @@ -3043,16 +3112,27 @@ int osc_set_async_flags_base(struct client_obd *cli, if ((oap->oap_async_flags & async_flags) == async_flags) RETURN(0); + /* XXX: This introduces a tiny insignificant race for the case if this + * loi already had other urgent items. + */ + if (SETTING(oap->oap_async_flags, async_flags, ASYNC_SYNCFS) && + cfs_list_empty(&oap->oap_rpc_item) && + cfs_list_empty(&oap->oap_urgent_item)) { + osc_add_to_lop_urgent(lop, oap, ASYNC_SYNCFS); + flags |= ASYNC_SYNCFS; + cfs_spin_lock(&oap->oap_lock); + oap->oap_async_flags |= flags; + cfs_spin_unlock(&oap->oap_lock); + loi_list_maint(cli, loi); + RETURN(0); + } + if (SETTING(oap->oap_async_flags, async_flags, ASYNC_READY)) flags |= ASYNC_READY; if (SETTING(oap->oap_async_flags, async_flags, ASYNC_URGENT) && cfs_list_empty(&oap->oap_rpc_item)) { - if (oap->oap_async_flags & ASYNC_HP) - cfs_list_add(&oap->oap_urgent_item, &lop->lop_urgent); - else - cfs_list_add_tail(&oap->oap_urgent_item, - &lop->lop_urgent); + osc_add_to_lop_urgent(lop, oap, ASYNC_URGENT); flags |= ASYNC_URGENT; loi_list_maint(cli, loi); } @@ -3099,7 +3179,8 @@ int osc_teardown_async_page(struct obd_export *exp, if (!cfs_list_empty(&oap->oap_urgent_item)) { cfs_list_del_init(&oap->oap_urgent_item); cfs_spin_lock(&oap->oap_lock); - oap->oap_async_flags &= ~(ASYNC_URGENT | ASYNC_HP); + oap->oap_async_flags &= ~(ASYNC_URGENT | ASYNC_HP | + ASYNC_SYNCFS); cfs_spin_unlock(&oap->oap_lock); } if (!cfs_list_empty(&oap->oap_pending_item)) { @@ -4521,6 +4602,46 @@ int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *lcfg) return(rc); } +static int 
osc_sync_fs(struct obd_export *exp, struct obd_info *oinfo, + int wait) +{ + struct obd_device *obd = class_exp2obd(exp); + struct client_obd *cli; + struct lov_oinfo *loi; + struct lov_oinfo *tloi; + struct osc_async_page *oap; + struct osc_async_page *toap; + struct loi_oap_pages *lop; + struct lu_env *env; + int refcheck; + int rc = 0; + ENTRY; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + cli = &obd->u.cli; + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_sf_wait.sfw_oi = oinfo; + cli->cl_sf_wait.sfw_upcall = oinfo->oi_cb_up; + cli->cl_sf_wait.started = 1; + /* creating cl_loi_sync_fs list */ + cfs_list_for_each_entry_safe(loi, tloi, &cli->cl_loi_write_list, + loi_write_item) { + lop = &loi->loi_write_lop; + cfs_list_for_each_entry_safe(oap, toap, &lop->lop_pending, + oap_pending_item) + osc_set_async_flags_base(cli, loi, oap, ASYNC_SYNCFS); + } + osc_check_rpcs(env, cli); + osc_wake_sync_fs(cli); + client_obd_list_unlock(&cli->cl_loi_list_lock); + cl_env_put(env, &refcheck); + + RETURN(rc); +} + static int osc_process_config(struct obd_device *obd, obd_count len, void *buf) { return osc_process_config_base(obd, buf); @@ -4563,6 +4684,7 @@ struct obd_ops osc_obd_ops = { .o_llog_init = osc_llog_init, .o_llog_finish = osc_llog_finish, .o_process_config = osc_process_config, + .o_sync_fs = osc_sync_fs, }; extern struct lu_kmem_descr osc_caches[]; -- 1.8.3.1