From 6ac4dc4d55f8cec7efa398fa8a71b763d8192899 Mon Sep 17 00:00:00 2001 From: alex Date: Thu, 30 Apr 2009 07:03:22 +0000 Subject: [PATCH] - ldiskfs osd doesn't wait for write io to complete in osd_do_bio(), instead it does in osd_trans_stop() to allow concurrent data IO and IO to journal --- lustre/dmu-osd/osd_handler.c | 8 ++++---- lustre/include/dt_object.h | 4 ++-- lustre/osd/osd_handler.c | 23 ++++++++++++++++++----- lustre/osd/osd_io.c | 8 +++++++- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/lustre/dmu-osd/osd_handler.c b/lustre/dmu-osd/osd_handler.c index 0692bde..26305c5 100644 --- a/lustre/dmu-osd/osd_handler.c +++ b/lustre/dmu-osd/osd_handler.c @@ -193,7 +193,7 @@ static int osd_index_try (const struct lu_env *env, static void osd_conf_get (const struct lu_env *env, const struct dt_device *dev, struct dt_device_param *param); -static void osd_trans_stop (const struct lu_env *env, +static int osd_trans_stop (const struct lu_env *env, struct thandle *th); static int osd_object_is_root(const struct osd_object *obj); @@ -201,7 +201,6 @@ static struct thandle *osd_trans_create(const struct lu_env *env, struct dt_device *dt); static int osd_trans_start(const struct lu_env *env, struct dt_device *d, struct thandle *th); -static void osd_trans_stop(const struct lu_env *env, struct thandle *th); static struct osd_object *osd_obj (const struct lu_object *o); static struct osd_device *osd_dev (const struct lu_device *d); @@ -832,7 +831,7 @@ static int osd_trans_start(const struct lu_env *env, struct dt_device *d, /* * Concurrency: shouldn't matter. 
*/ -static void osd_trans_stop(const struct lu_env *env, struct thandle *th) +static int osd_trans_stop(const struct lu_env *env, struct thandle *th) { struct osd_device *osd = osd_dt_dev(th->th_dev); struct osd_thandle *oh; @@ -848,7 +847,8 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th) udmu_tx_commit(oh->ot_tx); if (0 && oh->ot_sync) udmu_wait_synced(&osd->od_objset, oh->ot_tx); - EXIT; + + RETURN(result); } /* diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index d5fef26..75425a4 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -99,7 +99,7 @@ struct dt_device_operations { /** * Finish previously started transaction. */ - void (*dt_trans_stop)(const struct lu_env *env, + int (*dt_trans_stop)(const struct lu_env *env, struct thandle *th); /** * Return fid of root index object. @@ -715,7 +715,7 @@ static inline int dt_trans_start(const struct lu_env *env, return d->dd_ops->dt_trans_start(env, d, th); } -static inline void dt_trans_stop(const struct lu_env *env, +static inline int dt_trans_stop(const struct lu_env *env, struct dt_device *d, struct thandle *th) { diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 38ed948..ad6dbe5 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -183,7 +183,7 @@ static int osd_it_ea_key_size(const struct lu_env *env, static void osd_conf_get (const struct lu_env *env, const struct dt_device *dev, struct dt_device_param *param); -static void osd_trans_stop (const struct lu_env *env, +static int osd_trans_stop (const struct lu_env *env, struct thandle *th); static int osd_object_is_root(const struct osd_object *obj); @@ -778,11 +778,12 @@ out: /* * Concurrency: shouldn't matter. 
*/ -static void osd_trans_stop(const struct lu_env *env, struct thandle *th) +static int osd_trans_stop(const struct lu_env *env, struct thandle *th) { - int result; - struct osd_thandle *oh; + int result; + struct osd_thandle *oh; struct osd_thread_info *oti = osd_oti_get(env); + struct filter_iobuf *iobuf = &oti->oti_iobuf; ENTRY; @@ -822,7 +823,19 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th) } else { OBD_FREE_PTR(oh); } - EXIT; + + /* as we want IO to journal and data IO be concurrent, we don't block + * awaiting data IO completion in osd_do_bio(), instead we wait here + * once transaction is submitted to the journal. + * + * IMPORTANT: we have to wait till any IO submitted by the thread is + * completed otherwise iobuf may be corrupted by different request + */ + wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0); + if (!result) + result = iobuf->dr_error; + + RETURN(result); } /* diff --git a/lustre/osd/osd_io.c b/lustre/osd/osd_io.c index 79df0b7..1752e54 100644 --- a/lustre/osd/osd_io.c +++ b/lustre/osd/osd_io.c @@ -328,7 +328,12 @@ static int osd_do_bio(struct inode *inode, struct filter_iobuf *iobuf, int rw) } out: - wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0); + /* in order to achieve better IO throughput, we don't wait for writes + * completion here (reads still wait). instead we proceed with transaction commit in + * parallel and wait for IO completion once transaction is stopped + * see osd_trans_stop() for more details -bzzz */ + if (rw != OBD_BRW_WRITE) + wait_event(iobuf->dr_wait, atomic_read(&iobuf->dr_numreqs) == 0); if (rc == 0) rc = iobuf->dr_error; @@ -540,6 +545,7 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, /* preceding filemap_write_and_wait() should have clean pages */ #if 0 + /* XXX */ if (fo->fo_writethrough_cache) clear_page_dirty_for_io(lb[i].page); #endif -- 1.8.3.1