Whamcloud - gitweb
LU-3469 osp: osp_sync() to flush pending changes synchronously 46/10046/7
authorAlex Zhuravlev <alexey.zhuravlev@intel.com>
Wed, 30 Apr 2014 04:46:17 +0000 (12:46 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 4 Jun 2014 03:32:46 +0000 (03:32 +0000)
also a test added to sanity to verify this.

Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Iab153ec5ff1e9031a721530c5eee10a1f01adf7a
Reviewed-on: http://review.whamcloud.com/10046
Tested-by: Jenkins
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
lustre/lfsck/lfsck_layout.c
lustre/osp/lproc_osp.c
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_sync.c
lustre/tests/sanity-lfsck.sh
lustre/tests/sanity.sh

index 3ee62b2..98e61d2 100644 (file)
@@ -3559,6 +3559,9 @@ static int lfsck_layout_assistant(void *args)
                                com->lc_time_last_checkpoint +
                                cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
 
                                com->lc_time_last_checkpoint +
                                cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
 
+                       /* flush all async updating before handling orphan. */
+                       dt_sync(env, lfsck->li_next);
+
                        while (llmd->llmd_in_double_scan) {
                                struct lfsck_tgt_descs  *ltds =
                                                        &lfsck->li_ost_descs;
                        while (llmd->llmd_in_double_scan) {
                                struct lfsck_tgt_descs  *ltds =
                                                        &lfsck->li_ost_descs;
index c23ce18..f9b3fe5 100644 (file)
@@ -117,7 +117,26 @@ static int osp_syn_changes_seq_show(struct seq_file *m, void *data)
 
        return seq_printf(m, "%lu\n", osp->opd_syn_changes);
 }
 
        return seq_printf(m, "%lu\n", osp->opd_syn_changes);
 }
-LPROC_SEQ_FOPS_RO(osp_syn_changes);
+
+static ssize_t osp_syn_changes_seq_write(struct file *file, const char *buffer,
+                                        size_t count, loff_t *off)
+{
+       struct seq_file         *m      = file->private_data;
+       struct obd_device       *dev    = m->private;
+       struct osp_device       *osp    = lu2osp_dev(dev->obd_lu_dev);
+       struct lu_env            env;
+       int                      rc;
+
+       rc = lu_env_init(&env, LCT_LOCAL);
+       if (rc != 0)
+               return rc;
+
+       rc = dt_sync(&env, &osp->opd_dt_dev);
+       lu_env_fini(&env);
+
+       return rc == 0 ? count : rc;
+}
+LPROC_SEQ_FOPS(osp_syn_changes);
 
 static int osp_max_rpcs_in_flight_seq_show(struct seq_file *m, void *data)
 {
 
 static int osp_max_rpcs_in_flight_seq_show(struct seq_file *m, void *data)
 {
index 27e3011..45aff9f 100644 (file)
@@ -471,15 +471,95 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
        RETURN(0);
 }
 
        RETURN(0);
 }
 
+static int osp_sync_timeout(void *data)
+{
+       return 1;
+}
+
 static int osp_sync(const struct lu_env *env, struct dt_device *dev)
 {
 static int osp_sync(const struct lu_env *env, struct dt_device *dev)
 {
+       struct osp_device *d = dt2osp_dev(dev);
+       cfs_time_t         expire;
+       struct l_wait_info lwi = { 0 };
+       unsigned long      id, old;
+       int                rc = 0;
+       unsigned long      start = cfs_time_current();
        ENTRY;
 
        ENTRY;
 
-       /*
-        * XXX: wake up sync thread, command it to start flushing asap?
-        */
+       if (unlikely(d->opd_imp_active == 0))
+               RETURN(-ENOTCONN);
 
 
-       RETURN(0);
+       id = d->opd_syn_last_used_id;
+
+       CDEBUG(D_OTHER, "%s: id: used %lu, processed %lu\n",
+              d->opd_obd->obd_name, id, d->opd_syn_last_processed_id);
+
+       /* wait till all-in-line are processed */
+       while (d->opd_syn_last_processed_id < id) {
+
+               old = d->opd_syn_last_processed_id;
+
+               /* make sure the connection is fine */
+               expire = cfs_time_shift(obd_timeout);
+               lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+                                 osp_sync_timeout, d);
+               l_wait_event(d->opd_syn_barrier_waitq,
+                            d->opd_syn_last_processed_id >= id,
+                            &lwi);
+
+               if (d->opd_syn_last_processed_id >= id)
+                       break;
+
+               if (d->opd_syn_last_processed_id != old) {
+                       /* some progress have been made,
+                        * keep trying... */
+                       continue;
+               }
+
+               /* no changes and expired, something is wrong */
+               GOTO(out, rc = -ETIMEDOUT);
+       }
+
+       /* block new processing (barrier>0 - few callers are possible */
+       atomic_inc(&d->opd_syn_barrier);
+
+       CDEBUG(D_OTHER, "%s: %u in flight\n", d->opd_obd->obd_name,
+              d->opd_syn_rpc_in_flight);
+
+       /* wait till all-in-flight are replied, so executed by the target */
+       /* XXX: this is used by LFSCK at the moment, which doesn't require
+        *      all the changes to be committed, but in general it'd be
+        *      better to wait till commit */
+       while (d->opd_syn_rpc_in_flight > 0) {
+
+               old = d->opd_syn_rpc_in_flight;
+
+               expire = cfs_time_shift(obd_timeout);
+               lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+                                 osp_sync_timeout, d);
+               l_wait_event(d->opd_syn_barrier_waitq,
+                               d->opd_syn_rpc_in_flight == 0, &lwi);
+
+               if (d->opd_syn_rpc_in_flight == 0)
+                       break;
+
+               if (d->opd_syn_rpc_in_flight != old) {
+                       /* some progress have been made */
+                       continue;
+               }
+
+               /* no changes and expired, something is wrong */
+               GOTO(out, rc = -ETIMEDOUT);
+       }
+
+       CDEBUG(D_OTHER, "%s: done in %lu\n", d->opd_obd->obd_name,
+              cfs_time_current() - start);
+out:
+       /* resume normal processing (barrier=0) */
+       atomic_dec(&d->opd_syn_barrier);
+       __osp_sync_check_for_work(d);
+
+       RETURN(rc);
 }
 
 const struct dt_device_operations osp_dt_ops = {
 }
 
 const struct dt_device_operations osp_dt_ops = {
index f183955..2f08aaa 100644 (file)
@@ -174,6 +174,9 @@ struct osp_device {
        unsigned long                    opd_syn_last_processed_id;
        struct osp_id_tracker           *opd_syn_tracker;
        struct list_head                 opd_syn_ontrack;
        unsigned long                    opd_syn_last_processed_id;
        struct osp_id_tracker           *opd_syn_tracker;
        struct list_head                 opd_syn_ontrack;
+       /* stop processing new requests until barrier=0 */
+       atomic_t                         opd_syn_barrier;
+       wait_queue_head_t                opd_syn_barrier_waitq;
 
        /*
         * statfs related fields: OSP maintains it on its own
 
        /*
         * statfs related fields: OSP maintains it on its own
index 814b4b1..34a0e2c 100644 (file)
@@ -151,6 +151,8 @@ static inline int osp_sync_can_process_new(struct osp_device *d,
 {
        LASSERT(d);
 
 {
        LASSERT(d);
 
+       if (unlikely(atomic_read(&d->opd_syn_barrier) > 0))
+               return 0;
        if (!osp_sync_low_in_progress(d))
                return 0;
        if (!osp_sync_low_in_flight(d))
        if (!osp_sync_low_in_progress(d))
                return 0;
        if (!osp_sync_low_in_flight(d))
@@ -399,6 +401,8 @@ static int osp_sync_interpret(const struct lu_env *env,
        spin_lock(&d->opd_syn_lock);
        d->opd_syn_rpc_in_flight--;
        spin_unlock(&d->opd_syn_lock);
        spin_lock(&d->opd_syn_lock);
        d->opd_syn_rpc_in_flight--;
        spin_unlock(&d->opd_syn_lock);
+       if (unlikely(atomic_read(&d->opd_syn_barrier) > 0))
+               wake_up(&d->opd_syn_barrier_waitq);
        CDEBUG(D_OTHER, "%s: %d in flight, %d in progress\n",
               d->opd_obd->obd_name, d->opd_syn_rpc_in_flight,
               d->opd_syn_rpc_in_progress);
        CDEBUG(D_OTHER, "%s: %d in flight, %d in progress\n",
               d->opd_obd->obd_name, d->opd_syn_rpc_in_flight,
               d->opd_syn_rpc_in_progress);
@@ -709,8 +713,10 @@ static int osp_sync_process_record(const struct lu_env *env,
                         * NOTE: it's possible to meet same id if
                         * OST stores few stripes of same file
                         */
                         * NOTE: it's possible to meet same id if
                         * OST stores few stripes of same file
                         */
-                       if (rec->lrh_id > d->opd_syn_last_processed_id)
+                       if (rec->lrh_id > d->opd_syn_last_processed_id) {
                                d->opd_syn_last_processed_id = rec->lrh_id;
                                d->opd_syn_last_processed_id = rec->lrh_id;
+                               wake_up(&d->opd_syn_barrier_waitq);
+                       }
 
                        d->opd_syn_changes--;
                }
 
                        d->opd_syn_changes--;
                }
@@ -1148,6 +1154,7 @@ int osp_sync_init(const struct lu_env *env, struct osp_device *d)
        d->opd_syn_max_rpc_in_progress = OSP_MAX_IN_PROGRESS;
        spin_lock_init(&d->opd_syn_lock);
        init_waitqueue_head(&d->opd_syn_waitq);
        d->opd_syn_max_rpc_in_progress = OSP_MAX_IN_PROGRESS;
        spin_lock_init(&d->opd_syn_lock);
        init_waitqueue_head(&d->opd_syn_waitq);
+       init_waitqueue_head(&d->opd_syn_barrier_waitq);
        init_waitqueue_head(&d->opd_syn_thread.t_ctl_waitq);
        INIT_LIST_HEAD(&d->opd_syn_committed_there);
 
        init_waitqueue_head(&d->opd_syn_thread.t_ctl_waitq);
        INIT_LIST_HEAD(&d->opd_syn_committed_there);
 
index 69f9eb3..bb11656 100644 (file)
@@ -1869,11 +1869,6 @@ test_18d() {
                awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
                error "(3.0) MDS1 is not the expected 'scanning-phase2'"
 
                awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
                error "(3.0) MDS1 is not the expected 'scanning-phase2'"
 
-       # LU-3469: before osp_sync() is enabled, wait for a while to guarantee
-       # that former async repair operations have been executed on the OST(s).
-       sync
-       sleep 2
-
        do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
 
        for k in $(seq $MDSCOUNT); do
        do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
 
        for k in $(seq $MDSCOUNT); do
index 71de744..edaed12 100644 (file)
@@ -12641,6 +12641,19 @@ test_238() {
 }
 run_test 238 "Verify linkea consistency"
 
 }
 run_test 238 "Verify linkea consistency"
 
+test_239() {
+       local list=$(comma_list $(mdts_nodes))
+
+       mkdir -p $DIR/$tdir
+       createmany -o $DIR/$tdir/f- 5000
+       unlinkmany $DIR/$tdir/f- 5000
+       do_nodes $list "lctl set_param -n osp*.*.sync_changes 1"
+       changes=$(do_nodes $list "lctl get_param -n osc.*MDT*.sync_changes \
+                       osc.*MDT*.sync_in_flight" | calc_sum)
+       [ "$changes" -eq 0 ] || error "$changes not synced"
+}
+run_test 239 "osp_sync test"
+
 cleanup_test_300() {
        trap 0
        umask $SAVE_UMASK
 cleanup_test_300() {
        trap 0
        umask $SAVE_UMASK