also a test added to sanity to verify this.
Signed-off-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Iab153ec5ff1e9031a721530c5eee10a1f01adf7a
Reviewed-on: http://review.whamcloud.com/10046
Tested-by: Jenkins
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
com->lc_time_last_checkpoint +
cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
+ /* flush all async updating before handling orphan. */
+ dt_sync(env, lfsck->li_next);
+
while (llmd->llmd_in_double_scan) {
struct lfsck_tgt_descs *ltds =
&lfsck->li_ost_descs;
return seq_printf(m, "%lu\n", osp->opd_syn_changes);
}
-LPROC_SEQ_FOPS_RO(osp_syn_changes);
+
+static ssize_t osp_syn_changes_seq_write(struct file *file, const char *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *m = file->private_data;
+ struct obd_device *dev = m->private;
+ struct osp_device *osp = lu2osp_dev(dev->obd_lu_dev);
+ struct lu_env env;
+ int rc;
+
+ rc = lu_env_init(&env, LCT_LOCAL);
+ if (rc != 0)
+ return rc;
+
+ rc = dt_sync(&env, &osp->opd_dt_dev);
+ lu_env_fini(&env);
+
+ return rc == 0 ? count : rc;
+}
+LPROC_SEQ_FOPS(osp_syn_changes);
static int osp_max_rpcs_in_flight_seq_show(struct seq_file *m, void *data)
{
RETURN(0);
}
+static int osp_sync_timeout(void *data)
+{
+ return 1;
+}
+
static int osp_sync(const struct lu_env *env, struct dt_device *dev)
{
+ struct osp_device *d = dt2osp_dev(dev);
+ cfs_time_t expire;
+ struct l_wait_info lwi = { 0 };
+ unsigned long id, old;
+ int rc = 0;
+ unsigned long start = cfs_time_current();
ENTRY;
- /*
- * XXX: wake up sync thread, command it to start flushing asap?
- */
+ if (unlikely(d->opd_imp_active == 0))
+ RETURN(-ENOTCONN);
- RETURN(0);
+ id = d->opd_syn_last_used_id;
+
+ CDEBUG(D_OTHER, "%s: id: used %lu, processed %lu\n",
+ d->opd_obd->obd_name, id, d->opd_syn_last_processed_id);
+
+ /* wait till all-in-line are processed */
+ while (d->opd_syn_last_processed_id < id) {
+
+ old = d->opd_syn_last_processed_id;
+
+ /* make sure the connection is fine */
+ expire = cfs_time_shift(obd_timeout);
+ lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+ osp_sync_timeout, d);
+ l_wait_event(d->opd_syn_barrier_waitq,
+ d->opd_syn_last_processed_id >= id,
+ &lwi);
+
+ if (d->opd_syn_last_processed_id >= id)
+ break;
+
+ if (d->opd_syn_last_processed_id != old) {
+ /* some progress have been made,
+ * keep trying... */
+ continue;
+ }
+
+ /* no changes and expired, something is wrong */
+ GOTO(out, rc = -ETIMEDOUT);
+ }
+
+ /* block new processing (barrier>0 - few callers are possible */
+ atomic_inc(&d->opd_syn_barrier);
+
+ CDEBUG(D_OTHER, "%s: %u in flight\n", d->opd_obd->obd_name,
+ d->opd_syn_rpc_in_flight);
+
+ /* wait till all-in-flight are replied, so executed by the target */
+ /* XXX: this is used by LFSCK at the moment, which doesn't require
+ * all the changes to be committed, but in general it'd be
+ * better to wait till commit */
+ while (d->opd_syn_rpc_in_flight > 0) {
+
+ old = d->opd_syn_rpc_in_flight;
+
+ expire = cfs_time_shift(obd_timeout);
+ lwi = LWI_TIMEOUT(expire - cfs_time_current(),
+ osp_sync_timeout, d);
+ l_wait_event(d->opd_syn_barrier_waitq,
+ d->opd_syn_rpc_in_flight == 0, &lwi);
+
+ if (d->opd_syn_rpc_in_flight == 0)
+ break;
+
+ if (d->opd_syn_rpc_in_flight != old) {
+ /* some progress have been made */
+ continue;
+ }
+
+ /* no changes and expired, something is wrong */
+ GOTO(out, rc = -ETIMEDOUT);
+ }
+
+ CDEBUG(D_OTHER, "%s: done in %lu\n", d->opd_obd->obd_name,
+ cfs_time_current() - start);
+out:
+ /* resume normal processing (barrier=0) */
+ atomic_dec(&d->opd_syn_barrier);
+ __osp_sync_check_for_work(d);
+
+ RETURN(rc);
}
const struct dt_device_operations osp_dt_ops = {
unsigned long opd_syn_last_processed_id;
struct osp_id_tracker *opd_syn_tracker;
struct list_head opd_syn_ontrack;
+ /* stop processing new requests until barrier=0 */
+ atomic_t opd_syn_barrier;
+ wait_queue_head_t opd_syn_barrier_waitq;
/*
* statfs related fields: OSP maintains it on its own
{
LASSERT(d);
+ if (unlikely(atomic_read(&d->opd_syn_barrier) > 0))
+ return 0;
if (!osp_sync_low_in_progress(d))
return 0;
if (!osp_sync_low_in_flight(d))
spin_lock(&d->opd_syn_lock);
d->opd_syn_rpc_in_flight--;
spin_unlock(&d->opd_syn_lock);
+ if (unlikely(atomic_read(&d->opd_syn_barrier) > 0))
+ wake_up(&d->opd_syn_barrier_waitq);
CDEBUG(D_OTHER, "%s: %d in flight, %d in progress\n",
d->opd_obd->obd_name, d->opd_syn_rpc_in_flight,
d->opd_syn_rpc_in_progress);
* NOTE: it's possible to meet same id if
* OST stores few stripes of same file
*/
- if (rec->lrh_id > d->opd_syn_last_processed_id)
+ if (rec->lrh_id > d->opd_syn_last_processed_id) {
d->opd_syn_last_processed_id = rec->lrh_id;
+ wake_up(&d->opd_syn_barrier_waitq);
+ }
d->opd_syn_changes--;
}
d->opd_syn_max_rpc_in_progress = OSP_MAX_IN_PROGRESS;
spin_lock_init(&d->opd_syn_lock);
init_waitqueue_head(&d->opd_syn_waitq);
+ init_waitqueue_head(&d->opd_syn_barrier_waitq);
init_waitqueue_head(&d->opd_syn_thread.t_ctl_waitq);
INIT_LIST_HEAD(&d->opd_syn_committed_there);
awk '/^status/ { print \\\$2 }'" "scanning-phase2" 6 ||
error "(3.0) MDS1 is not the expected 'scanning-phase2'"
- # LU-3469: before osp_sync() is enabled, wait for a while to guarantee
- # that former async repair operations have been executed on the OST(s).
- sync
- sleep 2
-
do_facet $SINGLEMDS $LCTL set_param fail_val=0 fail_loc=0
for k in $(seq $MDSCOUNT); do
}
run_test 238 "Verify linkea consistency"
+test_239() {
+ local list=$(comma_list $(mdts_nodes))
+
+ mkdir -p $DIR/$tdir
+ createmany -o $DIR/$tdir/f- 5000
+ unlinkmany $DIR/$tdir/f- 5000
+ do_nodes $list "lctl set_param -n osp*.*.sync_changes 1"
+ changes=$(do_nodes $list "lctl get_param -n osc.*MDT*.sync_changes \
+ osc.*MDT*.sync_in_flight" | calc_sum)
+ [ "$changes" -eq 0 ] || error "$changes not synced"
+}
+run_test 239 "osp_sync test"
+
cleanup_test_300() {
trap 0
umask $SAVE_UMASK