b=19884 wait mds-ost sync patch + mds_ost proc value

author Mikhail Pershin <tappro@sun.com>

Tue, 18 May 2010 18:48:18 +0000 (22:48 +0400)

committer Johann Lombardi <johann@sun.com>

Wed, 19 May 2010 11:13:19 +0000 (13:13 +0200)
author Mikhail Pershin <tappro@sun.com>
Tue, 18 May 2010 18:48:18 +0000 (22:48 +0400)
committer Johann Lombardi <johann@sun.com>
Wed, 19 May 2010 11:13:19 +0000 (13:13 +0200)
diff --git a/lustre/include/obd.h b/lustre/include/obd.h

index c3b4103..7574479 100644 (file)
--- a/lustre/include/obd.h
+++ b/lustre/include/obd.h
@@ -350,7 +350,8 @@ struct filter_obd {
                               fo_writethrough_cache:1, /* writetrhough cache */
                               fo_syncjournal:1,      /* sync journal on writes */
                               fo_sync_lock_cancel:2, /* sync on lock cancel */
-                             fo_raid_degraded:1;    /* RAID device degraded */
+                             fo_raid_degraded:1,    /* RAID device degraded */
+                             fo_mds_ost_sync:1; /**< MDS-OST orphan recovery*/
  
          struct obd_import   *fo_mdc_imp;
          struct obd_uuid      fo_mdc_uuid;
@@ -1287,6 +1288,7 @@ struct obd_ops {
          int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd,
                             int *idx);
          int (*o_llog_finish)(struct obd_device *obd, int count);
+        int (*o_llog_connect)(struct obd_export *, struct llogd_conn_body *);
  
          /* metadata-only methods */
          int (*o_pin)(struct obd_export *, struct ll_fid *,
diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h

index fcd7070..58d0912 100644 (file)
--- a/lustre/include/obd_class.h
+++ b/lustre/include/obd_class.h
@@ -1524,6 +1524,19 @@ static inline void obd_import_event(struct obd_device *obd,
          EXIT;
  }
  
+static inline int obd_llog_connect(struct obd_export *exp,
+                                   struct llogd_conn_body *body)
+{
+        int rc;
+        ENTRY;
+        /* Dispatch to the obd_ops llog_connect method of exp->exp_obd. */
+        OBD_CHECK_OP(exp->exp_obd, llog_connect, 0);
+        EXP_COUNTER_INCREMENT(exp, llog_connect);
+        /* The method's return code is passed back to the caller as is. */
+        rc = OBP(exp->exp_obd, llog_connect)(exp, body);
+        RETURN(rc);
+}
+
  static inline int obd_notify(struct obd_device *obd,
                               struct obd_device *watched,
                               enum obd_notify_event ev, void *data)
diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c

index 10aa38e..5944548 100644 (file)
--- a/lustre/obdclass/llog_cat.c
+++ b/lustre/obdclass/llog_cat.c
@@ -416,7 +416,7 @@ int llog_cat_process_thread(void *data)
          struct llog_process_cat_args *args = data;
          struct llog_ctxt *ctxt = args->lpca_ctxt;
          struct llog_handle *llh = NULL;
-        void  *cb = args->lpca_cb;
+        llog_cb_t cb = args->lpca_cb;
          struct llog_logid logid;
          int rc;
          ENTRY;
@@ -436,9 +436,10 @@ int llog_cat_process_thread(void *data)
          }
  
          if (cb) {
-                rc = llog_cat_process(llh, (llog_cb_t)cb, NULL);
+                rc = llog_cat_process(llh, cb, NULL);
                  if (rc != LLOG_PROC_BREAK && rc != 0)
                          CERROR("llog_cat_process() failed %d\n", rc);
+                cb(llh, NULL, NULL);
          } else {
                  CWARN("No callback function for recovery\n");
          }
diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c

index 2692490..e75ee5e 100644 (file)
--- a/lustre/obdclass/lprocfs_status.c
+++ b/lustre/obdclass/lprocfs_status.c
@@ -1413,6 +1413,7 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats)
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, extent_calc);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_init);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_connect);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, llog_finish);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
          LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c

index 8e9aec4..1501c98 100644 (file)
--- a/lustre/obdfilter/filter.c
+++ b/lustre/obdfilter/filter.c
@@ -2205,6 +2205,37 @@ static int filter_llog_finish(struct obd_device *obd, int count)
          RETURN(rc);
  }
  
+static int filter_llog_connect(struct obd_export *exp,
+                               struct llogd_conn_body *body)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct llog_ctxt *ctxt;
+        int rc;
+        ENTRY;
+        /* obdfilter's o_llog_connect method: flag sync start, connect ctxt. */
+        CDEBUG(D_OTHER, "%s: Recovery from log "LPX64"/"LPX64":%x\n",
+               obd->obd_name, body->lgdc_logid.lgl_oid,
+               body->lgdc_logid.lgl_ogr, body->lgdc_logid.lgl_ogen);
+        /* Raise the flag that the obdfilter mds_sync proc file reports. */
+        spin_lock_bh(&obd->obd_processing_task_lock);
+        obd->u.filter.fo_mds_ost_sync = 1;
+        spin_unlock_bh(&obd->obd_processing_task_lock);
+        /* NOTE(review): no error path below clears the flag - confirm intent. */
+        ctxt = llog_get_context(obd, body->lgdc_ctxt_idx);
+        if (ctxt == NULL) {
+                CERROR("NULL ctxt at idx %d\n", body->lgdc_ctxt_idx);
+                RETURN(-ENOENT);
+        }
+        /* Connect using the request's log id and generation, then put ctxt. */
+        rc = llog_connect(ctxt, &body->lgdc_logid, &body->lgdc_gen, NULL);
+        llog_ctxt_put(ctxt);
+        if (rc != 0)
+                CERROR("%s: failed to connect rc %d idx %d\n", obd->obd_name,
+                       rc, body->lgdc_ctxt_idx);
+
+        RETURN(rc);
+}
+
  static int filter_precleanup(struct obd_device *obd,
                               enum obd_cleanup_stage stage)
  {
@@ -4102,6 +4133,7 @@ static struct obd_ops filter_obd_ops = {
          .o_preprw         = filter_preprw,
          .o_commitrw       = filter_commitrw,
          .o_llog_init      = filter_llog_init,
+        .o_llog_connect   = filter_llog_connect,
          .o_llog_finish    = filter_llog_finish,
          .o_iocontrol      = filter_iocontrol,
          .o_health_check   = filter_health_check,
diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c

index 29c2a54..e2f6fcf 100644 (file)
--- a/lustre/obdfilter/filter_log.c
+++ b/lustre/obdfilter/filter_log.c
@@ -244,6 +244,13 @@ int filter_recov_log_mds_ost_cb(struct llog_handle *llh,
          if (ctxt->loc_obd->obd_stopping)
                  RETURN(LLOG_PROC_BREAK);
  
+        if (rec == NULL) {
+                cfs_spin_lock_bh(&ctxt->loc_obd->obd_processing_task_lock);
+                ctxt->loc_obd->u.filter.fo_mds_ost_sync = 0;
+                cfs_spin_unlock_bh(&ctxt->loc_obd->obd_processing_task_lock);
+                RETURN(0);
+        }
+
          if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) {
                  CERROR("log is not plain\n");
                  RETURN(-EINVAL);
diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c

index a179928..5bc1d0b 100644 (file)
--- a/lustre/obdfilter/lproc_obdfilter.c
+++ b/lustre/obdfilter/lproc_obdfilter.c
@@ -310,6 +310,15 @@ int lprocfs_filter_wr_sync_lock_cancel(struct file *file, const char *buffer,
          return count;
  }
  
+static int lprocfs_filter_rd_mds_sync(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data)
+{
+        struct obd_device *obd = (struct obd_device *)data;
+        LASSERT(obd != NULL);
+        /* "mds_sync" read handler: print fo_mds_ost_sync and a newline. */
+        return snprintf(page, count, "%u\n", obd->u.filter.fo_mds_ost_sync);
+}
+
  int lprocfs_filter_rd_degraded(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
  {
@@ -391,6 +400,7 @@ static struct lprocfs_vars lprocfs_filter_obd_vars[] = {
                            lprocfs_filter_wr_syncjournal, 0 },
          { "sync_on_lock_cancel", lprocfs_filter_rd_sync_lock_cancel,
                                   lprocfs_filter_wr_sync_lock_cancel, 0 },
+        { "mds_sync",     lprocfs_filter_rd_mds_sync, 0, 0},
          { "degraded",     lprocfs_filter_rd_degraded,
                            lprocfs_filter_wr_degraded, 0 },
          { 0 }
diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c

index 37d2ec8..33d6fb5 100644 (file)
--- a/lustre/osc/lproc_osc.c
+++ b/lustre/osc/lproc_osc.c
@@ -508,6 +508,14 @@ static int osc_wr_resend_count(struct file *file, const char *buffer,
          return count;
  }
  
+static int osc_rd_destroys_in_flight(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data)
+{
+        struct obd_device *obd = data; /* passed as the proc entry's data */
+        return snprintf(page, count, "%u\n", /* "destroys_in_flight" value */
+                        atomic_read(&obd->u.cli.cl_destroy_in_flight));
+}
+
  static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
          { "uuid",            lprocfs_rd_uuid,        0, 0 },
          { "ping",            0, lprocfs_wr_ping,     0, 0, 0222 },
@@ -527,6 +535,7 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = {
                                 osc_wr_max_pages_per_rpc, 0 },
          { "max_rpcs_in_flight", osc_rd_max_rpcs_in_flight,
                                  osc_wr_max_rpcs_in_flight, 0 },
+        { "destroys_in_flight", osc_rd_destroys_in_flight, 0, 0 },
          { "max_dirty_mb",    osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 },
          { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 },
          { "cur_grant_bytes", osc_rd_cur_grant_bytes,
diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c

index e264761..29ac077 100644 (file)
--- a/lustre/ost/ost_handler.c
+++ b/lustre/ost/ost_handler.c
@@ -1774,6 +1774,7 @@ static int ost_handle(struct ptlrpc_request *req)
          struct obd_trans_info *oti = &trans_info;
          int should_process, fail = OBD_FAIL_OST_ALL_REPLY_NET, rc = 0;
          struct obd_device *obd = NULL;
+        struct llogd_conn_body *body;
          ENTRY;
  
          LASSERT(current->journal_info == NULL);
@@ -1930,7 +1931,9 @@ static int ost_handle(struct ptlrpc_request *req)
          /* FIXME - just reply status */
          case LLOG_ORIGIN_CONNECT:
                  DEBUG_REQ(D_INODE, req, "log connect");
-                rc = llog_handle_connect(req);
+                body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF,
+                                      sizeof(*body));
+                rc = obd_llog_connect(req->rq_export, body);
                  req->rq_status = rc;
                  rc = lustre_pack_reply(req, 1, NULL, NULL);
                  if (rc)
diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh

index b82612d..74c6d51 100755 (executable)
--- a/lustre/tests/replay-dual.sh
+++ b/lustre/tests/replay-dual.sh
@@ -241,7 +241,7 @@ test_13() {
      kill -USR1 $MULTIPID || return 3
      wait $MULTIPID || return 4
  
-    # drop close 
+    # drop close
      do_facet mds lctl set_param fail_loc=0x80000115
      facet_failover mds
      do_facet mds lctl set_param fail_loc=0
@@ -287,14 +287,17 @@ test_14b() {
      umount $MOUNT2
  
      fail mds
+    wait_recovery_complete mds || error "MDS recovery isn't done"
  
      # first 25 files should have been replayed
      unlinkmany $MOUNT1/$tfile- 5 || return 2
      unlinkmany $MOUNT1/$tfile-3- 5 || return 3
  
      zconf_mount `hostname` $MOUNT2 || error "mount $MOUNT2 fail"
-    # give ost time to process llogs
-    sleep 3
+
+    wait_mds_ost_sync || return 5
+    wait_destroy_complete || return 6
+
      AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
      log "before $BEFOREUSED, after $AFTERUSED"
      [ $AFTERUSED -ne $BEFOREUSED ] && \
@@ -303,7 +306,7 @@ test_14b() {
  }
  run_test 14b "delete ost orphans if gap occured in objids due to VBR"
  
-test_15a() {   # was test_15
+test_15a() { # was test_15
      replay_barrier mds
      createmany -o $MOUNT1/$tfile- 25
      createmany -o $MOUNT2/$tfile-2- 1
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh

index 2292222..1bcda18 100755 (executable)
--- a/lustre/tests/replay-ost-single.sh
+++ b/lustre/tests/replay-ost-single.sh
@@ -124,7 +124,7 @@ iozone_bg () {
      # need to check iozone output  on "complete"
      local iozonelog=$TMP/${TESTSUITE}.iozone.log
      rm -f $iozonelog
-    cat $tmppipe | while read line ; do 
+    cat $tmppipe | while read line ; do
          echo "$line"
          echo "$line" >>$iozonelog
      done;
@@ -138,7 +138,7 @@ iozone_bg () {
      fi
      rm -f $tmppipe
      rm -f $iozonelog
-    return $rc 
+    return $rc
  }
  
  test_5() {
@@ -158,7 +158,7 @@ test_5() {
      local pid=$!
  
      echo iozone bg pid=$pid
-    
+
      sleep 8
      fail ost1
      local rc=0
@@ -187,7 +187,7 @@ test_6() {
      get_stripe_info client $f
  
      sync
-    sleep 2                                    # ensure we have a fresh statfs
+    sleep 2 # ensure we have a fresh statfs
      sync
  #define OBD_FAIL_MDS_REINT_NET_REP       0x119
      do_facet mds "lctl set_param fail_loc=0x80000119"
@@ -196,10 +196,12 @@ test_6() {
      (( $before > $after_dd )) || return 1
      rm -f $f
      fail ost$((stripe_index + 1))
+    wait_recovery_complete ost$((stripe_index + 1)) || error "OST recovery isn't done"
      $CHECKSTAT -t file $f && return 2 || true
      sync
      # let the delete happen
-    sleep 5
+    wait_mds_ost_sync || return 4
+    wait_destroy_complete || return 5
      after=`kbytesfree`
      log "before: $before after: $after"
      (( $before <= $after + 40 )) || return 3   # take OST logs into account
@@ -213,7 +215,7 @@ test_7() {
      before=`kbytesfree`
      dd if=/dev/urandom bs=4096 count=1280 of=$f || return 4
      sync
-    sleep 2                                    # ensure we have a fresh statfs
+    sleep 2 # ensure we have a fresh statfs
      sync
      after_dd=`kbytesfree`
      log "before: $before after_dd: $after_dd"
@@ -221,13 +223,15 @@ test_7() {
      replay_barrier ost1
      rm -f $f
      fail ost1
+    wait_recovery_complete ost1 || error "OST recovery isn't done"
      $CHECKSTAT -t file $f && return 2 || true
      sync
      # let the delete happen
-    sleep 2
+    wait_mds_ost_sync || return 4
+    wait_destroy_complete || return 5
      after=`kbytesfree`
      log "before: $before after: $after"
-    (( $before <= $after + 40 )) || return 3   # take OST logs into account
+    (( $before <= $after + 40 )) || return 3 # take OST logs into account
  }
  run_test 7 "Fail OST before obd_destroy"
  
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh

index dd6f1c0..2c2b35f 100644 (file)
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -411,13 +411,8 @@ test_20b() { # bug 10480
      client_up || client_up || true    # reconnect
  
      fail mds                            # start orphan recovery
-    wait_recovery_complete mds || error "MDS recovery not done"
-
-    # For interop with 2.0 only:
-    # FIXME just because recovery is done doesn't mean we've finished
-    # orphan cleanup.  Fake it with a sleep for now...
-    sleep 10
-
+    wait_recovery_complete mds || error "MDS recovery isn't done"
+    wait_mds_ost_sync || return 3
      AFTERUSED=`df -P $DIR | tail -1 | awk '{ print $3 }'`
      log "before $BEFOREUSED, after $AFTERUSED"
      [ $AFTERUSED -gt $((BEFOREUSED + 20)) ] && \
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh

index c16c4b6..a0e2f38 100644 (file)
--- a/lustre/tests/test-framework.sh
+++ b/lustre/tests/test-framework.sh
@@ -956,12 +956,13 @@ wait_delete_completed () {
          sleep 1
          TOTAL=`lctl get_param -n osc.*.kbytesavail | \
                 awk 'BEGIN{total=0}; {total+=$1}; END{print total}'`
-        [ "$TOTAL" -eq "$TOTALPREV" ] && break
+        [ "$TOTAL" -eq "$TOTALPREV" ] && return 0
          echo "Waiting delete completed ... prev: $TOTALPREV current: $TOTAL "
          TOTALPREV=$TOTAL
          WAIT=$(( WAIT + 1))
      done
-    echo "Delete completed."
+    echo "Delete is not completed in $MAX_WAIT sec"
+    return 1
  }
  
  wait_for_host() {
@@ -998,6 +999,57 @@ wait_recovery_complete () {
      return 1
  }
  
+wait_mds_ost_sync () {
+    # Recovery being done does not mean orphan cleanup is finished: poll
+    # obdfilter.*.mds_sync on all OSTs until 0. Returns 0 if synced, else 1.
+    echo "Waiting for orphan cleanup..."
+    # MAX value includes time needed for MDS-OST reconnection
+    local MAX=$(( TIMEOUT * 2 ))
+    local WAIT=0
+    while [ $WAIT -lt $MAX ]; do
+        local -a sync=($(do_nodes $(comma_list $(osts_nodes)) \
+            "$LCTL get_param -n obdfilter.*.mds_sync"))
+        local con=1
+        local i # keep the loop index out of the caller's scope
+        for ((i=0; i<${#sync[@]}; i++)); do
+            [ ${sync[$i]} -eq 0 ] && continue
+            # at least one OST reports an unfinished MDS-OST synchronization
+            con=0; break
+        done
+        sleep 2 # increase waiting time and cover statfs cache
+        [ ${con} -eq 1 ] && return 0
+        echo "Waiting $WAIT secs for mds-ost sync done."
+        WAIT=$((WAIT + 2))
+    done
+    echo "mds-ost sync not done in $MAX sec."
+    return 1
+}
+
+wait_destroy_complete () {
+    echo "Waiting for destroy to be done..."
+    # Poll osc.*.destroys_in_flight until all are 0; returns 0 on success,
+    # 1 on timeout. MAX reflects server responsiveness and must stay small:
+    # never raise it to make a test pass, investigate the slowness instead.
+    local MAX=5
+    local WAIT=0
+    while [ $WAIT -lt $MAX ]; do
+        local -a RPCs=($($LCTL get_param -n osc.*.destroys_in_flight))
+        local con=1
+        local i # keep the loop index out of the caller's scope
+        for ((i=0; i<${#RPCs[@]}; i++)); do
+            [ ${RPCs[$i]} -eq 0 ] && continue
+            # there are still some destroy RPCs in flight
+            con=0; break
+        done
+        sleep 1
+        [ ${con} -eq 1 ] && return 0 # done waiting
+        echo "Waiting $WAIT secs for destroys to be done."
+        WAIT=$((WAIT + 1))
+    done
+    echo "Destroys weren't done in $MAX sec."
+    return 1
+}
+
  wait_exit_ST () {
      local facet=$1
author	Mikhail Pershin <tappro@sun.com>
	Tue, 18 May 2010 18:48:18 +0000 (22:48 +0400)
committer	Johann Lombardi <johann@sun.com>
	Wed, 19 May 2010 11:13:19 +0000 (13:13 +0200)
lustre/include/obd.h		patch \| blob \| history
lustre/include/obd_class.h		patch \| blob \| history
lustre/obdclass/llog_cat.c		patch \| blob \| history
lustre/obdclass/lprocfs_status.c		patch \| blob \| history
lustre/obdfilter/filter.c		patch \| blob \| history
lustre/obdfilter/filter_log.c		patch \| blob \| history
lustre/obdfilter/lproc_obdfilter.c		patch \| blob \| history
lustre/osc/lproc_osc.c		patch \| blob \| history
lustre/ost/ost_handler.c		patch \| blob \| history
lustre/tests/replay-dual.sh		patch \| blob \| history
lustre/tests/replay-ost-single.sh		patch \| blob \| history
lustre/tests/replay-single.sh		patch \| blob \| history
lustre/tests/test-framework.sh		patch \| blob \| history