Whamcloud - gitweb
LU-7759 llite: handle inactive OSTs better in statfs 95/19195/28
author Andreas Dilger <andreas.dilger@intel.com>
Tue, 29 Mar 2016 17:29:57 +0000 (11:29 -0600)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 27 Jun 2016 18:56:25 +0000 (18:56 +0000)
Change the order of checks for inactive OSCs in lov_prep_statfs_set()
so that administratively disabled OSTs do not generate any output in
"lfs df" at all, to avoid needlessly cluttering the output.

Enable the lazystatfs mount option by default, so that "df" does not
hang when an OST is temporarily offline.

Fix test-framework.sh to use "lfs df $MOUNT" instead of "df $MOUNT" to
determine when recovery is complete, now that "df" does not block.

Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Change-Id: I993761a7eb120a36a1b80c2822cb6d8011ccab07
Reviewed-on: http://review.whamcloud.com/19195
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/llite/llite_lib.c
lustre/lov/lov_request.c
lustre/obdclass/genops.c
lustre/tests/recovery-small.sh
lustre/tests/replay-ost-single.sh
lustre/tests/test-framework.sh

diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index d442e22..276457b 100644 (file)
@@ -117,6 +117,7 @@ static struct ll_sb_info *ll_init_sbi(void)
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         sbi->ll_flags |= LL_SBI_LRU_RESIZE;
 #endif
+       sbi->ll_flags |= LL_SBI_LAZYSTATFS;
 
         for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
                spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c
index a96bfbb..dc03ce6 100644 (file)
@@ -361,19 +361,20 @@ int lov_prep_statfs_set(struct obd_device *obd, struct obd_info *oinfo,
                        continue;
                }
 
+               /* skip targets that have been explicitely disabled by the
+                * administrator */
+               if (!lov->lov_tgts[i]->ltd_exp) {
+                       CDEBUG(D_HA, "lov idx %d administratively disabled\n",
+                              i);
+                       continue;
+               }
+
                if (!lov->lov_tgts[i]->ltd_active)
                        lov_check_and_wait_active(lov, i);
 
-                /* skip targets that have been explicitely disabled by the
-                 * administrator */
-                if (!lov->lov_tgts[i]->ltd_exp) {
-                        CDEBUG(D_HA, "lov idx %d administratively disabled\n", i);
-                        continue;
-                }
-
-                OBD_ALLOC(req, sizeof(*req));
-                if (req == NULL)
-                        GOTO(out_set, rc = -ENOMEM);
+               OBD_ALLOC(req, sizeof(*req));
+               if (req == NULL)
+                       GOTO(out_set, rc = -ENOMEM);
 
                 OBD_ALLOC(req->rq_oi.oi_osfs, sizeof(*req->rq_oi.oi_osfs));
                 if (req->rq_oi.oi_osfs == NULL) {
diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c
index 749d0ac..fc93b4a 100644 (file)
@@ -1123,6 +1123,7 @@ void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
                lock, exp, lock->l_exp_refs_nr);
        spin_unlock(&exp->exp_locks_list_guard);
 }
+EXPORT_SYMBOL(__class_export_add_lock_ref);
 
 void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
 {
@@ -1141,6 +1142,7 @@ void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
                lock, exp, lock->l_exp_refs_nr);
        spin_unlock(&exp->exp_locks_list_guard);
 }
+EXPORT_SYMBOL(__class_export_del_lock_ref);
 #endif
 
 /* A connection defines an export context in which preallocation can
@@ -1529,6 +1531,7 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
 void (*class_export_dump_hook)(struct obd_export*) = NULL;
+EXPORT_SYMBOL(class_export_dump_hook);
 #endif
 
 static void print_export_data(struct obd_export *exp, const char *status,
diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh
index 0d2cfa7..1c498b7 100755 (executable)
@@ -255,11 +255,12 @@ test_10d() {
        rm -f $TMP/$tfile
        echo -n ", world" | dd of=$TMP/$tfile bs=1c seek=5
 
+       remount_client $MOUNT
        mount_client $MOUNT2
 
        cancel_lru_locks osc
        $LFS setstripe -i 0 -c 1 $DIR1/$tfile
-       echo -n hello > $DIR1/$tfile
+       echo -n hello | dd of=$DIR1/$tfile bs=5
 
        stat $DIR2/$tfile >& /dev/null
        $LCTL set_param fail_err=71
@@ -267,8 +268,9 @@ test_10d() {
 
        client_reconnect
 
-       cmp $DIR1/$tfile $DIR2/$tfile || error "file contents differ"
-       cmp $DIR1/$tfile $TMP/$tfile || error "wrong content found"
+       cancel_lru_locks osc
+       cmp -l $DIR1/$tfile $DIR2/$tfile || error "file contents differ"
+       cmp -l $DIR1/$tfile $TMP/$tfile || error "wrong content found"
 
        evict=$(do_facet client $LCTL get_param osc.$FSNAME-OST0000*.state | \
                tr -d '\-\[\] ' | \
@@ -519,10 +521,10 @@ test_18c() {
     do_facet ost1 lctl set_param fail_loc=0x80000225
     # force reconnect
     sleep 1
-    df $MOUNT > /dev/null 2>&1
+    $LFS df $MOUNT > /dev/null 2>&1
     sleep 2
     # my understanding is that there should be nothing in the page
-    # cache after the client reconnects?     
+    # cache after the client reconnects?
     rc=0
     pgcache_empty || rc=2
     rm -f $f $TMP/$tfile
@@ -1495,6 +1497,11 @@ check_target_ir_state()
         local recovery_proc=obdfilter.${!name}.recovery_status
         local st
 
+       while : ; do
+               st=$(do_facet $target "$LCTL get_param -n $recovery_proc |
+                       awk '/status:/{ print \\\$2}'")
+               [ x$st = xRECOVERING ] || break
+       done
         st=$(do_facet $target "lctl get_param -n $recovery_proc |
                                awk '/IR:/{ print \\\$2}'")
        [ $st != ON -o $st != OFF -o $st != ENABLED -o $st != DISABLED ] ||
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh
index 476b91d..ac9f543 100755 (executable)
@@ -49,7 +49,7 @@ test_0a() {
        # needs to run during initial client->OST connection
        #define OBD_FAIL_OST_ALL_REPLY_NET       0x211
        do_facet ost1 "lctl set_param fail_loc=0x80000211"
-       zconf_mount $(hostname) $MOUNT && df $MOUNT || error "0a mount fail"
+       zconf_mount $(hostname) $MOUNT && $LFS df $MOUNT || error "mount fail"
 }
 run_test 0a "target handle mismatch (bug 5317)"
 
@@ -382,12 +382,12 @@ test_8e() {
        sleep 1 # ensure we have a fresh statfs
        #define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231
        do_facet ost1 "lctl set_param fail_loc=0x231"
-       df $MOUNT &
+       $LFS df $MOUNT &
        dfpid=$!
        sleep $TIMEOUT
        if ! ps -p $dfpid  > /dev/null 2>&1; then
-                       do_facet ost1 "lctl set_param fail_loc=0"
-                       error "df shouldn't have completed!"
+               do_facet ost1 "lctl set_param fail_loc=0"
+               error "df shouldn't have completed!"
        fi
 }
 run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS"
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
index 427683d..6d6843a 100755 (executable)
@@ -2425,16 +2425,17 @@ client_evicted() {
 }
 
 client_reconnect_try() {
-    uname -n >> $MOUNT/recon
-    if [ -z "$CLIENTS" ]; then
-        df $MOUNT; uname -n >> $MOUNT/recon
-    else
-        do_nodes $CLIENTS "df $MOUNT; uname -n >> $MOUNT/recon" > /dev/null
-    fi
-    echo Connected clients:
-    cat $MOUNT/recon
-    ls -l $MOUNT/recon > /dev/null
-    rm $MOUNT/recon
+       local f=$MOUNT/recon
+
+       uname -n >> $f
+       if [ -z "$CLIENTS" ]; then
+               $LFS df $MOUNT; uname -n >> $f
+       else
+               do_nodes $CLIENTS "$LFS df $MOUNT; uname -n >> $f" > /dev/null
+       fi
+       echo "Connected clients: $(cat $f)"
+       ls -l $f > /dev/null
+       rm $f
 }
 
 client_reconnect() {
@@ -2529,7 +2530,7 @@ obd_name() {
 replay_barrier() {
        local facet=$1
        do_facet $facet "sync; sync; sync"
-       df $MOUNT
+       $LFS df $MOUNT
 
        # make sure there will be no seq change
        local clients=${CLIENTS:-$HOSTNAME}
@@ -2614,7 +2615,7 @@ fail() {
 
        facet_failover $* || error "failover: $?"
        wait_clients_import_state "$clients" "$facets" FULL
-       clients_up || error "post-failover df: $?"
+       clients_up || error "post-failover stat: $?"
 }
 
 fail_nodf() {
@@ -2628,8 +2629,8 @@ fail_abort() {
        change_active $facet
        wait_for_facet $facet
        mount_facet $facet -o abort_recovery
-       clients_up || echo "first df failed: $?"
-       clients_up || error "post-failover df: $?"
+       clients_up || echo "first stat failed: $?"
+       clients_up || error "post-failover stat: $?"
 }
 
 do_lmc() {