Whamcloud - gitweb
LU-2731 scripts: Speed up /etc/init.d/lustre stop
author Prakash Surya <surya1@llnl.gov>
Thu, 31 Jan 2013 23:06:07 +0000 (15:06 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 22 Mar 2013 17:37:29 +0000 (13:37 -0400)
This patch parallelizes the shutdown of multiple services running on the
same node. This has been empirically shown to drastically reduce the
runtime of the script for an OSS with many OSTs.

This patch was tested on a Lustre 2.1 ldiskfs OSS node with 32 OSTs
attached, by recording startup and shutdown times for the OSS. The
number of OSTs used was varied, ranging from a single one up to all 32,
incrementing by powers of two (i.e. timed startup/shutdown of 1 OST,
then of 2 OSTs, then 4, etc.).

Results of startup and shutdown times *without* this patch applied:

    +------------------------------------------------+
    | $ time /etc/init.d/lustre start # (w/o patch)  |
    +-----------+------------+-----------+-----------+
    | # of OSTs |    real    |    user   |    sys    |
    +-----------+------------+-----------+-----------+
    |      1    | 0m  2.184s | 0m 0.162s | 0m 0.077s |
    |      2    | 0m  4.285s | 0m 0.281s | 0m 0.148s |
    |      4    | 0m  8.508s | 0m 0.500s | 0m 0.302s |
    |      8    | 0m 16.961s | 0m 1.017s | 0m 0.568s |
    |     16    | 0m 33.884s | 0m 1.964s | 0m 1.176s |
    |     32    | 1m  7.744s | 0m 3.986s | 0m 2.280s |
    +-----------+------------+-----------+-----------+

    +------------------------------------------------+
    | $ time /etc/init.d/lustre stop # (w/o patch)   |
    +-----------+------------+-----------+-----------+
    | # of OSTs |    real    |    user   |    sys    |
    +-----------+------------+-----------+-----------+
    |     1     | 0m  4.758s | 0m 0.072s | 0m 0.030s |
    |     2     | 0m  9.018s | 0m 0.118s | 0m 0.049s |
    |     4     | 0m 18.813s | 0m 0.185s | 0m 0.083s |
    |     8     | 0m 37.586s | 0m 0.337s | 0m 0.141s |
    |    16     | 1m 16.092s | 0m 0.597s | 0m 0.263s |
    |    32     | 2m 37.550s | 0m 1.181s | 0m 0.403s |
    +-----------+------------+-----------+-----------+

Results of startup and shutdown times *with* this patch:

    +------------------------------------------------+
    | $ time /etc/init.d/lustre start # (w/ patch)   |
    +-----------+------------+-----------+-----------+
    | # of OSTs |    real    |    user   |    sys    |
    +-----------+------------+-----------+-----------+
    |      1    | 0m  2.183s | 0m 0.158s | 0m 0.083s |
    |      2    | 0m  4.282s | 0m 0.274s | 0m 0.153s |
    |      4    | 0m  8.519s | 0m 0.510s | 0m 0.303s |
    |      8    | 0m 16.966s | 0m 1.019s | 0m 0.583s |
    |     16    | 0m 33.878s | 0m 1.984s | 0m 1.154s |
    |     32    | 1m  7.745s | 0m 3.944s | 0m 2.322s |
    +-----------+------------+-----------+-----------+

    +------------------------------------------------+
    | $ time /etc/init.d/lustre stop # (w/ patch)    |
    +-----------+------------+-----------+-----------+
    | # of OSTs |    real    |    user   |    sys    |
    +-----------+------------+-----------+-----------+
    |      1    | 0m  4.566s | 0m 0.075s | 0m 0.023s |
    |      2    | 0m  4.857s | 0m 0.105s | 0m 0.070s |
    |      4    | 0m  4.777s | 0m 0.175s | 0m 0.064s |
    |      8    | 0m  5.449s | 0m 0.323s | 0m 0.153s |
    |     16    | 0m  5.862s | 0m 0.606s | 0m 0.208s |
    |     32    | 0m  6.307s | 0m 1.183s | 0m 0.811s |
    +-----------+------------+-----------+-----------+

Signed-off-by: Prakash Surya <surya1@llnl.gov>
Change-Id: I90c1f6a265a8d86bbc8ddfb88aa635e5b96fd975
Reviewed-on: http://review.whamcloud.com/5235
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Emoly Liu <emoly.liu@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
lustre/scripts/lustre

index b97951f..eeb5941 100644 (file)
@@ -498,6 +498,7 @@ stop_services ()
 {
        local labels=$*
        local result=0
+       local pids=""
        local dir dev label
 
        for label in $labels; do
@@ -512,9 +513,22 @@ stop_services ()
                        # no error
                        continue
                fi
+
                echo "Unmounting $dir"
-               umount $dir || result=2
+               umount $dir &
+
+               if [ -z "$pids" ]; then
+                       pids="$!"
+               else
+                       pids="$pids $!"
+               fi
        done
+
+       # wait for all umount processes to complete, report any errors
+       for pid in $pids; do
+               wait $pid || result=2
+       done
+
        # double check!
        for label in $labels; do
                if mountpt_is_active $label; then