Whamcloud - gitweb
LU-3478 iokit: NUMA support in sgpdd-survey 83/6683/7
author: Gregoire Pichon <gregoire.pichon@bull.net>
Tue, 18 Jun 2013 14:19:10 +0000 (16:19 +0200)
committer: Oleg Drokin <oleg.drokin@intel.com>
Tue, 20 Aug 2013 03:12:16 +0000 (03:12 +0000)
This patch provides NUMA support in sgpdd-survey script so that
devices can be accessed similarly to threads on Lustre servers.
Typically, IO buffers must be located close to cpus that are
local to the device.

It is based on the "numactl" command and an external command
provided by the caller that returns a cpu list from a device.

Signed-off-by: Gregoire Pichon <gregoire.pichon@bull.net>
Change-Id: I42d41a69a2ff24b3384cada9d742f163d3777db2
Reviewed-on: http://review.whamcloud.com/6683
Tested-by: Hudson
Reviewed-by: Jian Yu <jian.yu@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Minh Diep <minh.diep@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre-iokit/sgpdd-survey/sgpdd-survey

index 4082720..8edd1e8 100755 (executable)
@@ -38,12 +38,27 @@ boundary=${boundary:-1024}
 thrlo=${thrlo:-1}
 thrhi=${thrhi:-4096}
 
+# NUMA support
+# User provided script that returns a cpu list from a specified device.
+# Implementation depends on the type of device (scsi/raw, with/without
+# multipath, technology fc/sas/ib)
+# For example:
+#   $ cat bin/dev2cpus
+#   #!/bin/bash
+#   dev=$(basename $1)
+#   pci=$(readlink -f /sys/class/block/$dev | cut -d/ -f1-5)
+#   cat ${pci}/local_cpulist
+dev2cpus=${dev2cpus:-""}
+
 #####################################################################
 # leave the rest of this alone unless you know what you're doing...
 
 # and max # threads one instance will spawn
 SG_MAX_QUEUE=16
 
+# numactl command
+NUMACTL=${NUMACTL:-"/usr/bin/numactl"}
+
 unique () {
     echo "$@" | xargs -n1 echo | sort -u
 }
@@ -123,6 +138,14 @@ for d in $scsidevs $rawdevs; do
 done
 unique_hosts=(`unique ${hosts[@]}`)
 
+# get device cpu list
+devcpus=()
+if [ -n "$dev2cpus" ]; then
+    for ((i=0; i < $ndevs; i++)); do
+       devcpus[$i]=$(remote_shell ${hosts[$i]} $dev2cpus ${devs[$i]})
+    done
+fi
+
 # map given device names into SG device names
 if [ "$scsidevs" ]; then
     # make sure sg kernel module is loaded
@@ -277,8 +300,14 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
                        outf="of=$dev"
                        skip=seek
                    fi
+                   if [ -n "${devcpus[$i]}" -a -x "$NUMACTL" ]; then
+                       numacmd="$NUMACTL --physcpubind=${devcpus[$i]} --localalloc"
+                   else
+                       numacmd=""
+                   fi
                    for ((j=0;j<crg;j++)); do 
                        echo >> ${cmdsf}_${host} \
+                               "$numacmd " \
                                "sgp_dd 2> ${tmpf}_${i}_${j} $inf $outf " \
                                "${skip}=$((boundary+j*blocks)) " \
                                "thr=$((thr/crg)) count=$count bs=${bs[$i]} " \