From 58fcb8ac7eaae96c933796a01175fe7d41157b25 Mon Sep 17 00:00:00 2001
From: Gregoire Pichon
Date: Tue, 18 Jun 2013 16:19:10 +0200
Subject: [PATCH] LU-3478 iokit: NUMA support in sgpdd-survey

This patch provides NUMA support in sgpdd-survey script so that
devices can be accessed similarly to threads on Lustre servers.
Typically, IO buffers must be located close to cpus that are
local to the device.

It is based on the "numactl" command and an external command
provided by the caller that returns a cpu list from a device.

Signed-off-by: Gregoire Pichon
Change-Id: I42d41a69a2ff24b3384cada9d742f163d3777db2
Reviewed-on: http://review.whamcloud.com/6683
Tested-by: Hudson
Reviewed-by: Jian Yu
Tested-by: Maloo
Reviewed-by: Minh Diep
Reviewed-by: Oleg Drokin
---
 lustre-iokit/sgpdd-survey/sgpdd-survey | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/lustre-iokit/sgpdd-survey/sgpdd-survey b/lustre-iokit/sgpdd-survey/sgpdd-survey
index 4082720..8edd1e8 100755
--- a/lustre-iokit/sgpdd-survey/sgpdd-survey
+++ b/lustre-iokit/sgpdd-survey/sgpdd-survey
@@ -38,12 +38,27 @@ boundary=${boundary:-1024}
 thrlo=${thrlo:-1}
 thrhi=${thrhi:-4096}
 
+# NUMA support
+# User provided script that returns a cpu list from a specified device.
+# Implementation depends on the type of device (scsi/raw, with/without
+# multipath, technology fc/sas/ib)
+# For example:
+# $ cat bin/dev2cpus
+# #!/bin/bash
+# dev=$(basename $1)
+# pci=$(readlink -f /sys/class/block/$dev | cut -d/ -f1-5)
+# cat ${pci}/local_cpulist
+dev2cpus=${dev2cpus:-""}
+
 #####################################################################
 # leave the rest of this alone unless you know what you're doing...
 
 # and max # threads one instance will spawn
 SG_MAX_QUEUE=16
 
+# numactl command
+NUMACTL=${NUMACTL:-"/usr/bin/numactl"}
+
 unique () {
 	echo "$@" | xargs -n1 echo | sort -u
 }
@@ -123,6 +138,14 @@ for d in $scsidevs $rawdevs; do
 done
 unique_hosts=(`unique ${hosts[@]}`)
 
+# get device cpu list
+devcpus=()
+if [ -n "$dev2cpus" ]; then
+	for ((i=0; i < $ndevs; i++)); do
+		devcpus[$i]=$(remote_shell ${hosts[$i]} $dev2cpus ${devs[$i]})
+	done
+fi
+
 # map given device names into SG device names
 if [ "$scsidevs" ]; then
 	# make sure sg kernel module is loaded
@@ -277,8 +300,14 @@ for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do
 					outf="of=$dev"
 					skip=seek
 				fi
+				if [ -n "${devcpus[$i]}" -a -x "$NUMACTL" ]; then
+					numacmd="$NUMACTL --physcpubind=${devcpus[$i]} --localalloc"
+				else
+					numacmd=""
+				fi
 				for ((j=0;j<crg;j++)); do
 					echo >> ${cmdsf}_${host} \
+						"$numacmd " \
 						"sgp_dd 2> ${tmpf}_${i}_${j} $inf $outf " \
 						"${skip}=$((boundary+j*blocks)) " \
 						"thr=$((thr/crg)) count=$count bs=${bs[$i]} " \
-- 
1.8.3.1