Whamcloud - gitweb
Lustre 2.x Operations Manual as Docbook 5.0
[doc/manual.git] / BenchmarkingTests.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <article version="5.0" xml:lang="en-US" xmlns="http://docbook.org/ns/docbook" xmlns:xl="http://www.w3.org/1999/xlink">
3   <info>
4     <title>Benchmarking Lustre Performance (Lustre I/O Kit)</title>
5   </info>
6   <informaltable frame="none">
7     <tgroup cols="2">
8       <colspec colname="c1" colwidth="50*"/>
9       <colspec colname="c2" colwidth="50*"/>
10       
11       
12       <tbody>
13         <row>
14           <entry align="left"><para>Lustre 2.0 Operations Manual</para></entry>
15           <entry align="right" valign="top"><para><link xl:href="index.html"><inlinemediaobject><imageobject role="html">
16                     <imagedata contentdepth="26" contentwidth="30" fileref="./shared/toc01.gif" scalefit="1"/>
17                   </imageobject>
18 <imageobject role="fo">
19                     <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/toc01.gif" scalefit="1" width="100%"/>
20                   </imageobject>
21 </inlinemediaobject></link><link xl:href="LNETSelfTest.html"><inlinemediaobject><imageobject role="html">
22                     <imagedata contentdepth="26" contentwidth="30" fileref="./shared/prev01.gif" scalefit="1"/>
23                   </imageobject>
24 <imageobject role="fo">
25                     <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/prev01.gif" scalefit="1" width="100%"/>
26                   </imageobject>
27 </inlinemediaobject></link><link xl:href="LustreTuning.html"><inlinemediaobject><imageobject role="html">
28                     <imagedata contentdepth="26" contentwidth="30" fileref="./shared/next01.gif" scalefit="1"/>
29                   </imageobject>
30 <imageobject role="fo">
31                     <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/next01.gif" scalefit="1" width="100%"/>
32                   </imageobject>
33 </inlinemediaobject></link><link xl:href="ix.html"><inlinemediaobject><imageobject role="html">
34                     <imagedata contentdepth="26" contentwidth="30" fileref="./shared/index01.gif" scalefit="1"/>
35                   </imageobject>
36 <imageobject role="fo">
37                     <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/index01.gif" scalefit="1" width="100%"/>
38                   </imageobject>
39 </inlinemediaobject></link></para></entry>
40         </row>
41       </tbody>
42     </tgroup>
43   </informaltable>
44   <para><link xl:href=""/></para>
45   <informaltable frame="none">
46     <tgroup cols="1">
47       <colspec colname="c1" colwidth="100*"/>
48       
49       <tbody>
50         <row>
51           <entry align="right"><para><anchor xml:id="dbdoclet.50438212_pgfId-874" xreflabel=""/>C H A P T E R  24<anchor xml:id="dbdoclet.50438212_37215" xreflabel=""/></para></entry>
52         </row>
53       </tbody>
54     </tgroup>
55   </informaltable>
56   <informaltable frame="none">
57     <tgroup cols="1">
58       <colspec colname="c1" colwidth="100*"/>
59       
60       <tbody>
61         <row>
62           <entry align="right"><para><anchor xml:id="dbdoclet.50438212_pgfId-5529" xreflabel=""/><anchor xml:id="dbdoclet.50438212_66186" xreflabel=""/>Benchmarking Lustre Performance (Lustre I/O Kit)</para></entry>
63         </row>
64       </tbody>
65     </tgroup>
66   </informaltable>
67   <para><anchor xml:id="dbdoclet.50438212_pgfId-1289895" xreflabel=""/>This chapter describes the Lustre I/O kit, a collection of I/O benchmarking tools for a Lustre cluster, and PIOS, a parallel I/O simulator for Linux and Solaris. It includes:</para>
68   <itemizedlist><listitem>
69       <para><anchor xml:id="dbdoclet.50438212_pgfId-1289899" xreflabel=""/><link xl:href="BenchmarkingTests.html#50438212_44437">Using Lustre I/O Kit Tools</link></para>
70     </listitem>
71 <listitem>
72       <para> </para>
73     </listitem>
74 <listitem>
75       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303084" xreflabel=""/><link xl:href="BenchmarkingTests.html#50438212_51053">Testing I/O Performance of Raw Hardware (sgpdd_survey)</link></para>
76     </listitem>
77 <listitem>
78       <para> </para>
79     </listitem>
80 <listitem>
81       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303088" xreflabel=""/><link xl:href="BenchmarkingTests.html#50438212_26516">Testing OST Performance (obdfilter_survey)</link></para>
82     </listitem>
83 <listitem>
84       <para> </para>
85     </listitem>
86 <listitem>
87       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303097" xreflabel=""/><link xl:href="BenchmarkingTests.html#50438212_85136">Testing OST I/O Performance (ost_survey)</link></para>
88     </listitem>
89 <listitem>
90       <para> </para>
91     </listitem>
92 <listitem>
93       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303101" xreflabel=""/><link xl:href="BenchmarkingTests.html#50438212_58201">Collecting Application Profiling Information (stats-collect)</link></para>
94     </listitem>
95 <listitem>
96       <para> </para>
97     </listitem>
98 </itemizedlist>
99   <section remap="h2">
100     <title><anchor xml:id="dbdoclet.50438212_pgfId-1289909" xreflabel=""/></title>
101     <section remap="h2">
102       <title>24.1 <anchor xml:id="dbdoclet.50438212_44437" xreflabel=""/>Using Lustre I/O Kit Tools</title>
103       <para><anchor xml:id="dbdoclet.50438212_pgfId-1302857" xreflabel=""/>The tools in the Lustre I/O Kit are used to benchmark Lustre hardware and validate that it is working as expected before you install the Lustre software. They can also be used to validate the performance of the various hardware and software layers in the cluster and also to find and troubleshoot I/O issues.</para>
104       <para><anchor xml:id="dbdoclet.50438212_pgfId-1300612" xreflabel=""/>Typically, performance is measured starting with single raw devices and then proceeding to groups of devices. Once raw performance has been established, other software layers are then added incrementally and tested.</para>
105       <section remap="h3">
106         <title><anchor xml:id="dbdoclet.50438212_pgfId-1289911" xreflabel=""/>24.1.1 Contents of the Lustre I/O Kit</title>
107         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303002" xreflabel=""/>The I/O kit contains three tests, each of which tests a progressively higher layer in the Lustre stack:</para>
108         <itemizedlist><listitem>
109             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300548" xreflabel=""/>sgpdd_survey  - Measure basic “bare metal” performance of devices while bypassing the kernel block device layers, buffer cache, and file system.</para>
110           </listitem>
111 <listitem>
112             <para> </para>
113           </listitem>
114 <listitem>
115             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300572" xreflabel=""/>obdfilter_survey  - Measure the performance of one or more OSTs directly on the OSS node or alternately over the network from a Lustre client.</para>
116           </listitem>
117 <listitem>
118             <para> </para>
119           </listitem>
120 <listitem>
121             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300557" xreflabel=""/>ost_survey  - Performs I/O against OSTs individually to allow performance comparisons to detect if an OST is performing suboptimally due to hardware issues.</para>
122           </listitem>
123 <listitem>
124             <para> </para>
125           </listitem>
126 </itemizedlist>
127         <para><anchor xml:id="dbdoclet.50438212_pgfId-1300595" xreflabel=""/>Typically with these tests, Lustre should deliver 85-90% of the raw device performance.</para>
128         <para><anchor xml:id="dbdoclet.50438212_pgfId-1300756" xreflabel=""/>A utility stats-collect is also provided to collect application profiling information from Lustre clients and servers. See <link xl:href="BenchmarkingTests.html#50438212_58201">Collecting Application Profiling Information (stats-collect)</link> for more information.</para>
129       </section>
130       <section remap="h3">
131         <title><anchor xml:id="dbdoclet.50438212_pgfId-1289914" xreflabel=""/>24.1.2 Preparing to Use the Lustre <anchor xml:id="dbdoclet.50438212_marker-1289913" xreflabel=""/>I/O Kit</title>
132         <para><anchor xml:id="dbdoclet.50438212_pgfId-1300699" xreflabel=""/>The following prerequisites must be met to use the tests in the Lustre I/O kit:</para>
133         <itemizedlist><listitem>
134             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300700" xreflabel=""/> Password-free remote access to nodes in the system (provided by ssh or rsh).</para>
135           </listitem>
136 <listitem>
137             <para> </para>
138           </listitem>
139 <listitem>
140             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303852" xreflabel=""/> LNET self-test completed to test that Lustre Networking has been properly installed and configured. See <link xl:href="LNETSelfTest.html#50438223_71556">Chapter 23</link>: <link xl:href="LNETSelfTest.html#50438223_21832">Testing Lustre Network Performance (LNET Self-Test)</link>.</para>
141           </listitem>
142 <listitem>
143             <para> </para>
144           </listitem>
145 <listitem>
146             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300701" xreflabel=""/> Lustre file system software installed.</para>
147           </listitem>
148 <listitem>
149             <para> </para>
150           </listitem>
151 <listitem>
152             <para><anchor xml:id="dbdoclet.50438212_pgfId-1300702" xreflabel=""/>sg3_utils  package providing the sgp_dd tool (sg3_utils is a separate RPM package available online using YUM).</para>
153           </listitem>
154 <listitem>
155             <para> </para>
156           </listitem>
157 </itemizedlist>
158         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289915" xreflabel=""/>Download the Lustre I/O kit (lustre-iokit) from:</para>
159         <para><anchor xml:id="dbdoclet.50438212_pgfId-1300015" xreflabel=""/><link xl:href="http://downloads.lustre.org/public/tools/lustre-iokit/">http://downloads.lustre.org/public/tools/lustre-iokit/</link></para>
160       </section>
161     </section>
162     <section remap="h2">
163       <title>24.2 Testing I/O Performance of Raw Hardware (sgpdd_survey<anchor xml:id="dbdoclet.50438212_51053" xreflabel=""/><anchor xml:id="dbdoclet.50438212_marker-1302844" xreflabel=""/>)</title>
164       <para><anchor xml:id="dbdoclet.50438212_pgfId-1289936" xreflabel=""/>The sgpdd_survey tool is used to test bare metal I/O performance of the raw hardware, while bypassing as much of the kernel as possible. This survey may be used to characterize the performance of a SCSI device by simulating an OST serving multiple stripe files. The data gathered by this survey can help set expectations for the performance of a Lustre OST using this device.</para>
165       <para><anchor xml:id="dbdoclet.50438212_pgfId-1300866" xreflabel=""/>The script uses sgp_dd to carry out raw sequential disk I/O. It runs with variable numbers of sgp_dd threads to show how performance varies with different request queue depths.</para>
166       <para><anchor xml:id="dbdoclet.50438212_pgfId-1300867" xreflabel=""/>The script spawns variable numbers of sgp_dd instances, each reading or writing a separate area of the disk to demonstrate performance variance within a number of concurrent stripe files.</para>
167       <para><anchor xml:id="dbdoclet.50438212_pgfId-1305925" xreflabel=""/>Several tips and insights for disk performance measurement are described below. Some of this information is specific to RAID arrays and/or the Linux RAID implementation.</para>
168       <itemizedlist><listitem>
169           <para><anchor xml:id="dbdoclet.50438212_pgfId-1305926" xreflabel=""/><emphasis>Performance is limited by the slowest disk.</emphasis></para>
170         </listitem>
171 <listitem>
172           <para> </para>
173         </listitem>
174 </itemizedlist>
175       <para><anchor xml:id="dbdoclet.50438212_pgfId-1305927" xreflabel=""/>Before creating a RAID array, benchmark all disks individually. We have frequently encountered situations where drive performance was not consistent for all devices in the array. Replace any disks that are significantly slower than the rest.</para>
176       <itemizedlist><listitem>
177           <para><anchor xml:id="dbdoclet.50438212_pgfId-1305928" xreflabel=""/><emphasis>Disks and arrays are very sensitive to request size.</emphasis></para>
178         </listitem>
179 <listitem>
180           <para> </para>
181         </listitem>
182 </itemizedlist>
183       <para><anchor xml:id="dbdoclet.50438212_pgfId-1305929" xreflabel=""/>To identify the optimal request size for a given disk, benchmark the disk with different record sizes ranging from 4 KB up to 1 or 2 MB.</para>
184       <informaltable frame="none">
185         <tgroup cols="2">
186           <colspec colname="c1" colwidth="5*"/>
187           <colspec colname="c2" colwidth="95*"/>
188           
189           
190           <tbody>
191             <row>
192               <entry><para><inlinemediaobject><imageobject role="html">
193                       <imagedata fileref="./shared/caution.gif" scalefit="1"/>
194                     </imageobject>
195 <imageobject role="fo">
196                       <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/caution.gif" scalefit="1" width="100%"/>
197                     </imageobject>
198 </inlinemediaobject></para></entry>
199               <entry><para><emphasis role="bold">Caution -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1300876" xreflabel=""/>The sgpdd_survey script overwrites the device being tested, which results in the <emphasis>LOSS OF ALL DATA</emphasis> on that device. Exercise caution when selecting the device to be tested.</para></entry>
200             </row>
201           </tbody>
202         </tgroup>
203       </informaltable>
204        <informaltable frame="none">
205         <tgroup cols="1">
206           <colspec colname="c1" colwidth="100*"/>
207           <tbody>
208             <row>
209               <entry><para><emphasis role="bold">Note -</emphasis> Array performance with all LUNs loaded does not always match the performance of a single LUN when tested in isolation.</para></entry>
210             </row>
211           </tbody>
212         </tgroup>
213       </informaltable>
214       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303815" xreflabel=""/></para>
215       <para><emphasis role="bold">Prerequisites:</emphasis></para>
216       <itemizedlist><listitem>
217           <para><anchor xml:id="dbdoclet.50438212_pgfId-1300842" xreflabel=""/>sgp_dd  tool in the sg3_utils package</para>
218         </listitem>
219 <listitem>
220           <para> </para>
221         </listitem>
222 <listitem>
223           <para><anchor xml:id="dbdoclet.50438212_pgfId-1300845" xreflabel=""/> Lustre software is <emphasis>NOT</emphasis> required</para>
224         </listitem>
225 <listitem>
226           <para> </para>
227         </listitem>
228 </itemizedlist>
229       <para><anchor xml:id="dbdoclet.50438212_pgfId-1289939" xreflabel=""/>The device(s) being tested must meet one of these two requirements:</para>
230       <itemizedlist><listitem>
231           <para><anchor xml:id="dbdoclet.50438212_pgfId-1300909" xreflabel=""/> If the device is a SCSI device, it must appear in the output of sg_map (make sure the kernel module sg is loaded).</para>
232         </listitem>
233 <listitem>
234           <para> </para>
235         </listitem>
236 <listitem>
237           <para><anchor xml:id="dbdoclet.50438212_pgfId-1300920" xreflabel=""/> If the device is a raw device, it must appear in the output of raw -qa.</para>
238         </listitem>
239 <listitem>
240           <para> </para>
241         </listitem>
242 </itemizedlist>
243       <para><anchor xml:id="dbdoclet.50438212_pgfId-1300930" xreflabel=""/>Raw and SCSI devices cannot be mixed in the test specification.</para>
244       <informaltable frame="none">
245         <tgroup cols="1">
246           <colspec colname="c1" colwidth="100*"/>
247           <tbody>
248             <row>
249               <entry><para><emphasis role="bold">Note -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1300900" xreflabel=""/>If you need to create raw devices to use the sgpdd_survey tool, note that raw device 0 cannot be used due to a bug in certain versions of the &quot;raw&quot; utility (including that shipped with RHEL4U4.)</para></entry>
250             </row>
251           </tbody>
252         </tgroup>
253       </informaltable>
254       <section remap="h3">
255         <title><anchor xml:id="dbdoclet.50438212_pgfId-1289945" xreflabel=""/>24.2.1 Tuning Linux Storage Devices</title>
256         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303368" xreflabel=""/>To get large I/O transfers (1 MB) to disk, it may be necessary to tune several kernel parameters as specified:</para>
257         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1303369" xreflabel=""/>/sys/block/sdN/queue/max_sectors_kb = 4096
258 <anchor xml:id="dbdoclet.50438212_pgfId-1303370" xreflabel=""/>/sys/block/sdN/queue/max_phys_segments = 256
259 <anchor xml:id="dbdoclet.50438212_pgfId-1303371" xreflabel=""/>/proc/scsi/sg/allow_dio = 1
260 <anchor xml:id="dbdoclet.50438212_pgfId-1303372" xreflabel=""/>/sys/module/ib_srp/parameters/srp_sg_tablesize = 255
261 <anchor xml:id="dbdoclet.50438212_pgfId-1304119" xreflabel=""/>/sys/block/sdN/queue/scheduler</screen>
262       </section>
263       <section remap="h3">
264         <title><anchor xml:id="dbdoclet.50438212_pgfId-1303366" xreflabel=""/>24.2.2 Running sgpdd_survey</title>
265         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304151" xreflabel=""/>The sgpdd_survey script must be customized for the particular device being tested and for the location where the script saves its working and result files (by specifying the ${rslt} variable). Customization variables are described at the beginning of the script.</para>
266         <para><anchor xml:id="dbdoclet.50438212_pgfId-1301030" xreflabel=""/>When the sgpdd_survey script runs, it creates a number of working files and a pair of result files. The names of all the files created start with the prefix defined in the variable ${rslt}. (The default value is /tmp.) The files include:</para>
267         <itemizedlist><listitem>
268             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301035" xreflabel=""/> File containing standard output data (same as stdout)</para>
269           </listitem>
270 <listitem>
271             <para> </para>
272           </listitem>
273 </itemizedlist>
274         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1301031" xreflabel=""/>${rslt}_<emphasis>&lt;date/time&gt;</emphasis>.summary<emphasis/></screen>
275         <itemizedlist><listitem>
276             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301053" xreflabel=""/> Temporary (tmp) files</para>
277           </listitem>
278 <listitem>
279             <para> </para>
280           </listitem>
281 </itemizedlist>
282         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1301032" xreflabel=""/>${rslt}_<emphasis>&lt;date/time&gt;</emphasis>_*
283 </screen>
284         <itemizedlist><listitem>
285             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301063" xreflabel=""/> Collected tmp files for post-mortem</para>
286           </listitem>
287 <listitem>
288             <para> </para>
289           </listitem>
290 </itemizedlist>
291         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1289951" xreflabel=""/>${rslt}_<emphasis>&lt;date/time&gt;</emphasis>.detail
292 </screen>
293         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289952" xreflabel=""/>The stdout and the .summary file will contain lines like this:</para>
294         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1289953" xreflabel=""/>total_size 8388608K rsz 1024 thr 1 crg 1 180.45 MB/s 1 x 180.50 \=/ 180.50 \
295 MB/s
296 </screen>
297         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289954" xreflabel=""/>Each line corresponds to a run of the test. Each test run will have a different number of threads, record size, or number of regions.</para>
298         <itemizedlist><listitem>
299             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303307" xreflabel=""/>total_size  - Size of file being tested in KBs (8 GB in above example).</para>
300           </listitem>
301 <listitem>
302             <para> </para>
303           </listitem>
304 <listitem>
305             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303308" xreflabel=""/>rsz  - Record size in KBs (1 MB in above example).</para>
306           </listitem>
307 <listitem>
308             <para> </para>
309           </listitem>
310 <listitem>
311             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303311" xreflabel=""/>thr  - Number of threads generating I/O (1 thread in above example).</para>
312           </listitem>
313 <listitem>
314             <para> </para>
315           </listitem>
316 <listitem>
317             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303312" xreflabel=""/> crg - Current regions, the number of disjoint areas on the disk to which I/O is being sent (1 region in above example, indicating that no seeking is done).</para>
318           </listitem>
319 <listitem>
320             <para> </para>
321           </listitem>
322 <listitem>
323             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303313" xreflabel=""/>MB/s  - Aggregate bandwidth measured by dividing the total amount of data by the elapsed time (180.45 MB/s in the above example).</para>
324           </listitem>
325 <listitem>
326             <para> </para>
327           </listitem>
328 <listitem>
329             <para><anchor xml:id="dbdoclet.50438212_pgfId-1303314" xreflabel=""/>MB/s  - The remaining numbers show the number of regions X performance of the slowest disk as a sanity check on the aggregate bandwidth.</para>
330           </listitem>
331 <listitem>
332             <para> </para>
333           </listitem>
334 </itemizedlist>
335         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289955" xreflabel=""/>If there are so many threads that the sgp_dd script is unlikely to be able to allocate I/O buffers, then ENOMEM is printed in place of the aggregate bandwidth result.</para>
336         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289956" xreflabel=""/>If one or more sgp_dd instances do not successfully report a bandwidth number, then FAILED is printed in place of the aggregate bandwidth result.</para>
337       </section>
338     </section>
339     <section remap="h2">
340       <title>24.3 <anchor xml:id="dbdoclet.50438212_26516" xreflabel=""/><anchor xml:id="dbdoclet.50438212_40624" xreflabel=""/>Testing OST Performance (obdfilter_survey<anchor xml:id="dbdoclet.50438212_marker-1289957" xreflabel=""/>)</title>
341       <para><anchor xml:id="dbdoclet.50438212_pgfId-1301303" xreflabel=""/>The obdfilter_survey script generates sequential I/O from varying numbers of threads and objects (files) to simulate the I/O patterns of a Lustre client.</para>
342       <para><anchor xml:id="dbdoclet.50438212_pgfId-1301200" xreflabel=""/>The obdfilter_survey script can be run directly on the OSS node to measure the OST storage performance without any intervening network, or it can be run remotely on a Lustre client to measure the OST performance including network overhead.</para>
343       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303417" xreflabel=""/>The obdfilter_survey is used to characterize the performance of the following:</para>
344       <itemizedlist><listitem>
345           <para><anchor xml:id="dbdoclet.50438212_pgfId-1289960" xreflabel=""/><emphasis role="bold">Local file system</emphasis>  - In this mode, the obdfilter_survey script exercises one or more instances of the obdfilter directly. The script may run on one or more OSS nodes, for example, when the OSSs are all attached to the same multi-ported disk subsystem.</para>
346         </listitem>
347 <listitem>
348           <para> </para>
349         </listitem>
350 </itemizedlist>
351       <para><anchor xml:id="dbdoclet.50438212_pgfId-1289962" xreflabel=""/>Run the script using the case=disk parameter to run the test against all the local OSTs. The script automatically detects all local OSTs and includes them in the survey.</para>
352       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303453" xreflabel=""/>To run the test against only specific OSTs, run the script using the target= parameter to list the OSTs to be tested explicitly. If some OSTs are on remote nodes, specify their hostnames in addition to the OST name (for example, oss2:lustre-OST0004).</para>
353       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303461" xreflabel=""/>All obdfilter instances are driven directly. The script automatically loads the obdecho module (if required) and creates one instance of echo_client for each obdfilter instance in order to generate I/O requests directly to the OST.</para>
354       <para><anchor xml:id="dbdoclet.50438212_pgfId-1304398" xreflabel=""/>For more details, see <link xl:href="BenchmarkingTests.html#50438212_59319">Testing Local Disk Performance</link>.</para>
355       <itemizedlist><listitem>
356           <para><anchor xml:id="dbdoclet.50438212_pgfId-1289963" xreflabel=""/><emphasis role="bold">Network</emphasis>  - In this mode, the Lustre client generates I/O requests over the network but these requests are not sent to the OST file system. The OSS node runs the obdecho server to receive the requests but discards them before they are sent to the disk.</para>
357         </listitem>
358 <listitem>
359           <para> </para>
360         </listitem>
361 </itemizedlist>
362       <para><anchor xml:id="dbdoclet.50438212_pgfId-1303500" xreflabel=""/>Pass the parameters case=network and target=<emphasis>&lt;hostname</emphasis>|<emphasis>IP_of_server&gt;</emphasis> to the script. For each network case, the script does the required setup.</para>
363       <para><anchor xml:id="dbdoclet.50438212_pgfId-1304403" xreflabel=""/>For more details, see <link xl:href="BenchmarkingTests.html#50438212_36037">Testing Network Performance</link>.</para>
364       <itemizedlist><listitem>
365           <para><anchor xml:id="dbdoclet.50438212_pgfId-1304334" xreflabel=""/><emphasis role="bold">Remote file system over the network</emphasis>  - In this mode the obdfilter_survey script generates I/O from a Lustre client to a remote OSS to write the data to the file system.</para>
366         </listitem>
367 <listitem>
368           <para> </para>
369         </listitem>
370 </itemizedlist>
371       <para><anchor xml:id="dbdoclet.50438212_pgfId-1304336" xreflabel=""/>To run the test against all the local OSCs, pass the parameter case=netdisk to the script. Alternately you can pass the target= parameter with one or more OSC devices (e.g., lustre-OST0000-osc-ffff88007754bc00) against which the tests are to be run.</para>
372       <para><anchor xml:id="dbdoclet.50438212_pgfId-1304408" xreflabel=""/>For more details, see <link xl:href="BenchmarkingTests.html#50438212_62662">Testing Remote Disk Performance</link>.</para>
373       <informaltable frame="none">
374         <tgroup cols="2">
375           <colspec colname="c1" colwidth="5*"/>
376           <colspec colname="c2" colwidth="95*"/>
377           
378           
379           <tbody>
380             <row>
381               <entry><para><inlinemediaobject><imageobject role="html">
382                       <imagedata fileref="./shared/caution.gif" scalefit="1"/>
383                     </imageobject>
384 <imageobject role="fo">
385                       <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/caution.gif" scalefit="1" width="100%"/>
386                     </imageobject>
387 </inlinemediaobject></para></entry>
388               <entry><para><emphasis role="bold">Caution -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1304418" xreflabel=""/>The obdfilter_survey script is destructive and should not be run on devices that contain existing data that needs to be preserved. Thus, tests using obdfilter_survey should be run before the Lustre file system is placed in production.</para></entry>
389             </row>
390           </tbody>
391         </tgroup>
392       </informaltable>
393        <informaltable frame="none">
394         <tgroup cols="1">
395           <colspec colname="c1" colwidth="100*"/>
396           <tbody>
397             <row>
398               <entry><para><emphasis role="bold">Note -</emphasis> If the obdfilter_survey test is terminated before it completes, some small amount of space is leaked. You can either ignore it or reformat the file system.</para></entry>
399             </row>
400           </tbody>
401         </tgroup>
402       </informaltable>
403        <informaltable frame="none">
404         <tgroup cols="1">
405           <colspec colname="c1" colwidth="100*"/>
406           <tbody>
407             <row>
408               <entry><para><emphasis role="bold">Note -</emphasis> The obdfilter_survey script is <emphasis>NOT</emphasis> scalable beyond tens of OSTs since it is only intended to measure the I/O performance of individual storage subsystems, not the scalability of the entire system.</para></entry>
409             </row>
410           </tbody>
411         </tgroup>
412       </informaltable>
413        <informaltable frame="none">
414         <tgroup cols="1">
415           <colspec colname="c1" colwidth="100*"/>
416           <tbody>
417             <row>
418               <entry><para><emphasis role="bold">Note -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1297549" xreflabel=""/>The obdfilter_survey script must be customized, depending on the components under test and where the script’s working files should be kept. Customization variables are described at the beginning of the obdfilter_survey script. In particular, pay attention to the maximum values listed for each parameter in the script.</para></entry>
419             </row>
420           </tbody>
421         </tgroup>
422       </informaltable>
423       <section remap="h3">
424         <title><anchor xml:id="dbdoclet.50438212_pgfId-1289969" xreflabel=""/>24.3.1 <anchor xml:id="dbdoclet.50438212_59319" xreflabel=""/>Testing Local Disk Performance</title>
425         <para><anchor xml:id="dbdoclet.50438212_pgfId-1298366" xreflabel=""/>The obdfilter_survey script can be run automatically or manually against a local disk. This script profiles the overall throughput of storage hardware, including the file system and RAID layers managing the storage, by sending workloads to the OSTs that vary in thread count, object count, and I/O size.</para>
426         <para><anchor xml:id="dbdoclet.50438212_pgfId-1298465" xreflabel=""/>When the obdfilter_survey script is run, it provides information about the performance abilities of the storage hardware and shows the saturation points.</para>
427         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303569" xreflabel=""/>The plot-obdfilter script generates from the output of the obdfilter_survey a CSV file and parameters for importing into a spreadsheet or gnuplot to visualize the data.</para>
428         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297883" xreflabel=""/>To run the obdfilter_survey script, create a standard Lustre configuration; no special setup is needed.</para>
429         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297701" xreflabel=""/><emphasis role="bold">To perform an automatic run:</emphasis></para>
430         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297574" xreflabel=""/> 1. Start the Lustre OSTs.</para>
431         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304489" xreflabel=""/>The Lustre OSTs should be mounted on the OSS node(s) to be tested. The Lustre client is not required to be mounted at this time.</para>
432         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297587" xreflabel=""/> 2. Verify that the obdecho module is loaded. Run:</para>
433         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1304502" xreflabel=""/>modprobe obdecho</screen>
434         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297591" xreflabel=""/> 3. Run the obdfilter_survey script with the parameter case=disk.</para>
435         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304517" xreflabel=""/>For example, to run a local test with up to two objects (nobjhi), up to two threads (thrhi), and 1024 MB transfer size (size):</para>
436         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1297595" xreflabel=""/>$ nobjhi=2 thrhi=2 size=1024 case=disk sh obdfilter-survey
437 <anchor xml:id="dbdoclet.50438212_pgfId-1304711" xreflabel=""/> 
438 </screen>
439         <para><anchor xml:id="dbdoclet.50438212_pgfId-1299344" xreflabel=""/><emphasis role="bold">To perform a manual run:</emphasis></para>
440         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303657" xreflabel=""/> 1. Start the Lustre OSTs.</para>
441         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304535" xreflabel=""/>The Lustre OSTs should be mounted on the OSS node(s) to be tested. The Lustre client is not required to be mounted at this time.</para>
442         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305056" xreflabel=""/> 2. Verify that the obdecho module is loaded. Run:</para>
443         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305057" xreflabel=""/>modprobe obdecho</screen>
444         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303655" xreflabel=""/> 3. Determine the OST names.</para>
445         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304551" xreflabel=""/>On the OSS nodes to be tested, run the lctl dl command. The OST device names are listed in the fourth column of the output. For example:</para>
446         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1303717" xreflabel=""/>$ lctl dl |grep obdfilter
447 <anchor xml:id="dbdoclet.50438212_pgfId-1303733" xreflabel=""/>0 UP obdfilter lustre-OST0001 lustre-OST0001_UUID 1159
448 <anchor xml:id="dbdoclet.50438212_pgfId-1303751" xreflabel=""/>2 UP obdfilter lustre-OST0002 lustre-OST0002_UUID 1159
449 <anchor xml:id="dbdoclet.50438212_pgfId-1303742" xreflabel=""/>...
450 </screen>
451         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303687" xreflabel=""/> 4. List all OSTs you want to test.</para>
452         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304568" xreflabel=""/>Use the targets= parameter to list the OSTs separated by spaces. List the individual OSTs by name using the format <emphasis>&lt;fsname&gt;-&lt;OSTnumber&gt;</emphasis> (for example, lustre-OST0001). You do not have to specify an MDS or LOV.</para>
453         <para><anchor xml:id="dbdoclet.50438212_pgfId-1303805" xreflabel=""/> 5. Run the obdfilter_survey script with the targets= parameter.</para>
454         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304650" xreflabel=""/>For example, to run a local test with up to two objects (nobjhi), up to two threads (thrhi), and 1024 MB (size) transfer size:</para>
455         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1303806" xreflabel=""/>$ nobjhi=2 thrhi=2 size=1024 targets=&quot;lustre-OST0001 \
456 <anchor xml:id="dbdoclet.50438212_pgfId-1304757" xreflabel=""/>lustre-OST0002&quot; sh obdfilter-survey
457 </screen>
458       </section>
459       <section remap="h3">
460         <title><anchor xml:id="dbdoclet.50438212_pgfId-1289982" xreflabel=""/>24.3.2 <anchor xml:id="dbdoclet.50438212_36037" xreflabel=""/>Testing Network Performance</title>
461         <para><anchor xml:id="dbdoclet.50438212_pgfId-1289983" xreflabel=""/>The obdfilter_survey script can only be run automatically against a network; no manual test is provided.</para>
462         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297847" xreflabel=""/>To run the network test, a specific Lustre setup is needed. Make sure that these configuration requirements have been met.</para>
463         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305077" xreflabel=""/><emphasis role="bold">To perform an automatic run:</emphasis></para>
464         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305363" xreflabel=""/> 1. Start the Lustre OSTs.</para>
465         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305364" xreflabel=""/>The Lustre OSTs should be mounted on the OSS node(s) to be tested. The Lustre client is not required to be mounted at this time.</para>
466         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305365" xreflabel=""/> 2. Verify that the obdecho module is loaded. Run:</para>
467         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305366" xreflabel=""/>modprobe obdecho</screen>
468         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305389" xreflabel=""/> 3. Start lctl and check the device list, which must be empty. Run:</para>
469         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305940" xreflabel=""/>lctl dl
470 </screen>
471         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305639" xreflabel=""/> 4. Run the obdfilter_survey script with the parameters case=network and <emphasis role="bold">targets=</emphasis><emphasis>&lt;hostname</emphasis>|<emphasis>ip_of_server&gt;</emphasis>. For example:</para>
472         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1304777" xreflabel=""/>$ nobjhi=2 thrhi=2 size=1024 targets=&quot;oss1 oss2&quot; case=network \
473 sh obdfilter-survey
474 </screen>
475         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297769" xreflabel=""/> 5. On the server side, view the statistics at:</para>
476         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1297820" xreflabel=""/>/proc/fs/lustre/obdecho/<emphasis>&lt;echo_srv&gt;</emphasis>/stats
477 </screen>
478         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297824" xreflabel=""/>where <emphasis>&lt;echo_srv&gt;</emphasis> is the obdecho server created by the script.</para>
479       </section>
480       <section remap="h3">
481         <title><anchor xml:id="dbdoclet.50438212_pgfId-1297766" xreflabel=""/>24.3.3 <anchor xml:id="dbdoclet.50438212_62662" xreflabel=""/>Testing Remote Disk Performance</title>
482         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297832" xreflabel=""/>The obdfilter_survey script can be run automatically or manually against a network disk. To run the network disk test, start with a standard Lustre configuration. No special setup is needed.</para>
483         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297904" xreflabel=""/><emphasis role="bold">To perform an automatic run:</emphasis></para>
484         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305421" xreflabel=""/> 1. Start the Lustre OSTs.</para>
485         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305422" xreflabel=""/>The Lustre OSTs should be mounted on the OSS node(s) to be tested. The Lustre client is not required to be mounted at this time.</para>
486         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305423" xreflabel=""/> 2. Verify that the obdecho module is loaded. Run:</para>
487         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305424" xreflabel=""/>modprobe obdecho</screen>
488         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297925" xreflabel=""/> 3. Run the obdfilter_survey script with the parameter case=netdisk. For example:</para>
489         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1297926" xreflabel=""/>$ nobjhi=2 thrhi=2 size=1024 case=netdisk sh obdfilter-survey
490 </screen>
491         <para><anchor xml:id="dbdoclet.50438212_pgfId-1297909" xreflabel=""/><emphasis role="bold">To perform a manual run:</emphasis></para>
492         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305433" xreflabel=""/> 1. Start the Lustre OSTs.</para>
493         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305434" xreflabel=""/>The Lustre OSTs should be mounted on the OSS node(s) to be tested. The Lustre client is not required to be mounted at this time.</para>
494         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305435" xreflabel=""/> 2. Verify that the obdecho module is loaded. Run:</para>
495         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305436" xreflabel=""/>modprobe obdecho</screen>
496         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305437" xreflabel=""/> 3. Determine the OSC names.</para>
497         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305438" xreflabel=""/>On the OSS nodes to be tested, run the lctl dl command. The OSC device names are listed in the fourth column of the output. For example:</para>
498         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305483" xreflabel=""/>$ lctl dl |grep osc
499 <anchor xml:id="dbdoclet.50438212_pgfId-1305461" xreflabel=""/>3 UP osc lustre-OST0000-osc-ffff88007754bc00 54b91eab-0ea9-1516-b571-5e6df3\
500 49592e 5
501 <anchor xml:id="dbdoclet.50438212_pgfId-1305462" xreflabel=""/>4 UP osc lustre-OST0001-osc-ffff88007754bc00 54b91eab-0ea9-1516-b571-5e6df3\
502 49592e 5
503 <anchor xml:id="dbdoclet.50438212_pgfId-1305515" xreflabel=""/>...
504 </screen>
505         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305443" xreflabel=""/> 4. List all OSCs you want to test.</para>
506         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305444" xreflabel=""/>Use the targets= parameter to list the OSCs separated by spaces. List the individual OSCs by name using the format <emphasis>&lt;fsname&gt;-&lt;OST_name&gt;</emphasis>-osc-<emphasis>&lt;OSC_number&gt;</emphasis> (for example, lustre-OST0000-osc-ffff88007754bc00). You do not have to specify an MDS or LOV.</para>
507         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305445" xreflabel=""/> 5. Run the obdfilter_survey script with the targets= parameter and case=netdisk.</para>
508         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305446" xreflabel=""/>An example of a local test run with up to two objects (nobjhi), up to two threads (thrhi), and 1024 MB (size) transfer size is shown below:</para>
509         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1305576" xreflabel=""/>$ nobjhi=2 thrhi=2 size=1024 \
510 <anchor xml:id="dbdoclet.50438212_pgfId-1305577" xreflabel=""/>targets=&quot;lustre-OST0000-osc-ffff88007754bc00 \
511 <anchor xml:id="dbdoclet.50438212_pgfId-1305578" xreflabel=""/>lustre-OST0001-osc-ffff88007754bc00&quot; \
512 <anchor xml:id="dbdoclet.50438212_pgfId-1305579" xreflabel=""/>sh obdfilter-survey
513 </screen>
514       </section>
515       <section remap="h3">
516         <title><anchor xml:id="dbdoclet.50438212_pgfId-1290021" xreflabel=""/>24.3.4 Output Files</title>
517         <para><anchor xml:id="dbdoclet.50438212_pgfId-1298013" xreflabel=""/>When the obdfilter_survey script runs, it creates a number of working files and a pair of result files. All files start with the prefix defined in the variable ${rslt}.</para>
518         <informaltable frame="all">
519           <tgroup cols="2">
520             <colspec colname="c1" colwidth="50*"/>
521             <colspec colname="c2" colwidth="50*"/>
522             <thead>
523               <row>
524                 <entry><para><emphasis role="bold"><anchor xml:id="dbdoclet.50438212_pgfId-1303053" xreflabel=""/>File</emphasis></para></entry>
525                 <entry><para><emphasis role="bold"><anchor xml:id="dbdoclet.50438212_pgfId-1303055" xreflabel=""/>Description</emphasis></para></entry>
526               </row>
527             </thead>
528             <tbody>
529               <row>
530                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303057" xreflabel=""/>${rslt}.summary</para></entry>
531                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303059" xreflabel=""/>Same as stdout</para></entry>
532               </row>
533               <row>
534                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303061" xreflabel=""/>${rslt}.script_*</para></entry>
535                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303063" xreflabel=""/>Per-host test script files</para></entry>
536               </row>
537               <row>
538                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303065" xreflabel=""/>${rslt}.detail_tmp*</para></entry>
539                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303067" xreflabel=""/>Per-OST result files</para></entry>
540               </row>
541               <row>
542                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303069" xreflabel=""/>${rslt}.detail</para></entry>
543                 <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1303071" xreflabel=""/>Collected result files for post-mortem</para></entry>
544               </row>
545             </tbody>
546           </tgroup>
547         </informaltable>
548         <para><anchor xml:id="dbdoclet.50438212_pgfId-1290022" xreflabel=""/>The obdfilter_survey script iterates over the given number of threads and objects performing the specified tests and checks that all test processes have completed successfully.</para>
549         <informaltable frame="none">
550           <tgroup cols="1">
551             <colspec colname="c1" colwidth="100*"/>
552             <tbody>
553               <row>
554                 <entry><para><emphasis role="bold">Note -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1298109" xreflabel=""/>The obdfilter_survey script may not clean up properly if it is aborted or if it encounters an unrecoverable error. In this case, a manual cleanup may be required, possibly including killing any running instances of lctl (local or remote), removing echo_client instances created by the script and unloading obdecho.</para></entry>
555               </row>
556             </tbody>
557           </tgroup>
558         </informaltable>
559         <section remap="h4">
560           <title><anchor xml:id="dbdoclet.50438212_pgfId-1298104" xreflabel=""/>24.3.4.1 Script Output</title>
561           <para><anchor xml:id="dbdoclet.50438212_pgfId-1298134" xreflabel=""/>The .summary file and stdout of the obdfilter_survey script contain lines like:</para>
562           <screen><anchor xml:id="dbdoclet.50438212_pgfId-1294098" xreflabel=""/>ost 8 sz 67108864K rsz 1024 obj 8 thr 8 write 613.54 [ 64.00, 82.00]
563 </screen>
564           <para><anchor xml:id="dbdoclet.50438212_pgfId-1294150" xreflabel=""/>Where:</para>
565           <informaltable frame="all">
566             <tgroup cols="2">
567               <colspec colname="c1" colwidth="50*"/>
568               <colspec colname="c2" colwidth="50*"/>
569               <thead>
570                 <row>
571                   <entry><para><emphasis role="bold"><anchor xml:id="dbdoclet.50438212_pgfId-1294115" xreflabel=""/>Parameter and value</emphasis></para></entry>
572                   <entry><para><emphasis role="bold"><anchor xml:id="dbdoclet.50438212_pgfId-1294117" xreflabel=""/>Description</emphasis></para></entry>
573                 </row>
574               </thead>
575               <tbody>
576                 <row>
577                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294119" xreflabel=""/>ost 8</para></entry>
578                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294121" xreflabel=""/>Total number of OSTs being tested.</para></entry>
579                 </row>
580                 <row>
581                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294123" xreflabel=""/>sz 67108864K</para></entry>
582                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294125" xreflabel=""/>Total amount of data read or written (in KB).</para></entry>
583                 </row>
584                 <row>
585                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294127" xreflabel=""/>rsz 1024</para></entry>
586                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294129" xreflabel=""/>Record size (size of each echo_client I/O, in KB).</para></entry>
587                 </row>
588                 <row>
589                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294131" xreflabel=""/>obj 8</para></entry>
590                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294133" xreflabel=""/>Total number of objects over all OSTs.</para></entry>
591                 </row>
592                 <row>
593                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294135" xreflabel=""/>thr 8</para></entry>
594                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294137" xreflabel=""/>Total number of threads over all OSTs and objects.</para></entry>
595                 </row>
596                 <row>
597                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294139" xreflabel=""/>write</para></entry>
598                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294141" xreflabel=""/>Test name. If more tests have been specified, they all appear on the same line.</para></entry>
599                 </row>
600                 <row>
601                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294143" xreflabel=""/>613.54</para></entry>
602                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294145" xreflabel=""/>Aggregate bandwidth over all OSTs (measured by dividing the total number of MB by the elapsed time).</para></entry>
603                 </row>
604                 <row>
605                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294147" xreflabel=""/>[ 64.00, 82.00]</para></entry>
606                   <entry><para> <anchor xml:id="dbdoclet.50438212_pgfId-1294149" xreflabel=""/>Minimum and maximum instantaneous bandwidths on an individual OST.</para></entry>
607                 </row>
608               </tbody>
609             </tgroup>
610           </informaltable>
611           <informaltable frame="none">
612             <tgroup cols="1">
613               <colspec colname="c1" colwidth="100*"/>
614               <tbody>
615                 <row>
616                   <entry><para><emphasis role="bold">Note -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1290062" xreflabel=""/>Although the numbers of threads and objects are specified per-OST in the customization section of the script, the reported results are aggregated over all OSTs.</para></entry>
617                 </row>
618               </tbody>
619             </tgroup>
620           </informaltable>
621         </section>
622         <section remap="h4">
623           <title><anchor xml:id="dbdoclet.50438212_pgfId-1290063" xreflabel=""/>24.3.4.2 Visualizing Results</title>
624           <para><anchor xml:id="dbdoclet.50438212_pgfId-1290064" xreflabel=""/>It is useful to import the obdfilter_survey script summary data (it is fixed width) into Excel (or any graphing package) and graph the bandwidth versus the number of threads for varying numbers of concurrent regions. This shows how the OSS performs for a given number of concurrently-accessed objects (files) with varying numbers of I/Os in flight.</para>
625           <para><anchor xml:id="dbdoclet.50438212_pgfId-1305127" xreflabel=""/>It is also useful to monitor and record average disk I/O sizes during each test using the &quot;disk io size&quot; histogram in the file /proc/fs/lustre/obdfilter/*/brw_stats (see <link xl:href="LustreProc.html#50438271_55057">Watching the OST Block I/O Stream</link> for details). These numbers help identify problems in the system when full-sized I/Os are not submitted to the underlying disk. This may be caused by problems in the device driver or Linux block layer.</para>
627           <para><anchor xml:id="dbdoclet.50438212_pgfId-1290066" xreflabel=""/>The plot-obdfilter script included in the I/O toolkit is an example of processing output files to a .csv format and plotting a graph using gnuplot.</para>
628         </section>
629       </section>
630     </section>
631     <section remap="h2">
632       <title>24.4 <anchor xml:id="dbdoclet.50438212_85136" xreflabel=""/>Testing OST I/O Performance (ost_<anchor xml:id="dbdoclet.50438212_marker-1290067" xreflabel=""/>survey)</title>
633       <para><anchor xml:id="dbdoclet.50438212_pgfId-1290069" xreflabel=""/>The ost_survey tool is a shell script that uses lfs setstripe to perform I/O against a single OST. The script writes a file (currently using dd) to each OST in the Lustre file system, and compares read and write speeds. The ost_survey tool is used to detect anomalies between otherwise identical disk subsystems.</para>
634       <informaltable frame="none">
635         <tgroup cols="1">
636           <colspec colname="c1" colwidth="100*"/>
637           <tbody>
638             <row>
639               <entry><para><emphasis role="bold">Note -</emphasis><anchor xml:id="dbdoclet.50438212_pgfId-1290070" xreflabel=""/>We have frequently discovered wide performance variations across all LUNs in a cluster. This may be caused by faulty disks, RAID parity reconstruction during the test, or faulty network hardware.</para></entry>
640             </row>
641           </tbody>
642         </tgroup>
643       </informaltable>
644       <para><anchor xml:id="dbdoclet.50438212_pgfId-1290071" xreflabel=""/>To run the ost_survey script, supply a file size (in KB) and the Lustre mount point. For example, run:</para>
645       <screen><anchor xml:id="dbdoclet.50438212_pgfId-1290072" xreflabel=""/>$ ./ost-survey.sh 10 /mnt/lustre
646 </screen>
647       <para><anchor xml:id="dbdoclet.50438212_pgfId-1301765" xreflabel=""/>Typical output is:</para>
648       <screen><anchor xml:id="dbdoclet.50438212_pgfId-1290073" xreflabel=""/>Average read Speed:                  6.73
649 <anchor xml:id="dbdoclet.50438212_pgfId-1290074" xreflabel=""/>Average write Speed:                       5.41
650 <anchor xml:id="dbdoclet.50438212_pgfId-1290075" xreflabel=""/>read - Worst OST indx 0                    5.84 MB/s
651 <anchor xml:id="dbdoclet.50438212_pgfId-1290076" xreflabel=""/>write - Worst OST indx 0           3.77 MB/s
652 <anchor xml:id="dbdoclet.50438212_pgfId-1290077" xreflabel=""/>read - Best OST indx 1                     7.38 MB/s
653 <anchor xml:id="dbdoclet.50438212_pgfId-1290078" xreflabel=""/>write - Best OST indx 1                    6.31 MB/s
654 <anchor xml:id="dbdoclet.50438212_pgfId-1290079" xreflabel=""/>3 OST devices found
655 <anchor xml:id="dbdoclet.50438212_pgfId-1290080" xreflabel=""/>Ost index 0 Read speed                     5.84            Write speed     \
656         3.77
657 <anchor xml:id="dbdoclet.50438212_pgfId-1290081" xreflabel=""/>Ost index 0 Read time                      0.17            Write time      \
658         0.27
659 <anchor xml:id="dbdoclet.50438212_pgfId-1290082" xreflabel=""/>Ost index 1 Read speed                     7.38            Write speed     \
660         6.31
661 <anchor xml:id="dbdoclet.50438212_pgfId-1290083" xreflabel=""/>Ost index 1 Read time                      0.14            Write time      \
662         0.16
663 <anchor xml:id="dbdoclet.50438212_pgfId-1290084" xreflabel=""/>Ost index 2 Read speed                     6.98            Write speed     \
664         6.16
665 <anchor xml:id="dbdoclet.50438212_pgfId-1290085" xreflabel=""/>Ost index 2 Read time                      0.14            Write time      \
666         0.16 
667 </screen>
668     </section>
669     <section remap="h2">
670       <title>24.5 <anchor xml:id="dbdoclet.50438212_58201" xreflabel=""/>Collecting Application Profiling Information (stats-collect)</title>
671       <para><anchor xml:id="dbdoclet.50438212_pgfId-1299523" xreflabel=""/>The stats-collect utility contains the following scripts used to collect application profiling information from Lustre clients and servers:</para>
672       <itemizedlist><listitem>
673           <para><anchor xml:id="dbdoclet.50438212_pgfId-1299524" xreflabel=""/>lstat.sh  - Script for a single node that is run on each profile node.</para>
674         </listitem>
678 <listitem>
679           <para><anchor xml:id="dbdoclet.50438212_pgfId-1299525" xreflabel=""/>gather_stats_everywhere.sh  - Script that collects statistics.</para>
680         </listitem>
684 <listitem>
685           <para><anchor xml:id="dbdoclet.50438212_pgfId-1299526" xreflabel=""/>config.sh  - Script that contains customized configuration descriptions.</para>
686         </listitem>
690 </itemizedlist>
691       <para><anchor xml:id="dbdoclet.50438212_pgfId-1299527" xreflabel=""/>The stats-collect utility requires:</para>
692       <itemizedlist><listitem>
693           <para><anchor xml:id="dbdoclet.50438212_pgfId-1299528" xreflabel=""/> Lustre to be installed and set up on your cluster</para>
694         </listitem>
698 <listitem>
699           <para><anchor xml:id="dbdoclet.50438212_pgfId-1299529" xreflabel=""/> SSH and SCP access to these nodes without requiring a password</para>
700         </listitem>
704 </itemizedlist>
705       <section remap="h3">
706         <title><anchor xml:id="dbdoclet.50438212_pgfId-1299531" xreflabel=""/>24.5.1 Using stats-collect</title>
707         <para><anchor xml:id="dbdoclet.50438212_pgfId-1301821" xreflabel=""/>The stats-collect utility is configured by including profiling configuration variables in the config.sh script. Each configuration variable takes the following form, where 0 indicates statistics are to be collected only when the script starts and stops and <emphasis>n</emphasis> indicates the interval in seconds at which statistics are to be collected:</para>
708         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1301853" xreflabel=""/><emphasis>&lt;statistic&gt;</emphasis>_INTERVAL=<emphasis>[</emphasis>0<emphasis>|n]</emphasis></screen>
709         <para><anchor xml:id="dbdoclet.50438212_pgfId-1301887" xreflabel=""/>Statistics that can be collected include:</para>
710         <itemizedlist><listitem>
711             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301893" xreflabel=""/>VMSTAT  - Memory and CPU usage and aggregate read/write operations</para>
712           </listitem>
716 <listitem>
717             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301898" xreflabel=""/>SERVICE  - Lustre OST and MDT RPC service statistics</para>
718           </listitem>
722 <listitem>
723             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301899" xreflabel=""/>BRW  - OST block read/write statistics (brw_stats)</para>
724           </listitem>
728 <listitem>
729             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301900" xreflabel=""/>SDIO  - SCSI disk IO statistics (sd_iostats)</para>
730           </listitem>
734 <listitem>
735             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301901" xreflabel=""/>MBALLOC  - ldiskfs block allocation statistics</para>
736           </listitem>
740 <listitem>
741             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301902" xreflabel=""/>IO  - Lustre target operations statistics</para>
742           </listitem>
746 <listitem>
747             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301903" xreflabel=""/>JBD  - ldiskfs journal statistics</para>
748           </listitem>
752 <listitem>
753             <para><anchor xml:id="dbdoclet.50438212_pgfId-1301904" xreflabel=""/>CLIENT  - Lustre OSC request statistics</para>
754           </listitem>
758 </itemizedlist>
759         <para><anchor xml:id="dbdoclet.50438212_pgfId-1301989" xreflabel=""/>To collect profile information:</para>
760         <para><anchor xml:id="dbdoclet.50438212_pgfId-1302001" xreflabel=""/> 1. Begin collecting statistics on each node specified in the config.sh script.</para>
761         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304893" xreflabel=""/>Start the collect profile daemon on each node by entering:</para>
762         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1302024" xreflabel=""/>sh gather_stats_everywhere.sh config.sh start 
763 </screen>
764         <para><anchor xml:id="dbdoclet.50438212_pgfId-1302184" xreflabel=""/> 2. Run the test.</para>
765         <para><anchor xml:id="dbdoclet.50438212_pgfId-1302047" xreflabel=""/> 3. Stop collecting statistics on each node, clean up the temporary file, and create a profiling tarball.</para>
766         <para><anchor xml:id="dbdoclet.50438212_pgfId-1304926" xreflabel=""/>Enter:</para>
767         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1302051" xreflabel=""/>sh gather_stats_everywhere.sh config.sh stop <emphasis>&lt;log_name.tgz&gt;</emphasis></screen>
768         <para><anchor xml:id="dbdoclet.50438212_pgfId-1305257" xreflabel=""/>When <emphasis>&lt;log_name.tgz&gt;</emphasis> is specified, a profile tarball /tmp/<emphasis>&lt;log_name.tgz&gt;</emphasis> is created.</para>
769         <para><anchor xml:id="dbdoclet.50438212_pgfId-1302055" xreflabel=""/> 4. Analyze the collected statistics and create a csv tarball for the specified profiling data.</para>
770         <screen><anchor xml:id="dbdoclet.50438212_pgfId-1302059" xreflabel=""/>sh gather_stats_everywhere.sh config.sh analyse log_tarball.tgz csv
771 </screen>
772         <para><anchor xml:id="dbdoclet.50438212_pgfId-1302020" xreflabel=""/> </para>
773         <!--
774 Begin SiteCatalyst code version: G.5.
775 -->
776         <!--
777 End SiteCatalyst code version: G.5.
778 -->
779           <informaltable frame="none">
780           <tgroup cols="3">
781             <colspec colname="c1" colwidth="33*"/>
782             <colspec colname="c2" colwidth="33*"/>
783             <colspec colname="c3" colwidth="33*"/>
784             
785             
786             
787             <tbody>
788               <row>
789                 <entry align="left"><para>Lustre 2.0 Operations Manual</para></entry>
790                 <entry align="right"><para>821-2076-10</para></entry>
791                 <entry align="right" valign="top"><para><link xl:href="index.html"><inlinemediaobject><imageobject role="html">
792                           <imagedata contentdepth="26" contentwidth="30" fileref="./shared/toc01.gif" scalefit="1"/>
793                         </imageobject>
794 <imageobject role="fo">
795                           <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/toc01.gif" scalefit="1" width="100%"/>
796                         </imageobject>
797 </inlinemediaobject></link><link xl:href="LNETSelfTest.html"><inlinemediaobject><imageobject role="html">
798                           <imagedata contentdepth="26" contentwidth="30" fileref="./shared/prev01.gif" scalefit="1"/>
799                         </imageobject>
800 <imageobject role="fo">
801                           <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/prev01.gif" scalefit="1" width="100%"/>
802                         </imageobject>
803 </inlinemediaobject></link><link xl:href="LustreTuning.html"><inlinemediaobject><imageobject role="html">
804                           <imagedata contentdepth="26" contentwidth="30" fileref="./shared/next01.gif" scalefit="1"/>
805                         </imageobject>
806 <imageobject role="fo">
807                           <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/next01.gif" scalefit="1" width="100%"/>
808                         </imageobject>
809 </inlinemediaobject></link><link xl:href="ix.html"><inlinemediaobject><imageobject role="html">
810                           <imagedata contentdepth="26" contentwidth="30" fileref="./shared/index01.gif" scalefit="1"/>
811                         </imageobject>
812 <imageobject role="fo">
813                           <imagedata contentdepth="100%" contentwidth="" depth="" fileref="./shared/index01.gif" scalefit="1" width="100%"/>
814                         </imageobject>
815 </inlinemediaobject></link></para></entry>
816               </row>
817             </tbody>
818           </tgroup>
819         </informaltable>
820         <para><link xl:href=""/></para>
821         <para><link xl:href="copyright.html">Copyright</link> © 2011, Oracle and/or its affiliates. All rights reserved.</para>
822       </section>
823     </section>
824   </section>
825 </article>