LUDOC-7 cleanups to Lustre limits table

author Andreas Dilger <adilger@whamcloud.com>

Wed, 6 Jul 2011 18:28:24 +0000 (12:28 -0600)

committer Andreas Dilger <adilger@whamcloud.com>

Wed, 6 Jul 2011 20:18:51 +0000 (14:18 -0600)
author Andreas Dilger <adilger@whamcloud.com>
Wed, 6 Jul 2011 18:28:24 +0000 (12:28 -0600)
committer Andreas Dilger <adilger@whamcloud.com>
Wed, 6 Jul 2011 20:18:51 +0000 (14:18 -0600)
diff --git a/UnderstandingLustre.xml b/UnderstandingLustre.xml

index 4e84b97..a806956 100644 (file)
--- a/UnderstandingLustre.xml
+++ b/UnderstandingLustre.xml
@@ -34,21 +34,17 @@
        <para><xref linkend="understandinglustre.tab1"/> shows the practical range of scalability and performance characteristics of the Lustre file system and some test results in production systems.</para>
        <table frame="all">
          <title xml:id="understandinglustre.tab1">Lustre Scalability and Performance</title>
-        <tgroup cols="4">
+        <tgroup cols="3">
            <colspec colname="c1" colwidth="1*"/>
            <colspec colname="c2" colwidth="2*"/>
            <colspec colname="c3" colwidth="3*"/>
-          <colspec colname="c4" colwidth="3*"/>
            <thead>
              <row>
                <entry>
-                <para><emphasis role="bold"/></para>
-              </entry>
-              <entry>
                  <para><emphasis role="bold">Feature</emphasis></para>
                </entry>
                <entry>
-                <para><emphasis role="bold">Practical Range</emphasis></para>
+                <para><emphasis role="bold">Current Practical Range</emphasis></para>
                </entry>
                <entry>
                  <para><emphasis role="bold">Tested in Production</emphasis></para>
@@ -58,10 +54,7 @@
            <tbody>
              <row>
                <entry>
-                <para> <emphasis role="bold">Clients</emphasis></para>
-              </entry>
-              <entry>
-                <para> Scalabity</para>
+                <para> <emphasis role="bold">Client Scalability</emphasis></para>
                </entry>
                <entry>
                  <para> 100-100,000</para>
@@ -72,81 +65,100 @@
                </entry>
              </row>
              <row>
-              <entry/>
                <entry>
-                <para> Performance</para>
+                <para>Client Performance</para>
                </entry>
                <entry>
-                <para> <emphasis>Single client: </emphasis> 2 GB/sec I/O,</para>
-                <para>1,000 metadata ops/sec</para>
-                <para><emphasis>File system:</emphasis></para>
-                <para>2.5 TB/sec</para>
+                <para> <emphasis>Single client: </emphasis></para>
+                <para>I/O 90% of network bandwidth</para>
+                <para><emphasis>Aggregate:</emphasis></para>
+                <para>2.5 TB/sec I/O</para>
                </entry>
                <entry>
-                <para> <emphasis>Single client: </emphasis> 2 GB/sec</para>
-                <para>&#160;</para>
-                <para><emphasis>File system:</emphasis></para>
-                <para>240 GB/sec I/O</para>
+                <para> <emphasis>Single client: </emphasis></para>
+                <para>2 GB/sec I/O, 1000 metadata ops/sec</para>
+                <para><emphasis>Aggregate:</emphasis></para>
+                <para>240 GB/sec I/O </para>
                </entry>
              </row>
              <row>
                <entry>
-                <para> <emphasis role="bold">OSSs</emphasis></para>
+                <para> <emphasis role="bold">OSS Scalability</emphasis></para>
                </entry>
                <entry>
-                <para> Scalabity</para>
+                <para> <emphasis>Single OSS:</emphasis></para>
+                <para>1-8 OSTs per OSS, 128TB per OST</para>
+                <para> <emphasis>OSS count:</emphasis></para>
+                <para>500 OSSs, with up to 4000 OSTs</para>
                </entry>
                <entry>
-                <para> <emphasis>OSSs:</emphasis></para>
-                <para>4-500 with up to 4000 OSTs</para>
-                <para>&#160;</para>
-                <para><emphasis>File system:</emphasis></para>
-                <para>64 PB, file size 320 TB</para>
-              </entry>
-              <entry>
-                <para> <emphasis>OSSs:</emphasis></para>
-                <para>450 OSSs with 1,000 OSTs</para>
-                <para>192 OSSs with 1344 OSTs</para>
-                <para><emphasis>File system:</emphasis></para>
-                <para>10 PB, file size multi-TB</para>
+                <para> <emphasis>Single OSS:</emphasis></para>
+                <para>8 OSTs per OSS, 16TB per OST</para>
+                <para> <emphasis>OSS count:</emphasis></para>
+                <para>450 OSSs with 1000 4TB OSTs</para>
+                <para>192 OSSs with 1344 8TB OSTs</para>
                </entry>
              </row>
              <row>
-              <entry/>
                <entry>
-                <para> Performance</para>
+                <para> <emphasis role="bold">OSS Performance</emphasis></para>
                </entry>
                <entry>
-                <para> Up to 5 GB/sec</para>
+                <para> <emphasis>Single OSS:</emphasis></para>
+                <para> 5 GB/sec</para>
+                <para> <emphasis>Aggregate:</emphasis></para>
+                <para> 2.5 TB/sec</para>
                </entry>
                <entry>
-                <para> OSS throughput at 2.0+ GB/sec</para>
+                <para> <emphasis>Single OSS:</emphasis></para>
+                <para> 2.0+ GB/sec</para>
+                <para> <emphasis>Aggregate:</emphasis></para>
+                <para> 240 GB/sec</para>
                </entry>
              </row>
              <row>
                <entry>
-                <para> <emphasis role="bold">MDSs</emphasis></para>
+                <para> <emphasis role="bold">MDS Scalability</emphasis></para>
+              </entry>
+              <entry>
+                <para> <emphasis>Single MDS:</emphasis></para>
+                <para> 4 billion files</para>
+                <para> <emphasis>MDS count:</emphasis></para>
+                <para> 1 + 1 (primary with one backup)</para>
                </entry>
                <entry>
-                <para> Scalabity</para>
+                <para> <emphasis>Single MDS:</emphasis></para>
+                <para> 750 million files</para>
+                <para> <emphasis>MDS count:</emphasis></para>
+                <para> 1 + 1 (primary with one backup)</para>
+              </entry>
+            </row>
+            <row>
+              <entry>
+                <para> <emphasis role="bold">MDS Performance</emphasis></para>
                </entry>
                <entry>
-                <para> 1 + 1 (failover with one backup)</para>
+                <para> 35000/s create, 100000/s stat metadata operations</para>
                </entry>
                <entry>
-                <para> &#160;</para>
+                <para> 15000/s create, 35000/s stat metadata operations</para>
                </entry>
              </row>
              <row>
-              <entry/>
                <entry>
-                <para> Performance</para>
+                <para> <emphasis role="bold">Filesystem Scalability</emphasis></para>
                </entry>
                <entry>
-                <para> Up to 35,000/s create, 100,000/s stat metadata operations</para>
+                <para> <emphasis>Single File:</emphasis></para>
+                <para>320 TB max file size</para>
+                <para> <emphasis>Aggregate:</emphasis></para>
+                <para>64 PB space, 4 billion files</para>
                </entry>
                <entry>
-                <para> 15,000/s create, 25,000/s stat metadata operations</para>
+                <para> <emphasis>Single File:</emphasis></para>
+                <para>multi-TB max file size</para>
+                <para> <emphasis>Aggregate:</emphasis></para>
+                <para>10 PB space, 750 million files</para>
                </entry>
              </row>
            </tbody>
@@ -155,42 +167,42 @@
        <para>Other Lustre features are:</para>
        <itemizedlist>
          <listitem>
-          <para><emphasis role="bold">Performance-enhanced ext4 file system:</emphasis>  Lustre uses a modified version of the ext4 journaling file system to store data and metadata. This version, called <emphasis role="italic">
+          <para><emphasis role="bold">Performance-enhanced ext4 file system:</emphasis>  Lustre uses an improved version of the ext4 journaling file system to store data and metadata. This version, called <emphasis role="italic">
                <literal>ldiskfs</literal>
              </emphasis>, has been enhanced to improve performance and provide additional functionality needed by Lustre.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">POSIX compliance</emphasis> : The full POSIX test suite passes with limited exceptions on Lustre clients. In a cluster, most operations are atomic so that clients never see stale data or metadata. Lustre supports mmap() file I/O.</para>
+          <para><emphasis role="bold">POSIX compliance:</emphasis>  The full POSIX test suite passes in an identical manner to a local ext4 filesystem, with limited exceptions on Lustre clients. In a cluster, most operations are atomic so that clients never see stale data or metadata. Lustre supports mmap() file I/O.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">High-performance heterogeneous networking:</emphasis>  Lustre supports a variety of high performance, low latency networks and permits Remote Direct Memory Access (RDMA) for Infiniband (OFED). This enables multiple, bridging RDMA networks to useLustre routing for maximum performance. Lustre also provides integrated network diagnostics.</para>
+          <para><emphasis role="bold">High-performance heterogeneous networking:</emphasis>  Lustre supports a variety of high performance, low latency networks and permits Remote Direct Memory Access (RDMA) for Infiniband (OFED) and other advanced networks for fast and efficient network transport. Multiple RDMA networks can be bridged using Lustre routing for maximum performance. Lustre also provides integrated network diagnostics.</para>
          </listitem>
          <listitem>
            <para><emphasis role="bold">High-availability:</emphasis>  Lustre offers active/active failover using shared storage partitions for OSS targets (OSTs) and active/passive failover using a shared storage partition for the MDS target (MDT). This allows application transparent recovery. Lustre can work with a variety of high availability (HA) managers to allow automated failover and has no single point of failure (NSPF). Multiple mount protection (MMP) provides integrated protection from errors in highly-available systems that would otherwise cause file system corruption.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Security:</emphasis>  In Lustre, an option is available to have TCP connections only from privileged ports. Group membership handling is server-based.</para>
+          <para><emphasis role="bold">Security:</emphasis>  By default TCP connections are only allowed from privileged ports. Unix group membership verification is done on the MDS.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Access control list (ACL), exended attributes</emphasis> : Currently, the Lustre security model follows that of a UNIX file system, enhanced with POSIX ACLs. Noteworthy additional features include root squash and connecting only from privileged ports.</para>
+          <para><emphasis role="bold">Access control list (ACL), extended attributes:</emphasis>  The Lustre security model follows that of a UNIX file system, enhanced with POSIX ACLs. Noteworthy additional features include root squash.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Interoperability:</emphasis>  Lustre runs on a variety of CPU architectures and mixed-endian clusters and is interoperable between adjacent Lustre software releases.</para>
+          <para><emphasis role="bold">Interoperability:</emphasis>  Lustre runs on a variety of CPU architectures and mixed-endian clusters and is interoperable between successive major Lustre software releases.</para>
          </listitem>
          <listitem>
            <para><emphasis role="bold">Object-based architecture:</emphasis>  Clients are isolated from the on-disk file structure enabling upgrading of the storage architecture without affecting the client.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Byte-granular file and fine-grained metadata locking:</emphasis>  Any clients can operate on the same file and directory concurrently. A Lustre distributed lock manager (DLM) ensures that files are coherent between all the clients in a file system and the servers. Multiple clients can access the same files concurrently, and the DLM ensures that all the clients see consistent data at all times. The DLM on each MDT and each OST manages the locking for objects stored in that file system. The MDT manages locks on inodes permissions and path names. OST manages locks for each stripe of a file and the data within each object</para>
+          <para><emphasis role="bold">Byte-granular file and fine-grained metadata locking:</emphasis>  Many clients can read and modify the same file or directory concurrently. The Lustre distributed lock manager (LDLM) ensures that files are coherent between all clients and servers in the filesystem. The MDT DLM manages locks on inode permissions and pathnames. Each OST has its own DLM for locks on file stripes stored thereon, which scales the locking performance as the filesystem grows.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Quotas</emphasis> : User and group quotas are available for Lustre.</para>
+          <para><emphasis role="bold">Quotas:</emphasis> User and group quotas are available for Lustre.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">OSS addition</emphasis> : The capacity of a Lustre file system and aggregate cluster bandwidth can be increased without interrupting any operations by adding a new OSS with OSTs to the cluster.</para>
+          <para><emphasis role="bold">Capacity growth:</emphasis>  The size of a Lustre file system and aggregate cluster bandwidth can be increased without interruption by adding a new OSS with OSTs to the cluster.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Controlled striping</emphasis> : The distribution of files across OSTs can be configured on a per file, per directory, or per file system basis. This allows file I/O to be tuned to specific application requirements. Lustre uses RAID-0 striping and balances space usage across OSTs.</para>
+          <para><emphasis role="bold">Controlled striping:</emphasis> The layout of files across OSTs can be configured on a per file, per directory, or per file system basis. This allows file I/O to be tuned to specific application requirements within a single filesystem. Lustre uses RAID-0 striping and balances space usage across OSTs.</para>
          </listitem>
          <listitem>
            <para><emphasis role="bold">Network data integrity protection:</emphasis>  A checksum of all data sent from the client to the OSS protects against corruption during data transfer.</para>
@@ -199,16 +211,16 @@
            <para><emphasis role="bold">MPI I/O:</emphasis>  Lustre has a dedicated MPI ADIO layer that optimizes parallel I/O to match the underlying file system architecture.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">NFS and CIFS export:</emphasis>  Lustre files can be re-exported using NFS or CIFS (via Samba) enabling them to be shared with a non-Linux client.</para>
+          <para><emphasis role="bold">NFS and CIFS export:</emphasis>  Lustre files can be re-exported using NFS or CIFS (via Samba) enabling them to be shared with non-Linux clients.</para>
          </listitem>
          <listitem>
            <para><emphasis role="bold">Disaster recovery tool:</emphasis>  Lustre provides a distributed file system check (lfsck) that can restore consistency between storage components in case of a major file system error. Lustre can operate even in the presence of file system inconsistencies, so lfsck is not required before returning the file system to production.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Internal monitoring and instrumentation interfaces:</emphasis>  Lustre offers a variety of mechanisms to examine performance and tuning.</para>
+          <para><emphasis role="bold">Performance monitoring:</emphasis>  Lustre offers a variety of mechanisms to examine performance and tuning.</para>
          </listitem>
          <listitem>
-          <para><emphasis role="bold">Open source</emphasis> : Lustre is licensed under the GPL 2.0 license for use with Linux.</para>
+          <para><emphasis role="bold">Open source:</emphasis>  Lustre is licensed under the GPL 2.0 license for use with Linux.</para>
          </listitem>
        </itemizedlist>
      </section>
author	Andreas Dilger <adilger@whamcloud.com>
	Wed, 6 Jul 2011 18:28:24 +0000 (12:28 -0600)
committer	Andreas Dilger <adilger@whamcloud.com>
	Wed, 6 Jul 2011 20:18:51 +0000 (14:18 -0600)