From fbf9b2e77200836ffc665a4b46b4232d2cf0bf87 Mon Sep 17 00:00:00 2001 From: Richard Henwood Date: Tue, 17 May 2011 17:19:55 -0500 Subject: [PATCH] FIX: xrefs --- LustreMaintenance.xml | 317 ++++++++++++++++++++------------------------------ ManagingLNET.xml | 149 +++++++----------------- 2 files changed, 169 insertions(+), 297 deletions(-) diff --git a/LustreMaintenance.xml b/LustreMaintenance.xml index 112925d..dbb073b 100644 --- a/LustreMaintenance.xml +++ b/LustreMaintenance.xml @@ -1,74 +1,46 @@ - + - Lustre Maintenance + Lustre Maintenance Once you have the Lustre file system up and running, you can use the procedures in this section to perform these basic Lustre maintenance tasks: + - Working with Inactive OSTs + Working with Inactive OSTs - - - - Finding Nodes in the Lustre File System - - - - - - Mounting a Server Without Lustre Service - - - - - - Regenerating Lustre Configuration Logs - - - + Finding Nodes in the Lustre File System - Changing a Server NID + Mounting a Server Without Lustre Service - - - - Adding a New OST to a Lustre File System - - - - - - Removing and Restoring OSTs + Regenerating Lustre Configuration Logs - + Changing a Server NID - Aborting Recovery + Adding a New OST to a Lustre File System - + Removing and Restoring OSTs - Determining Which Machine is Serving an OST + Aborting Recovery - + Determining Which Machine is Serving an OST - Changing the Address of a Failover Node + Changing the Address of a Failover Node -
- <anchor xml:id="dbdoclet.50438199_pgfId-1298889" xreflabel=""/> -
- 14.1 <anchor xml:id="dbdoclet.50438199_42877" xreflabel=""/><anchor xml:id="dbdoclet.50438199_85142" xreflabel=""/>Working with <anchor xml:id="dbdoclet.50438199_marker-1298888" xreflabel=""/>Inactive OSTs +
+ 14.1 <anchor xml:id="dbdoclet.50438199_85142" xreflabel=""/>Working with <anchor xml:id="dbdoclet.50438199_marker-1298888" xreflabel=""/>Inactive OSTs To mount a client or an MDT with one or more inactive OSTs, run commands similar to this: client> mount -o exclude=testfs-OST0000 -t lustre uml1:/testfs\ /mnt/testfs client> cat /proc/fs/lustre/lov/testfs-clilov-*/target_obd @@ -76,33 +48,25 @@ To activate an inactive OST on a live client or MDT, use the lctl activate command on the OSC device. For example: lctl --device 7 activate - - - - - - Note -A colon-separated list can also be specified. For example, exclude=testfs-OST0000:testfs-OST0001. - - - - -
-
- 14.2 <anchor xml:id="dbdoclet.50438199_15240" xreflabel=""/>Finding <anchor xml:id="dbdoclet.50438199_marker-1298897" xreflabel=""/>Nodes in the Lustre File System + + + + +A colon-separated list can also be specified. For example, exclude=testfs-OST0000:testfs-OST0001. + + +
+ 14.2 Finding <anchor xml:id="dbdoclet.50438199_marker-1298897" xreflabel=""/>Nodes in the Lustre File System There may be situations in which you need to find all nodes in your Lustre file system or get the names of all OSTs. To get a list of all Lustre nodes, run this command on the MGS: # cat /proc/fs/lustre/mgs/MGS/live/* - - - - - - Note -This command must be run on the MGS. - - - - + + +This command must be run on the MGS. + + + In this example, file system lustre has three nodes, lustre-MDT0000, lustre-OST0000, and lustre-OST0001. cfs21:/tmp# cat /proc/fs/lustre/mgs/MGS/live/* fsname: lustre @@ -114,24 +78,20 @@ To get the names of all OSTs, run this command on the MDS: # cat /proc/fs/lustre/lov/<fsname>-mdtlov/target_obd - - - - - - Note -This command must be run on the MDS. - - - - + + +This command must be run on the MDS. + + + In this example, there are two OSTs, lustre-OST0000 and lustre-OST0001, which are both active. cfs21:/tmp# cat /proc/fs/lustre/lov/lustre-mdtlov/target_obd 0: lustre-OST0000_UUID ACTIVE 1: lustre-OST0001_UUID ACTIVE
-
- 14.3 <anchor xml:id="dbdoclet.50438199_26070" xreflabel=""/>Mounting a Server Without <anchor xml:id="dbdoclet.50438199_marker-1298918" xreflabel=""/>Lustre Service +
+ 14.3 Mounting a Server Without <anchor xml:id="dbdoclet.50438199_marker-1298918" xreflabel=""/>Lustre Service If you are using a combined MGS/MDT, but you only want to start the MGS and not the MDT, run this command: mount -t lustre <MDT partition> -o nosvc <mount point> @@ -140,165 +100,155 @@ $ mount -t lustre -L testfs-MDT0000 -o nosvc /mnt/test/mdt
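A minimal sketch of the whole cycle, reusing the testfs-MDT0000 label and mount point from the example above (whether the MDT service is later started on the same mount point is an assumption made for illustration):
$ mount -t lustre -L testfs-MDT0000 -o nosvc /mnt/test/mdt   # start only the MGS
$ umount /mnt/test/mdt                                       # stop the standalone MGS when done
$ mount -t lustre -L testfs-MDT0000 /mnt/test/mdt            # later, mount normally to start the MGS and MDT services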
-
- 14.4 <anchor xml:id="dbdoclet.50438199_54623" xreflabel=""/>Regenerating Lustre <anchor xml:id="dbdoclet.50438199_marker-1305736" xreflabel=""/>Configuration Logs +
+ 14.4 Regenerating Lustre <anchor xml:id="dbdoclet.50438199_marker-1305736" xreflabel=""/>Configuration Logs If the Lustre system’s configuration logs are in a state where the file system cannot be started, use the writeconf command to erase them. After the writeconf command is run and the servers restart, the configuration logs are re-generated and stored on the MGS (as in a new file system). You should only use the writeconf command if: The configuration logs are in a state where the file system cannot start - - - A server NID is being changed - - - The writeconf command is destructive to some configuration items (i.e., OST pools information and items set via conf_param), and should be used with caution. To avoid problems: Shut down the file system before running the writeconf command - - - Run the writeconf command on all servers (MDT first, then OSTs) - - - Start the file system in this order: - + MGS (or the combined MGS/MDT) - - - MDT - - - OSTs - - - Lustre clients - - - - + - - - - - - - - - - - - - - - - Caution -The OST pools feature enables a group of OSTs to be named for file striping purposes. If you use OST pools, be aware that running the writeconf command erases all pools information (as well as any other parameters set via lctl conf_param). We recommend that the pools definitions (and conf_param settings) be executed via a script, so they can be reproduced easily after a writeconf is performed. - - - - - To regenerate Lustre’s system configuration logs: - 1. Shut down the file system in this order. - a. Unmount the clients. - b. Unmount the MDT. - c. Unmount all OSTs. - 2. Make sure the the MDT and OST devices are available. - 3. Run the writeconf command on all servers. + + + +The OST pools feature enables a group of OSTs to be named for file striping purposes. If you use OST pools, be aware that running the writeconf command erases all pools information (as well as any other parameters set via lctl conf_param). We recommend that the pools definitions (and conf_param settings) be executed via a script, so they can be reproduced easily after a writeconf is performed. + + + To regenerate Lustre's system configuration logs: + + Shut down the file system in this order. + + Unmount the clients. + + Unmount the MDT. + + Unmount all OSTs. + + + Make sure the MDT and OST devices are available. + + Run the writeconf command on all servers. Run writeconf on the MDT first, and then the OSTs. + + On the MDT, run: <mdt node>$ tunefs.lustre --writeconf <device> + + On each OST, run: <ost node>$ tunefs.lustre --writeconf <device> + + Restart the file system in this order. + + Mount the MGS (or the combined MGS/MDT). + + Mount the MDT. + + Mount the OSTs. + + Mount the clients. + After the writeconf command is run, the configuration logs are re-generated as servers restart. +
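The sequence below is a minimal sketch of steps 3 and 4 for a hypothetical configuration with a combined MGS/MDT on /dev/sda and a single OST on /dev/sdb; the host prompts, device names, and mount points are illustrative only:
mds# tunefs.lustre --writeconf /dev/sda        # regenerate the configuration log for the MDT first
oss# tunefs.lustre --writeconf /dev/sdb        # then each OST in turn
mds# mount -t lustre /dev/sda /mnt/test/mdt    # restart: combined MGS/MDT first
oss# mount -t lustre /dev/sdb /mnt/test/ost0   # then the OSTs, and finally the clients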
-
- 14.5 <anchor xml:id="dbdoclet.50438199_31353" xreflabel=""/>Changing a <anchor xml:id="dbdoclet.50438199_marker-1305737" xreflabel=""/>Server NID +
+ 14.5 Changing a <anchor xml:id="dbdoclet.50438199_marker-1305737" xreflabel=""/>Server NID If you need to change the NID on the MDT or an OST, run the writeconf command to erase Lustre configuration information (including server NIDs), and then re-generate the system configuration using updated server NIDs. Change a server NID in these situations: New server hardware is added to the file system, and the MDS or an OSS is being moved to the new machine - - - New network card is installed in the server - - - You want to reassign IP addresses - - - To change a server NID: - 1. Update the LNET configuration in the /etc/modprobe.conf file so the list of server NIDs (lctl list_nids) is correct. + + Update the LNET configuration in the /etc/modprobe.conf file so the list of server NIDs (lctl list_nids) is correct. The lctl list_nids command indicates which network(s) are configured to work with Lustre. - 2. Shut down the file system in this order. - a. Unmount the clients. - b. Unmount the MDT. - c. Unmount all OSTs. - 3. Run the writeconf command on all servers. + + Shut down the file system in this order. + + Unmount the clients. + + Unmount the MDT. + + Unmount all OSTs. + + + Run the writeconf command on all servers. Run writeconf on the MDT first, and then the OSTs. - a. On the MDT, run: + + On the MDT, run: <mdt node>$ tunefs.lustre --writeconf <device> - b. On each OST, run: + + On each OST, run: <ost node>$ tunefs.lustre --writeconf <device> - c. If the NID on the MGS was changed, communicate the new MGS location to each server. Run: + + If the NID on the MGS was changed, communicate the new MGS location to each server. Run: tunefs.lustre --erase-param --mgsnode=<new_nid(s)> --writeconf /dev/.. - 4. Restart the file system in this order. - a. Mount the MGS (or the combined MGS/MDT). - b. Mount the MDT. - c. Mount the OSTs. - d. Mount the clients. + + + Restart the file system in this order. + + Mount the MGS (or the combined MGS/MDT). + + Mount the MDT. + + Mount the OSTs. + + Mount the clients. + + After the writeconf command is run, the configuration logs are re-generated as servers restart, and server NIDs in the updated list_nids file are used.
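As a sketch of step 1 only (the interface name and resulting NID are invented for illustration), the LNET options line in /etc/modprobe.conf on the moved server would be updated and then verified:
options lnet networks=tcp0(eth1)   # /etc/modprobe.conf: bind LNET to the server's new interface
# modprobe lnet
# lctl network up                  # bring LNET up so the local NIDs can be listed
# lctl list_nids                   # confirm the new server NID, e.g. 192.168.0.21@tcp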
-
- 14.6 <anchor xml:id="dbdoclet.50438199_22527" xreflabel=""/>Adding a New <anchor xml:id="dbdoclet.50438199_marker-1306353" xreflabel=""/>OST to a Lustre File System +
+ 14.6 Adding a New <anchor xml:id="dbdoclet.50438199_marker-1306353" xreflabel=""/>OST to a Lustre File System To add an OST to an existing Lustre file system: + 1. Add a new OST by running the following commands: $ mkfs.lustre --fsname=spfs --ost --mgsnode=mds16@tcp0 /dev/sda $ mkdir -p /mnt/test/ost0 $ mount -t lustre /dev/sda /mnt/test/ost0 + 2. Migrate the data (if necessary). + The file system is quite unbalanced when new empty OSTs are added. New file creations are automatically balanced. If this is a scratch file system or files are pruned at a regular interval, then no further work may be needed. New files being created will preferentially be placed on the empty OST. As old files are deleted, they will release space on the old OST. Files existing prior to the expansion can optionally be rebalanced with an in-place copy, which can be done with a simple script. The basic method is to copy existing files to a temporary file, then move the temp file over the old one (a minimal sketch appears at the end of this section). This should not be attempted with files which are currently being written to by users or applications. This operation redistributes the stripes over the entire set of OSTs. @@ -308,24 +258,18 @@ To migrate files within the /test filesystem on OST0004 that are larger than 4GB in size, enter: lfs find /test -obd test-OST0004 -size +4G | lfs_migrate -y - See lfs_migrate for more details. +See (lfs_migrate) for more details.
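A minimal sketch of that copy-and-rename rebalancing for a single file (the path is hypothetical, and as noted above it must not be done while the file is in use):
$ cp -a /test/somefile /test/somefile.tmp   # the copy is striped across the current set of OSTs, including the new one
$ mv /test/somefile.tmp /test/somefile      # replace the original with the rebalanced copy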
-
- 14.7 <anchor xml:id="dbdoclet.50438199_14978" xreflabel=""/>Removing<anchor xml:id="dbdoclet.50438199_marker-1298976" xreflabel=""/> and Restoring OSTs - OSTs can be removed from and restored to a Lustre file system. Currently in Lustre, removing an OST really means that the OST is ‘deactivated’ in the file system, not permanently removed. A removed OST still appears in the file system; do not create a new OST with the same name. +
+ 14.7 Removing<anchor xml:id="dbdoclet.50438199_marker-1298976" xreflabel=""/> and Restoring OSTs + OSTs can be removed from and restored to a Lustre file system. Currently in Lustre, removing an OST really means that the OST is 'deactivated' in the file system, not permanently removed. A removed OST still appears in the file system; do not create a new OST with the same name. You may want to remove (deactivate) an OST and prevent new files from being written to it in several situations: Hard drive has failed and a RAID resync/rebuild is underway - - - OST is nearing its space capacity - - -
<anchor xml:id="dbdoclet.50438199_pgfId-1298979" xreflabel=""/>14.7.1 Removing an OST from the File System @@ -335,29 +279,24 @@ OST is nearing its space capacity - - - Hard drive has failed and a RAID resync/rebuild is underway - - - OST storage has failed permanently - - - When removing an OST, remember that the MDT does not communicate directly with OSTs. Rather, each OST has a corresponding OSC which communicates with the MDT. It is necessary to determine the device number of the OSC that corresponds to the OST. Then, you use this device number to deactivate the OSC on the MDT. To remove an OST from the file system: - 1. For the OST to be removed, determine the device number of the corresponding OSC on the MDT. - a. List all OSCs on the node, along with their device numbers. Run: + + + For the OST to be removed, determine the device number of the corresponding OSC on the MDT. + + To list all OSCs on the node, along with their device numbers. Run: lctldl|grep " osc " This is sample lctldl|grep + +
-
diff --git a/ManagingLNET.xml b/ManagingLNET.xml index 8759c38..eb22139 100644 --- a/ManagingLNET.xml +++ b/ManagingLNET.xml @@ -1,75 +1,48 @@ - + - Managing Lustre Networking (LNET) + Managing Lustre Networking (LNET) + This chapter describes some tools for managing Lustre Networking (LNET) and includes the following sections: - Updating the Health Status of a Peer or Router - - - - - - Starting and Stopping LNET - - - - - - Multi-Rail Configurations with LNET - - - - - - Load Balancing with InfiniBand - - - - - -
- <anchor xml:id="dbdoclet.50438203_pgfId-1289878" xreflabel=""/> -
- 15.1 <anchor xml:id="dbdoclet.50438203_51732" xreflabel=""/>Updating the Health Status of a Peer or <anchor xml:id="dbdoclet.50438203_marker-1288828" xreflabel=""/>Router + Updating the Health Status of a Peer or Router + + + Starting and Stopping LNET + + + Multi-Rail Configurations with LNET + + + Load Balancing with InfiniBand + + + +
+ 15.1 Updating the Health Status of a Peer or <anchor xml:id="dbdoclet.50438203_marker-1288828" xreflabel=""/>Router There are two mechanisms to update the health status of a peer or a router: - LNET can actively check health status of all routers and mark them as dead or alive automatically. By default, this is off. To enable it set auto_down and if desired check_routers_before_use. This initial check may cause a pause equal to router_ping_timeout at system startup, if there are dead routers in the system. + LNET can actively check the health status of all routers and mark them as dead or alive automatically. By default, this is off. To enable it, set auto_down and, if desired, check_routers_before_use. This initial check may cause a pause equal to router_ping_timeout at system startup, if there are dead routers in the system. - - - - When there is a communication error, all LNDs notify LNET that the peer (not necessarily a router) is down. This mechanism is always on, and there is no parameter to turn it off. However, if you set the LNET module parameter auto_down to 0, LNET ignores all such peer-down notifications. - - - + When there is a communication error, all LNDs notify LNET that the peer (not necessarily a router) is down. This mechanism is always on, and there is no parameter to turn it off. However, if you set the LNET module parameter auto_down to 0, LNET ignores all such peer-down notifications. Several key differences between the two mechanisms: - The router pinger only checks routers for their health, while LNDs notices all dead peers, regardless of whether they are a router or not. - - - - - - The router pinger actively checks the router health by sending pings, but LNDs only notice a dead peer when there is network traffic going on. - + The router pinger only checks routers for their health, while LNDs notice all dead peers, regardless of whether they are a router or not. The router pinger actively checks the router health by sending pings, but LNDs only notice a dead peer when there is network traffic going on. The router pinger can bring a router from alive to dead or vice versa, but LNDs can only bring a peer down. - - -
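A sketch of enabling the active router checking described above through the LNET module parameters (placed in /etc/modprobe.conf, as elsewhere in this chapter; the values are only an example):
options lnet auto_down=1 check_routers_before_use=1   # mark dead routers/peers automatically and ping routers before first use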
-
- 15.2 <anchor xml:id="dbdoclet.50438203_48703" xreflabel=""/>Starting and Stopping LNET +
+ 15.2 Starting and Stopping LNET Lustre automatically starts and stops LNET, but it can also be manually started in a standalone manner. This is particularly useful to verify that your networking setup is working correctly before you attempt to start Lustre.
<anchor xml:id="dbdoclet.50438203_pgfId-1287402" xreflabel=""/>15.2.1 Starting <anchor xml:id="dbdoclet.50438203_marker-1287400" xreflabel=""/>LNET @@ -102,68 +75,47 @@ Before the LNET modules can be removed, LNET references must be removed. In general, these references are removed automatically when Lustre is shut down, but for standalone routers, an explicit step is needed to stop LNET. Run: lctl network unconfigure - - - - - - Note -Attempting to remove Lustre modules prior to stopping the network may result in a crash or an LNET hang. if this occurs, the node must be rebooted (in most cases). Make sure that the Lustre network and Lustre are stopped prior to unloading the modules. Be extremely careful using rmmod -f. - - - - + + + +Attempting to remove Lustre modules prior to stopping the network may result in a crash or an LNET hang. if this occurs, the node must be rebooted (in most cases). Make sure that the Lustre network and Lustre are stopped prior to unloading the modules. Be extremely careful using rmmod -f. + + To unconfigure the LNET network, run: modprobe -r <any lnd and the lnet modules> - - - - - - Tip -To remove all Lustre modules, run:$ lctl modules | awk '{print $2}' | xargs rmmod - - - - + + + +To remove all Lustre modules, run:$ lctl modules | awk '{print $2}' | xargs rmmod + + +
-
- 15.3 <anchor xml:id="dbdoclet.50438203_72197" xreflabel=""/><anchor xml:id="dbdoclet.50438203_82542" xreflabel=""/>Multi-Rail Configurations with <anchor xml:id="dbdoclet.50438203_Multi-rail-configurations-with-LNET-LNET" xreflabel=""/>LNET +
+ 15.3 <anchor xml:id="dbdoclet.50438203_82542" xreflabel=""/>Multi-Rail Configurations with <anchor xml:id="dbdoclet.50438203_Multi-rail-configurations-with-LNET-LNET" xreflabel=""/>LNET To aggregate bandwidth across both rails of a dual-rail IB cluster (o2iblnd) Multi-rail configurations are only supported by o2iblnd; other IB LNDs do not support multiple interfaces. using LNET, consider these points: LNET can work with multiple rails, however, it does not load balance across them. The actual rail used for any communication is determined by the peer NID. - - - Multi-rail LNET configurations do not provide an additional level of network fault tolerance. The configurations described below are for bandwidth aggregation only. Network interface failover is planned as an upcoming Lustre feature. - - - A Lustre node always uses the same local NID to communicate with a given peer NID. The criteria used to determine the local NID are: - - Fewest hops (to minimize routing), and - - - Appears first in the "networks" or "ip2nets" LNET configuration strings - - -
-
- 15.4 <anchor xml:id="dbdoclet.50438203_78227" xreflabel=""/>Load Balancing with InfiniBand +
+ 15.4 Load Balancing with InfiniBand A Lustre file system contains OSSs with two InfiniBand HCAs. Lustre clients have only one InfiniBand HCA using OFED InfiniBand "o2ib" drivers. Load balancing between the HCAs on the OSS is accomplished through LNET.
<anchor xml:id="dbdoclet.50438203_pgfId-1290317" xreflabel=""/>15.4.1 Setting Up modprobe.conf<anchor xml:id="dbdoclet.50438203_77834" xreflabel=""/><anchor xml:id="dbdoclet.50438203_marker-1290316" xreflabel=""/> for Load Balancing @@ -173,27 +125,18 @@ Dual HCA OSS server - - - options lnet networks="o2ib0(ib0),o2ib1(ib1) 192.168.10.1.[101-102] Client with the odd IP address - - - options lnet networks=o2ib0(ib0) 192.168.10.[103-253/2] Client with the even IP address - - - options lnet networks=o2ib1(ib0) 192.168.10.[102-254/2] @@ -236,9 +179,6 @@ A cluster with more clients than servers. The fact that an individual client cannot get two rails of bandwidth is unimportant because the servers are the actual bottleneck. - - - ip2nets="o2ib0(ib0), o2ib1(ib1) 192.168.[0-1].* \ #all servers;\ @@ -251,9 +191,6 @@ ents" A single client that must get two rails of bandwidth, and it does not matter if the maximum aggregate bandwidth is only (# servers) * (1 rail). - - - ip2nets=" o2ib0(ib0) 192.168.[0-1].[0-252/2] \ #even servers;\ @@ -266,9 +203,6 @@ ents" All clients and all servers must get two rails of bandwidth. - - - ip2nets=†o2ib0(ib0),o2ib2(ib1) 192.168.[0-1].[0-252/2] \ #even servers;\ @@ -281,6 +215,5 @@ ents" This configuration includes two additional proxy o2ib networks to work around Lustre's simplistic NID selection algorithm. It connects "even" clients to "even" servers with o2ib0 on rail0, and "odd" servers with o2ib3 on rail1. Similarly, it connects "odd" clients to "odd" servers with o2ib1 on rail0, and "even" servers with o2ib2 on rail1.
-
-- 1.8.3.1