# PNG figures that protocol.pdf lists as prerequisites (see the protocol.pdf rule).
# NOTE(review): only a .gv -> .png suffix rule is visible in this view; how the
# remaining figures are rendered is not shown here -- confirm before relying on it.
FIGURES = figures/ost-setattr-generic.png \
figures/ost-punch-generic.png \
figures/mds-reint-setattr-generic.png \
+ figures/mds-reint-setxattr-generic.png \
figures/mds-getxattr-generic.png \
figures/ldlm-enqueue-generic.png \
figures/ldlm-enqueue-intent-layout-generic.png \
figures/chmod_rpcs.png \
figures/mds-reint-setattr-request.png \
figures/mds-reint-setattr-reply.png \
+ figures/mds-reint-setxattr-request.png \
+ figures/mds-reint-setxattr-reply.png \
figures/touch_rpcs.png \
figures/ldlm-enqueue-intent-layout-request.png \
figures/ldlm-enqueue-intent-layout-reply.png \
figures/ldlm-cancel-request.png \
figures/ldlm-cancel-reply.png \
figures/ost-punch-request.png \
- figures/ost-punch-reply.png
+ figures/ost-punch-reply.png \
+ figures/fstat_rpcs.png \
+ figures/ldlm-enqueue-intent-getattr-request.png \
+ figures/ldlm-enqueue-intent-getattr-reply.png \
+ figures/ldlm-enqueue-intent-lvb-reply.png \
+ figures/ldlm-enqueue-request.png \
+ figures/ldlm-enqueue-reply.png \
+ figures/ldlm-gl-callback-request.png \
+ figures/ldlm-gl-callback-reply.png \
+ figures/ldlm-enqueue-intent-getattr-generic.png \
+ figures/ldlm-gl-callback-generic.png \
+ figures/statfs_rpcs.png \
+ figures/mds-statfs-request.png \
+ figures/mds-statfs-reply.png \
+ figures/ost-statfs-request.png \
+ figures/ost-statfs-reply.png \
+ figures/mds-statfs-generic.png \
+ figures/ost-statfs-generic.png \
+ figures/ldlm-enqueue-intent-getxattr-request.png \
+ figures/ldlm-enqueue-intent-getxattr-reply.png \
+ figures/ldlm-enqueue-intent-getxattr-generic.png
# AsciiDoc sources that protocol.pdf lists as prerequisites. Every file pulled
# in through an include:: directive should appear here so that editing it
# triggers a rebuild; statfs_structs.txt is listed because protocol.txt
# includes it alongside the other *_structs.txt files.
TEXT = protocol.txt \
introduction.txt \
data_types.txt \
lustre_file_ids.txt \
lustre_handle.txt \
+ ptlrpc_body.txt \
mdt_structs.txt \
mdt_body.txt \
+ statfs_structs.txt \
+ obd_statfs.txt \
mds_reint_structs.txt \
mdt_rec_reint.txt \
mdt_rec_setattr.txt \
+ mdt_rec_setxattr.txt \
ost_setattr_structs.txt \
connection.txt \
timeouts.txt \
ost_lvb.txt \
early_lock_cancellation.txt \
llog.txt \
+ path_lookup.txt \
recovery.txt \
security.txt \
lustre_messages.txt \
lustre_operations.txt \
ost_setattr.txt \
ost_punch.txt \
+ ost_statfs.txt \
mds_reint.txt \
+ mds_statfs.txt \
mds_getxattr.txt \
ldlm_enqueue.txt \
ldlm_bl_callback.txt \
ldlm_cp_callback.txt \
+ ldlm_gl_callback.txt \
ldlm_cancel.txt \
file_system_operations.txt \
+ getattr.txt \
setattr.txt \
+ statfs.txt \
+ getxattr.txt \
+ setxattr.txt \
glossary.txt
.SUFFIXES : .gnuplot .gv .pdf .png .fig
asciidoc protocol.txt
# Build the PDF from protocol.txt with a2x using the FOP backend; the target is
# remade whenever any listed figure or text source is newer than it.
# NOTE(review): --no-xmllint presumably skips a2x's xmllint validation pass -- confirm
# that this is intended rather than hiding a markup error.
protocol.pdf: $(FIGURES) $(TEXT)
- a2x -f pdf --fop protocol.txt
+ a2x --no-xmllint -f pdf --fop protocol.txt
# Suffix rule: render a Graphviz .gv source to a PNG with dot
# ($< is the .gv prerequisite, $@ is the .png target).
.gv.png:
dot -Tpng $< -o $@
long. Thus there may need to be an extra four bytes of padding after the
'lm_bufflens' array if that array has an odd number of entries.
-OBD statfs
-^^^^^^^^^^
-
-The 'obd_statfs' structure defines fields that are used for returning
-server common 'statfs' data items to a client. It augments that data
-with some Lustre-specific information, and also has space allocated
-for future use by Lustre.
-
-----
-struct obd_statfs {
- __u64 os_type;
- __u64 os_blocks;
- __u64 os_bfree;
- __u64 os_bavail;
- __u64 os_files;
- __u64 os_ffree;
- __u8 os_fsid[40];
- __u32 os_bsize;
- __u32 os_namelen;
- __u64 os_maxbytes;
- __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
- __u32 os_fprecreated; /* objs available now to the caller */
- /* used in QoS code to find preferred
- * OSTs */
- __u32 os_spare2;
- __u32 os_spare3;
- __u32 os_spare4;
- __u32 os_spare5;
- __u32 os_spare6;
- __u32 os_spare7;
- __u32 os_spare8;
- __u32 os_spare9;
-};
-----
-
-Lustre Message Preamble
-^^^^^^^^^^^^^^^^^^^^^^^
-[[lustre-message-preamble]]
-
-Every Lustre message starts with both the above header and an
-additional set of fields (in its first "buffer") given by the 'struct
-ptlrpc_body_v3' structure. This preamble has information
-relevant to every message type. In particular, the Lustre message type
-is itself encoded in the 'pb_opc' Lustre operation number. The value
-of that op code determines what else will be in the message following
-the preamble.
-----
-#define PTLRPC_NUM_VERSIONS 4
-#define JOBSTATS_JOBID_SIZE 32
-struct ptlrpc_body_v3 {
- struct lustre_handle pb_handle;
- __u32 pb_type;
- __u32 pb_version;
- __u32 pb_opc;
- __u32 pb_status;
- __u64 pb_last_xid;
- __u64 pb_last_seen;
- __u64 pb_last_committed;
- __u64 pb_transno;
- __u32 pb_flags;
- __u32 pb_op_flags;
- __u32 pb_conn_cnt;
- __u32 pb_timeout;
- __u32 pb_service_time;
- __u32 pb_limit;
- __u64 pb_slv;
- __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
- __u64 pb_padding[4];
- char pb_jobid[JOBSTATS_JOBID_SIZE];
-};
-#define ptlrpc_body ptlrpc_body_v3
-----
-In a connection request, sent by a client to server and regarding a
-specific target, the 'pb_handle' is 0. In the reply to a connection
-request, sent by the server, the handle is a value uniquely
-identifying the target. Subsequent messages between this client and
-this server regarding this target will use this handle to gain
-access to their shared state. The handle is persistent across
-reconnects.
-
-The 'pb_type' is PTL_RPC_MSG_REQUEST in messages when they are
-initiated, it is PTL_RPC_MSG_REPLY in a reply, and it is
-PTL_RPC_MSG_ERR to convey that a message was received that could not
-be interpreted, that is, if it was corrupt or incomplete. The encoding
-of those type values is given by:
-----
-#define PTL_RPC_MSG_REQUEST 4711
-#define PTL_RPC_MSG_ERR 4712
-#define PTL_RPC_MSG_REPLY 4713
-----
-The error message type is only for responding to a message that failed
-to be interpreted as an actual message. Note that other errors, such
-as those that emerge from processing the actual message content, do
-not use the PTL_RPC_MSG_ERR type.
-
-The 'pb_version' identifies the version of the Lustre protocol and is
-derived from the following constants. The lower two bytes give the
-version of PtlRPC being employed in the message, and the upper two
-bytes encode the role of the host for the service being
-requested. That role is one of OBD, MDS, OST, DLM, LOG, or MGS.
-----
-#define PTLRPC_MSG_VERSION 0x00000003
-#define LUSTRE_VERSION_MASK 0xffff0000
-#define LUSTRE_OBD_VERSION 0x00010000
-#define LUSTRE_MDS_VERSION 0x00020000
-#define LUSTRE_OST_VERSION 0x00030000
-#define LUSTRE_DLM_VERSION 0x00040000
-#define LUSTRE_LOG_VERSION 0x00050000
-#define LUSTRE_MGS_VERSION 0x00060000
-----
-
-The 'pb_opc' value (operation code) gives the actual Lustre operation
-that is the subject of this message. For example, MDS_CONNECT is a
-Lustre operation (number 38). The following list gives the name used
-and the value for each operation.
-----
-typedef enum {
- OST_REPLY = 0,
- OST_GETATTR = 1,
- OST_SETATTR = 2,
- OST_READ = 3,
- OST_WRITE = 4,
- OST_CREATE = 5,
- OST_DESTROY = 6,
- OST_GET_INFO = 7,
- OST_CONNECT = 8,
- OST_DISCONNECT = 9,
- OST_PUNCH = 10,
- OST_OPEN = 11,
- OST_CLOSE = 12,
- OST_STATFS = 13,
- OST_SYNC = 16,
- OST_SET_INFO = 17,
- OST_QUOTACHECK = 18,
- OST_QUOTACTL = 19,
- OST_QUOTA_ADJUST_QUNIT = 20,
- MDS_GETATTR = 33,
- MDS_GETATTR_NAME = 34,
- MDS_CLOSE = 35,
- MDS_REINT = 36,
- MDS_READPAGE = 37,
- MDS_CONNECT = 38,
- MDS_DISCONNECT = 39,
- MDS_GETSTATUS = 40,
- MDS_STATFS = 41,
- MDS_PIN = 42,
- MDS_UNPIN = 43,
- MDS_SYNC = 44,
- MDS_DONE_WRITING = 45,
- MDS_SET_INFO = 46,
- MDS_QUOTACHECK = 47,
- MDS_QUOTACTL = 48,
- MDS_GETXATTR = 49,
- MDS_SETXATTR = 50,
- MDS_WRITEPAGE = 51,
- MDS_IS_SUBDIR = 52,
- MDS_GET_INFO = 53,
- MDS_HSM_STATE_GET = 54,
- MDS_HSM_STATE_SET = 55,
- MDS_HSM_ACTION = 56,
- MDS_HSM_PROGRESS = 57,
- MDS_HSM_REQUEST = 58,
- MDS_HSM_CT_REGISTER = 59,
- MDS_HSM_CT_UNREGISTER = 60,
- MDS_SWAP_LAYOUTS = 61,
- LDLM_ENQUEUE = 101,
- LDLM_CONVERT = 102,
- LDLM_CANCEL = 103,
- LDLM_BL_CALLBACK = 104,
- LDLM_CP_CALLBACK = 105,
- LDLM_GL_CALLBACK = 106,
- LDLM_SET_INFO = 107,
- MGS_CONNECT = 250,
- MGS_DISCONNECT = 251,
- MGS_EXCEPTION = 252,
- MGS_TARGET_REG = 253,
- MGS_TARGET_DEL = 254,
- MGS_SET_INFO = 255,
- MGS_CONFIG_READ = 256,
- OBD_PING = 400,
- OBD_LOG_CANCEL = 401,
- OBD_QC_CALLBACK = 402,
- OBD_IDX_READ = 403,
- LLOG_ORIGIN_HANDLE_CREATE = 501,
- LLOG_ORIGIN_HANDLE_NEXT_BLOCK = 502,
- LLOG_ORIGIN_HANDLE_READ_HEADER = 503,
- LLOG_ORIGIN_HANDLE_WRITE_REC = 504,
- LLOG_ORIGIN_HANDLE_CLOSE = 505,
- LLOG_ORIGIN_CONNECT = 506,
- LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508,
- LLOG_ORIGIN_HANDLE_DESTROY = 509,
- QUOTA_DQACQ = 601,
- QUOTA_DQREL = 602,
- SEQ_QUERY = 700,
- SEC_CTX_INIT = 801,
- SEC_CTX_INIT_CONT = 802,
- SEC_CTX_FINI = 803,
- FLD_QUERY = 900,
- FLD_READ = 901,
- UPDATE_OBJ = 1000,
- LAST_OPC
-} cmd_t;
-----
-The symbols and values above identify the operations Lustre uses in
-its protocol. They are examined in detail in the
-<<lustre-operations,Lustre Operations>> section. Lustre carries out
-each of these operations via the exchange of a pair of messages: a
-request and a reply. The details of each message are specific to each
-operation. The <<lustre-messages,Lustre Messages>> chapter discusses
-each message and its contents.
-
-The 'pb_status' value in a request message is set to the 'pid' of the
-process making the request. In a reply message, a zero indicates that
-the service successfully initiated the requested operation. If for
-some reason the operation could not be initiated (eg. "permission
-denied") the status will encode the standard Linux kernel (POSIX)
-error code (eg. EPERM).
-
-'pb_last_xid' and 'pb_last_seen' are not used.
-
-The 'pb_last_committed' value is always zero in a request. In a reply
-it is the highest transaction number that has been committed to
-storage. The transaction numbers are maintained on a per-target basis
-and each series of transaction numbers is a strictly increasing
-sequence. This field is set in any kind of reply message including
-pings and non-modifying transactions.
-
-The 'pb_transno' value is always zero in a new request. It is also zero
-for replies to operations that do not modify the file system. For
-replies to operations that do modify the file system it is the
-server-assigned value from the sequence of values associated with the
-given client and target. That transaction number is copied into the
-'pb_transno' field of the 'ptlrpc_body' of the original request. If the
-request has to be replayed it will include the transaction number.
-
-The 'pb_flags' value governs the client state machine. Fixme: document
-what the states and transitions are of this state machine. Currently,
-only the bottom two bytes are used, and they encode state according to
-the following values:
-----
-#define MSG_GEN_FLAG_MASK 0x0000ffff
-#define MSG_LAST_REPLAY 0x0001
-#define MSG_RESENT 0x0002
-#define MSG_REPLAY 0x0004
-#define MSG_DELAY_REPLAY 0x0010
-#define MSG_VERSION_REPLAY 0x0020
-#define MSG_REQ_REPLAY_DONE 0x0040
-#define MSG_LOCK_REPLAY_DONE 0x0080
-----
-
-The 'pb_op_flags' value governs the client connection status state
-machine. Fixme: document what the states and transitions are of this
-state machine.
-----
-#define MSG_CONNECT_RECOVERING 0x00000001
-#define MSG_CONNECT_RECONNECT 0x00000002
-#define MSG_CONNECT_REPLAYABLE 0x00000004
-#define MSG_CONNECT_LIBCLIENT 0x00000010
-#define MSG_CONNECT_INITIAL 0x00000020
-#define MSG_CONNECT_ASYNC 0x00000040
-#define MSG_CONNECT_NEXT_VER 0x00000080
-#define MSG_CONNECT_TRANSNO 0x00000100
-----
-In normal operation an initial request to connect will set
-'pb_op_flags' to MSG_CONNECT_INITIAL and MSG_CONNECT_NEXT_VER. The
-reply to that connection request (and all other, non-connect, requests
-and replies) will set 'pb_op_flags' to 0.
-
-The 'pb_conn_cnt' (connection count) value in a request message
-reports the client's "era", which is part of the client and server's
-shared state. The value of the era is initialized to one when it is
-first connected to the MDT. Each subsequent connection (after an
-eviction) increments the era for the client. Since the 'pb_conn_cnt'
-reflects the client's era at the time the message was composed the
-server can use this value to discard late-arriving messages requesting
-operations on out-of-date shared state.
-
-The 'pb_timeout' value in a request indicates how long (in seconds)
-the requester plans to wait before timing out the operation. That is,
-the corresponding reply for this message should arrive within this
-time frame. The service may extend this time frame via an "early
-reply", which is a reply to this message that notifies the requester
-that it should extend its timeout interval by the value of the
-'pb_timeout' field in the reply. The "early reply" does not indicate
-the operation has actually been initiated. Clients maintain multiple
-request queues, called "portals", and each type of operation is
-assigned to one of these queues. There is a timeout value associated
-with each queue, and the timeout update affects all the messages
-associated with the given queue, not just the specific message that
-initiated the request. Finally, in a reply message (one that does
-indicate the operation has been initiated) the timeout value updates
-the timeout interval for the queue. Is this last point different from
-the "early reply" update?
-
-The 'pb_service_time' value is zero in a request. In a reply it
-indicates how long this particular operation actually took from the
-time it first arrived in the request queue (at the service) to the
-time the server replied. Note that the client can use this value and
-the local elapsed time for the operation to calculate network latency.
-
-The 'pb_limit' value is zero in a request. In a reply it is a value
-sent from a lock service to a client to set the maximum number of
-locks available to the client. When dynamic lock LRU's are enabled
-this allows for managing the size of the LRU.
-
-The 'pb_slv' value is zero in a request. On a DLM service, the "server
-lock volume" is a value that characterizes (estimates) the amount of
-traffic, or load, on that lock service. It is calculated as the
-product of the number of locks and their age. In a reply, the 'pb_slv'
-value indicates to the client the available share of the total lock
-load on the server that the client is allowed to consume. The client
-is then responsible for reducing its number (or age) of locks to
-stay within this limit.
-
-The array of 'pb_pre_versions' values has four entries. They are
-always zero in a new request message. They are also zero in replies to
-operations that do not modify the file system. For an operation that
-does modify the file system, the reply encodes the most recent
-transaction numbers for the objects modified by this operation, and
-the 'pb_pre_versions' values are copied into the original request when
-the reply arrives. If the request needs to be replayed then the
-updated 'pb_pre_versions' values accompany the replayed request.
-
-'pb_padding' is reserved for future use.
-
-The 'pb_jobid' (string) value gives a unique identifier associated
-with the process on behalf of which this message was generated. The
-identifier is assigned to the user process by a job scheduler, if any.
+include::ptlrpc_body.txt[]
Object Based Disk UUID
^^^^^^^^^^^^^^^^^^^^^^
include::mds_reint_structs.txt[]
include::ost_setattr_structs.txt[]
+
+include::statfs_structs.txt[]
--- /dev/null
+Lustre Protocol Documentation - getattr
+=======================================
+Andrew Uselton <andrew.c.uselton@intel.com>
+v0.0, May 2015
+:author: Andrew Uselton
+:doctype: article
+:Author Initials: ACU
+:toc:
+:icons:
+:numbered:
+:imagesdir: ./figures
+:website: http://lustre.org/
+:keywords: PtlRPC, Lustre, Protocol
+
+File System Operations
+----------------------
+
+include::getattr.txt[]
+
+RPCs
+----
+[[rpcs]]
+
+include::ldlm_enqueue.txt[]
+
+include::ldlm_gl_callback.txt[]
+
+:numbered!:
+
+[appendix]
+License
+-------
+
+Copyright (C) Intel 2015
+
+This work is licensed under a Creative Commons Attribution-ShareAlike
+4.0 International License (CC BY-SA 4.0). See
+<https://creativecommons.org/licenses/by-sa/4.0/> for more detail.
+
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 3960 2295 181 181 3960 2295 4035 2460
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4114 1491 181 181 4114 1491 4189 1656
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1875 1950 181 181 1875 1950 1950 2115
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 9000 2175 181 181 9000 2175 9075 2340
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 9600 3750 181 181 9600 3750 9675 3915
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 7425 3375 181 181 7425 3375 7500 3540
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 1200 1200 1200 1350
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 150 1575 1125 1575
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 600 600 1800 600 1800 1200 600 1200 600 600
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1275 1725 4725 1725
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 4215 600 5415 600 5415 1200 4215 1200 4215 600
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 4815 1185 4815 1335
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4740 2100 1290 2100
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 10275 660 11475 660 11475 1260 10275 1260 10275 660
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 10815 1290 10815 1440
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 7815 1215 7815 1365
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 7215 585 8415 585 8415 1185 7215 1185 7215 585
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1305 2445 7710 2445
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 10770 3465 7920 3465
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 7919 2809 10784 2794
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 7710 3750 1275 3750
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 1140 1350 1275 1350 1275 4200 1140 4200 1140 1350
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 4740 1335 4875 1335 4875 4200 4740 4200 4740 1335
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 7725 1365 7875 1365 7875 4200 7725 4200 7725 1365
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 10725 1455 10875 1455 10875 4200 10725 4200 10725 1455
+4 0 0 50 -1 0 12 0.0000 4 150 480 4515 960 MDT\001
+4 0 0 50 -1 0 12 0.0000 4 195 2430 1335 2415 LDLM_ENQUEUE request\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 3914 2384 3\001
+4 0 0 50 -1 0 12 0.0000 4 150 615 885 960 Client1\001
+4 0 0 50 -1 0 12 0.0000 4 150 615 10560 1035 Client2\001
+4 0 0 50 -1 0 12 0.0000 4 150 405 7605 1005 OST\001
+4 0 0 50 -1 0 12 0.0000 4 195 435 9900 3660 reply\001
+4 0 0 50 -1 0 12 0.0000 4 165 645 8415 2985 request\001
+4 0 0 50 -1 0 12 0.0000 4 195 525 285 1470 fstat()\001
+4 0 0 50 -1 0 12 0.0000 4 195 2430 1380 1665 LDLM_ENQUEUE request\001
+4 0 0 50 -1 0 12 0.0000 4 195 2310 8055 2685 LDLM_GL_CALLBACK\001
+4 0 0 50 -1 0 12 0.0000 4 195 2310 8385 3375 LDLM_GL_CALLBACK\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 4050 1575 1\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 1800 2025 2\001
+4 0 0 50 -1 0 12 0.0000 4 195 2220 2400 2025 LDLM_ENQUEUE reply\001
+4 0 0 50 -1 0 12 0.0000 4 195 2220 4950 3600 LDLM_ENQUEUE reply\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 8925 2250 4\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 9525 3825 5\001
+4 0 0 50 -1 0 12 0.0000 4 150 105 7350 3450 6\001
Single
-2
1200 2
-6 1140 1740 4605 2490
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
- 1815 1875 4590 1875 4590 2475 1215 2475 1215 2025
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
- 3015 1875 3015 1950 3015 2025 3015 2100 3015 2175 3015 2250
- 3015 2325 3015 2400 3015 2475
-4 0 0 50 -1 16 18 0.0000 4 270 615 1140 1950 reply\001
-4 0 0 50 -1 16 18 0.0000 4 270 1530 1365 2250 ptlrpc_body\001
-4 0 0 50 -1 16 18 0.0000 4 270 1305 3150 2280 ldlm_reply\001
--6
-6 1125 2775 6135 3525
-2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
- 2910 2880 6120 2865 6120 3495 1200 3510 1200 3060
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
- 3000 2910 3000 2985 3000 3060 3000 3135 3000 3210 3000 3285
- 3000 3360 3000 3435 3000 3510
-2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
- 4650 2910 4650 2985 4650 3060 4650 3135 4650 3210 4650 3285
- 4650 3360 4650 3435 4650 3510
-4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 3285 ptlrpc_body\001
-4 0 0 50 -1 16 18 0.0000 4 270 1920 1125 2985 extent lvb reply\001
-4 0 0 50 -1 16 18 0.0000 4 270 1305 3135 3315 ldlm_reply\001
-4 0 0 50 -1 0 18 0.0000 4 255 810 4860 3315 ost_lvb\001
--6
2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
3000 1350 3000 1425 3000 1500
2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
2025 900 4995 900 5010 1500 1200 1500 1200 1050
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1815 1875 4590 1875 4590 2475 1215 2475 1215 2025
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3015 1875 3015 1950 3015 2025 3015 2100 3015 2175 3015 2250
+ 3015 2325 3015 2400 3015 2475
4 0 0 50 -1 16 18 0.0000 4 255 930 1050 1005 request\001
4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 615 1140 1950 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1365 2250 ptlrpc_body\001
4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
4 0 0 50 -1 16 18 0.0000 4 270 1620 3165 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3150 2280 ldlm_reply\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4890 915 4890 990 4890 1065 4890 1140 4890 1215 4890 1290
+ 4890 1365 4890 1440 4890 1515
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 8370 930 8370 1005 8370 1080 8370 1155 8370 1230 8370 1305
+ 8370 1380 8370 1455 8370 1530
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3615 975 9465 975 9465 1560 1140 1575 1140 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6540 990 6540 1065 6540 1140 6540 1215 6540 1290 6540 1365
+ 6540 1440 6540 1515 6540 1590
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3000 2910 6000 2910 6000 3510 1200 3525 1200 3075
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 2925 3000 3000 3000 3075 3000 3150 3000 3225 3000 3300
+ 3000 3375 3000 3450 3000 3525
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4575 2910 4575 2985 4575 3060 4575 3135 4575 3210 4575 3285
+ 4575 3360 4575 3435 4575 3510
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3420 1905 5985 1890 5985 2490 1185 2505 1185 2055
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2985 1905 2985 1980 2985 2055 2985 2130 2985 2205 2985 2280
+ 2985 2355 2985 2430 2985 2505
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4560 1890 4560 1965 4560 2040 4560 2115 4560 2190 4560 2265
+ 4560 2340 4560 2415 4560 2490
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 3165 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 2550 1050 1005 intent:layout request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1380 4980 1335 ldlm_intent\001
+4 0 0 50 -1 16 18 0.0000 4 270 1635 6600 1335 layout_intent\001
+4 0 0 50 -1 16 18 0.0000 4 210 900 8505 1335 eadata\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 3300 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3135 3330 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 930 4650 3330 ost_lvb\001
+4 0 0 50 -1 16 18 0.0000 4 270 1830 1125 3000 intent:lvb reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1335 2280 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3120 2310 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 2235 1110 1980 intent:layout reply\001
+4 0 0 50 -1 16 18 0.0000 4 210 900 4695 2295 eadata\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4890 915 4890 990 4890 1065 4890 1140 4890 1215 4890 1290
+ 4890 1365 4890 1440 4890 1515
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3375 975 7875 975 7875 1575 1140 1575 1140 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6540 990 6540 1065 6540 1140 6540 1215 6540 1290 6540 1365
+ 6540 1440 6540 1515 6540 1590
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3165 1320 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 5025 1350 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1065 6675 1350 mdt_md\001
+4 0 0 50 -1 16 18 0.0000 4 270 2295 1050 1005 intent:getattr reply\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4890 915 4890 990 4890 1065 4890 1140 4890 1215 4890 1290
+ 4890 1365 4890 1440 4890 1515
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3675 975 8850 975 8850 1575 1140 1575 1140 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6540 990 6540 1065 6540 1140 6540 1215 6540 1290 6540 1365
+ 6540 1440 6540 1515 6540 1590
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 7950 975 7950 1050 7950 1125 7950 1200 7950 1275 7950 1350
+ 7950 1425 7950 1500 7950 1575
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 3165 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1380 4980 1335 ldlm_intent\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 6600 1335 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 150 705 8025 1350 name\001
+4 0 0 50 -1 16 18 0.0000 4 270 2610 1050 1005 intent:getattr request\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 975 1800 10125 3300
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2925 1950 2925 2025 2925 2100 2925 2175 2925 2250 2925 2325
+ 2925 2400 2925 2475 2925 2550
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3525 2025 10050 2025 10050 2625 1050 2625 1065 2175
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6465 2040 6465 2115 6465 2190 6465 2265 6465 2340 6465 2415
+ 6465 2490 6465 2565 6465 2640
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 4875 2625 5100 2625 5100 3225 1065 3225 1050 2625
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 8325 2025 8325 2100 8325 2175 8325 2250 8325 2325 8325 2400
+ 8325 2475 8325 2550 8325 2625
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2475 2625 2475 2700 2475 2775 2475 2850 2475 2925 2475 3000
+ 2475 3075 2475 3150 2475 3225
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3675 2625 3675 2700 3675 2775 3675 2850 3675 2925 3675 3000
+ 3675 3075 3675 3150 3675 3225
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4725 2025 4725 2100 4725 2175 4725 2250 4725 2325 4725 2400
+ 4725 2475 4725 2550 4725 2625
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1275 2355 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3090 2370 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 4950 2400 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2445 975 2055 intent:getxattr reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1650 6600 2400 lov_mds_md\001
+4 0 0 50 -1 16 18 0.0000 4 210 1230 8475 2400 ACL data\001
+4 0 0 50 -1 16 18 0.0000 4 210 1050 1275 3000 EA data\001
+4 0 0 50 -1 16 18 0.0000 4 210 975 2625 3000 EA vals\001
+4 0 0 50 -1 16 18 0.0000 4 210 960 3825 3000 EA lens\001
+-6
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2925 900 2925 975 2925 1050 2925 1125 2925 1200 2925 1275
+ 2925 1350 2925 1425 2925 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3525 975 10050 975 10050 1575 1050 1575 1065 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6465 990 6465 1065 6465 1140 6465 1215 6465 1290 6465 1365
+ 6465 1440 6465 1515 6465 1590
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 8325 975 8325 1050 8325 1125 8325 1200 8325 1275 8325 1350
+ 8325 1425 8325 1500 8325 1575
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4725 975 4725 1050 4725 1125 4725 1200 4725 1275 4725 1350
+ 4725 1425 4725 1500 4725 1575
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1275 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 3090 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 2760 975 1005 intent:getxattr request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 6600 1350 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1380 4950 1350 ldlm_intent\001
+4 0 0 50 -1 16 18 0.0000 4 210 645 8625 1350 capa\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3600 975 10125 975 10125 1575 1125 1575 1140 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6540 990 6540 1065 6540 1140 6540 1215 6540 1290 6540 1365
+ 6540 1440 6540 1515 6540 1590
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 4950 1575 5175 1575 5175 2175 1140 2175 1125 1575
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 8400 975 8400 1050 8400 1125 8400 1200 8400 1275 8400 1350
+ 8400 1425 8400 1500 8400 1575
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2550 1575 2550 1650 2550 1725 2550 1800 2550 1875 2550 1950
+ 2550 2025 2550 2100 2550 2175
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3750 1575 3750 1650 3750 1725 3750 1800 3750 1875 3750 1950
+ 3750 2025 3750 2100 3750 2175
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4800 975 4800 1050 4800 1125 4800 1200 4800 1275 4800 1350
+ 4800 1425 4800 1500 4800 1575
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3165 1320 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 5025 1350 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2445 1050 1005 intent:getxattr reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1650 6675 1350 lov_mds_md\001
+4 0 0 50 -1 16 18 0.0000 4 210 1230 8550 1350 ACL data\001
+4 0 0 50 -1 16 18 0.0000 4 210 1050 1350 1950 EA data\001
+4 0 0 50 -1 16 18 0.0000 4 210 975 2700 1950 EA vals\001
+4 0 0 50 -1 16 18 0.0000 4 210 960 3900 1950 EA lens\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4890 915 4890 990 4890 1065 4890 1140 4890 1215 4890 1290
+ 4890 1365 4890 1440 4890 1515
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 3825 975 7950 975 7950 1575 1140 1575 1140 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 6540 990 6540 1065 6540 1140 6540 1215 6540 1290 6540 1365
+ 6540 1440 6540 1515 6540 1590
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 3165 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1380 4980 1335 ldlm_intent\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 6600 1335 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 2760 1050 1005 intent:getxattr request\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2940 900 5940 900 5940 1500 1140 1515 1140 1065
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2940 915 2940 990 2940 1065 2940 1140 2940 1215 2940 1290
+ 2940 1365 2940 1440 2940 1515
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4515 900 4515 975 4515 1050 4515 1125 4515 1200 4515 1275
+ 4515 1350 4515 1425 4515 1500
+4 0 0 50 -1 16 18 0.0000 4 270 2385 1050 675 LDLM_ENQUEUE\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1290 1290 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3075 1320 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1830 1065 990 intent:lvb reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 930 4650 1335 ost_lvb\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 900 4275 900 4275 1500 1200 1500 1200 1050
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1815 1875 5625 1875 5625 2475 1215 2475 1215 2025
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3015 1875 3015 1950 3015 2025 3015 2100 3015 2175 3015 2250
+ 3015 2325 3015 2400 3015 2475
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 4500 1875 4500 1950 4500 2025 4500 2100 4500 2175 4500 2250
+ 4500 2325 4500 2400 4500 2475
+4 0 0 50 -1 16 18 0.0000 4 255 930 1050 1005 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 615 1140 1950 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1365 2250 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3150 2280 ldlm_reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 3030 1050 675 LDLM_GL_CALLBACK\001
+4 0 0 50 -1 16 18 0.0000 4 270 930 3165 1320 ost_lvb\001
+4 0 0 50 -1 16 18 0.0000 4 270 930 4575 2250 ost_lvb\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1665 915 4440 915 4440 1515 1065 1515 1065 1065
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 2865 915 2865 990 2865 1065 2865 1140 2865 1215 2865 1290
+ 2865 1365 2865 1440 2865 1515
+4 0 0 50 -1 16 18 0.0000 4 270 615 990 990 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1215 1290 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 930 3000 1320 ost_lvb\001
+4 0 0 50 -1 16 18 0.0000 4 270 3030 1050 675 LDLM_GL_CALLBACK\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 900 4995 900 5010 1500 1200 1500 1200 1050
+4 0 0 50 -1 16 18 0.0000 4 255 930 1050 1005 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 3165 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 3030 1050 675 LDLM_GL_CALLBACK\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 5325 900 5325 975 5325 1050 5325 1125 5325 1200 5325 1275
+ 5325 1350 5325 1425 5325 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 900 7200 900 7200 1500 1200 1500 1200 1050
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1815 1875 4590 1875 4590 2475 1215 2475 1215 2025
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3015 1875 3015 1950 3015 2025 3015 2100 3015 2175 3015 2250
+ 3015 2325 3015 2400 3015 2475
+4 0 0 50 -1 16 18 0.0000 4 255 930 1050 1005 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 5400 1320 ldlm_request\001
+4 0 0 50 -1 16 18 0.0000 4 270 615 1140 1950 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1365 2250 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3165 2250 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 4350 1050 675 MDS_REINT:REINT_SETXATTR\001
+4 0 0 50 -1 16 18 0.0000 4 270 2175 3075 1275 mdt_rec_setxattr\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1875 975 4650 975 4650 1575 1275 1575 1275 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3075 975 3075 1050 3075 1125 3075 1200 3075 1275 3075 1350
+ 3075 1425 3075 1500 3075 1575
+4 0 0 50 -1 16 18 0.0000 4 270 615 1200 1050 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1425 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1305 3225 1350 mdt_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 4350 1050 675 MDS_REINT:REINT_SETXATTR\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 900 3000 975 3000 1050 3000 1125 3000 1200 3000 1275
+ 3000 1350 3000 1425 3000 1500
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 900 7200 900 7200 1500 1200 1500 1200 1050
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 5400 900 5400 975 5400 1050 5400 1125 5400 1200 5400 1275
+ 5400 1350 5400 1425 5400 1500
+4 0 0 50 -1 16 18 0.0000 4 255 930 1050 1005 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 1305 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 4350 1050 675 MDS_REINT:REINT_SETXATTR\001
+4 0 0 50 -1 16 18 0.0000 4 270 2175 3150 1305 mdt_rec_setxattr\001
+4 0 0 50 -1 16 18 0.0000 4 270 1620 5475 1275 ldlm_request\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1125 1875 4650 2700
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1800 2025 4575 2025 4575 2625 1200 2625 1200 2175
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 2025 3000 2100 3000 2175 3000 2250 3000 2325 3000 2400
+ 3000 2475 3000 2550 3000 2625
+4 0 0 50 -1 16 18 0.0000 4 270 615 1125 2100 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 2400 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1365 3075 2400 obd_statfs\001
+-6
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 975 3150 975 3150 1575 1140 1575 1140 1125
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1290 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 255 930 1065 1050 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1935 1050 675 MDS_STATFS\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1875 975 4650 975 4650 1575 1275 1575 1275 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3075 975 3075 1050 3075 1125 3075 1200 3075 1275 3075 1350
+ 3075 1425 3075 1500 3075 1575
+4 0 0 50 -1 16 18 0.0000 4 270 615 1200 1050 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1425 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1935 1050 675 MDS_STATFS\001
+4 0 0 50 -1 16 18 0.0000 4 270 1365 3150 1350 obd_statfs\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 975 3150 975 3150 1575 1140 1575 1140 1125
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1290 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1935 1050 675 MDS_STATFS\001
+4 0 0 50 -1 16 18 0.0000 4 255 930 1065 1050 request\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1125 1875 4650 2700
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1800 2025 4575 2025 4575 2625 1200 2625 1200 2175
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3000 2025 3000 2100 3000 2175 3000 2250 3000 2325 3000 2400
+ 3000 2475 3000 2550 3000 2625
+4 0 0 50 -1 16 18 0.0000 4 270 615 1125 2100 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1350 2400 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1365 3075 2400 obd_statfs\001
+-6
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 975 3150 975 3150 1575 1140 1575 1140 1125
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1290 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 255 930 1065 1050 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1890 1050 675 OST_STATFS\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1200 825 4725 1650
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 1875 975 4650 975 4650 1575 1275 1575 1275 1125
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 9
+ 3075 975 3075 1050 3075 1125 3075 1200 3075 1275 3075 1350
+ 3075 1425 3075 1500 3075 1575
+4 0 0 50 -1 16 18 0.0000 4 270 615 1200 1050 reply\001
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1425 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 270 1365 3150 1350 obd_statfs\001
+-6
+4 0 0 50 -1 16 18 0.0000 4 270 1890 1050 675 OST_STATFS\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 5
+ 2025 975 3150 975 3150 1575 1140 1575 1140 1125
+4 0 0 50 -1 16 18 0.0000 4 270 1530 1290 1350 ptlrpc_body\001
+4 0 0 50 -1 16 18 0.0000 4 255 930 1065 1050 request\001
+4 0 0 50 -1 16 18 0.0000 4 270 1890 1050 675 OST_STATFS\001
--- /dev/null
+#FIG 3.2 Produced by xfig version 3.2.5b
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+6 1350 1275 1800 1725
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1561 1514 181 181 1561 1514 1636 1679
+4 0 0 50 -1 0 12 0.0000 4 150 105 1531 1574 1\001
+-6
+6 4275 1725 4725 2175
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4486 1936 181 181 4486 1936 4561 2101
+4 0 0 50 -1 0 12 0.0000 4 150 105 4456 2011 2\001
+-6
+6 1275 2100 1725 2550
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 1484 2309 181 181 1484 2309 1559 2474
+4 0 0 50 -1 0 12 0.0000 4 150 105 1439 2384 3\001
+-6
+6 7725 2475 8175 2925
+1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 7921 2699 181 181 7921 2699 7996 2864
+4 0 0 50 -1 0 12 0.0000 4 150 105 7876 2774 4\001
+-6
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 1200 1200 1200 1350
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 150 1575 1125 1575
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 600 600 1800 600 1800 1200 600 1200 600 600
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1275 1725 4725 1725
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 8250 2925 1275 2925
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 4215 600 5415 600 5415 1200 4215 1200 4215 600
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 4815 1185 4815 1335
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 7770 615 8970 615 8970 1215 7770 1215 7770 615
+2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2
+ 8370 1215 8370 1365
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 1290 2475 8250 2475
+2 1 0 1 0 0 50 -1 41 0.000 0 0 -1 1 0 2
+ 0 0 1.00 60.00 120.00
+ 4740 2100 1290 2100
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 1125 1350 1275 1350 1275 3300 1125 3300 1125 1350
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 4740 1335 4875 1335 4875 3300 4740 3300 4740 1335
+2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5
+ 8295 1365 8475 1365 8475 3300 8295 3300 8295 1365
+4 0 0 50 -1 0 12 0.0000 4 150 480 4515 960 MDT\001
+4 0 0 50 -1 0 12 0.0000 4 150 405 8070 990 OST\001
+4 0 0 50 -1 0 12 0.0000 4 150 510 885 960 Client\001
+4 0 0 50 -1 0 12 0.0000 4 195 2055 1875 1650 MDS_STATFS request\001
+4 0 0 50 -1 0 12 0.0000 4 195 1845 2550 2025 MDS_STATFS reply\001
+4 0 0 50 -1 0 12 0.0000 4 195 1995 1725 2400 OST_STATFS request\001
+4 0 0 50 -1 0 12 0.0000 4 195 1785 5850 2850 OST_STATFS reply\001
+4 0 0 50 -1 0 12 0.0000 4 195 615 285 1470 statfs()\001
Further discussion of the 'creat()' system call.
+include::getattr.txt[]
+
include::setattr.txt[]
+
+include::statfs.txt[]
+
+include::getxattr.txt[]
--- /dev/null
+Getattr
+~~~~~~~
+
+The 'getattr' VFS method is used to examine the attributes associated
+with a resource (it is an inode operation). The attributes are the
+same ones returned by a 'stat' operation: mode, uid, gid, size,
+atime, ctime, and mtime.
+
+Examining the File Attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When file attribute information is requested (a 'stat' command or
+'fstat' system call, for instance) the 'getattr' is part of the
+interaction with the LDLM. The RPCs are as shown in <<fstat-rpcs>>.
+
+.Getattr RPCs for Examining File Attributes
+[[fstat-rpcs]]
+image::fstat_rpcs.png["getattr RPCs for examining file attributes",height=150]
+
+//////////////////////////////////////////////////////////////////////
+The fstat_rpcs.png diagram resembles this text art:
+
+Time
+Step Client1 MDT OST Client2
+ ------- ------- ------- -------
+1 LDLM_ENQUEUE---->
+2 <-------LDLM_ENQUEUE
+3 LDLM_ENQUEUE----------------->
+4 LDLM_GL_CALLBACK---->
+5 <-------LDLM_GL_CALLBACK
+6 <-----------------LDLM_ENQUEUE
+//////////////////////////////////////////////////////////////////////
+
+1 - Client1 issues an LDLM_ENQUEUE with a GETATTR intent.
+
+The LDLM_ENQUEUE request RPC asks for a concurrent read lock on the
+resource and indicates the values it is interested in receiving.
+
+//////////////////////////////////////////////////////////////////////
+Add this cross reference once we rebased to the setattr update.
+
+For a detailed discussion of all the fields in
+the 'ldlm_request', 'ldlm_intent', and 'mdt_body' refer to
+<<struct-ldlm-request>>, <<struct-ldlm-intent>>, and
+<<struct-mdt-body>>.
+//////////////////////////////////////////////////////////////////////
+
+.LDLM_ENQUEUE ( intent : getattr ) Request Packet Structure
+image::ldlm-enqueue-intent-getattr-request.png["LDLM_ENQUEUE (intent : getattr) Request Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getattr-request.png diagram resembles this
+text art:
+
+ LDLM_ENQUEUE:
+ --intent:getattr request--------------------------------------
+ | ptlrpc_body | ldlm_request | ldlm_intent | mdt_body | name |
+ --------------------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+The ldlm_request structure signals that it has an intent ('lock_flags' =
+LDLM_FL_HAS_INTENT). The lock descriptor's resource type is
+'lock_desc'->'l_resource'->'lr_type'=LDLM_IBITS.
+
+The 'ldlm_intent' opcode is for 'getattr' (0x1000).
+
+The 'mdt_body' has its 'mbo_valid' field set to 0x28491035fdf, which
+is these flags:
+
+.Flags for 'mbo_valid' field of 'struct mdt_body'
+[options="header"]
+|====
+| Flag | Meaning
+| OBD_MD_FLID | FID
+| OBD_MD_FLATIME | atime attribute
+| OBD_MD_FLMTIME | mtime attribute
+| OBD_MD_FLCTIME | ctime attribute
+| OBD_MD_FLSIZE | size attribute
+| OBD_MD_FLBLKSZ | block size attribute
+| OBD_MD_FLMODE | mode attribute
+| OBD_MD_FLTYPE | type
+| OBD_MD_FLUID | UID attribute
+| OBD_MD_FLGID | GID attribute
+| OBD_MD_FLFLAGS | flags
+| OBD_MD_FLNLINK | number of links
+| OBD_MD_FLGENER | generation
+| OBD_MD_FLRDEV | rdev
+| OBD_MD_FLEASIZE | extended attributes size
+| OBD_MD_FLGROUP | group
+| OBD_MD_FLDIREA | dir extended attributes
+| OBD_MD_FLMODEASIZE | mode size
+| OBD_MD_MEA |
+| OBD_MD_FLACL | ACL
+| OBD_MD_FLMDSCAPA |
+|====
+
+In the context of an LDLM_ENQUEUE 'getattr' intent request the flags
+indicate that Client1 would like to hear back about the flagged
+attributes.
+
+The 'name' buffer is one byte long in this case. That signifies an
+empty string ('\0'). The resource was identified by FID in this case,
+but in other circumstances it might be identified by name via a string
+value in the last buffer.
+
+2 - The LDLM_ENQUEUE reply returns the indicated attribute values to
+Client1.
+
+In addition to the 'ptlrpc_body' (RPC message header), the
+LDLM_ENQUEUE reply RPC to Client1 has an 'ldlm_reply', an
+'mdt_body', and an 'mdt_md' structure.
+
+//////////////////////////////////////////////////////////////////////
+Add this cross reference once we rebased to the setattr update.
+
+For a detailed discussion of
+the fields in 'ldlm_reply', 'mdt_body', and 'mdt_md' refer to
+<<struct-ldlm-reply>>, <<struct-mdt-body>>, and <<struct-mdt-md>>.
+//////////////////////////////////////////////////////////////////////
+
+.LDLM_ENQUEUE (intent : getattr ) Reply Packet Structure
+image::ldlm-enqueue-intent-getattr-reply.png["LDLM_ENQUEUE ( intent : getattr ) Reply Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getattr-reply.png diagram resembles this text
+art:
+
+ LDLM_ENQUEUE:
+ --intent:getattr reply--------------------------
+ | ptlrpc_body | ldlm_reply | mdt_body | mdt_md |
+ ------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+The reply from the MDT grants the requested concurrent read lock on
+the resource.
+
+The 'mdt_body' carries the 'getattr' attribute information that was
+being requested. The mbo_valid field indicates that it is replying
+with this set of valid fields (0x8080022f8f):
+
+.Flags for 'mbo_valid' field of 'struct mdt_body'
+[options="header"]
+|====
+| Flag | Meaning
+| OBD_MD_FLID | FID
+| OBD_MD_FLATIME | atime attribute
+| OBD_MD_FLMTIME | mtime attribute
+| OBD_MD_FLCTIME | ctime attribute
+| OBD_MD_FLMODE | mode attribute
+| OBD_MD_FLTYPE | type
+| OBD_MD_FLUID | UID attribute
+| OBD_MD_FLGID | GID attribute
+| OBD_MD_FLFLAGS | flags
+| OBD_MD_FLNLINK | number of links attribute
+| OBD_MD_FLEASIZE | extended attributes size
+| OBD_MD_FLMODEASIZE |
+| OBD_MD_FLACL | ACL
+|====
+
+As a bonus the MDT returns layout information about the file, so that
+Client1 can get attribute information from the OST(s) responsible
+for the file's objects (if any).
+
+3 - Client1 asks for a protected read lock on the resource on the
+OST.
+
+The previous RPC provided layout information to Client1 enabling it to
+query the OSTs (one of them in this case) about the object's
+attributes. The LDLM_ENQUEUE indicates the desired (on-OST) resource
+and that it should be a protected read of type LDLM_EXTENT (11).
+
+.LDLM_ENQUEUE Request Packet Structure
+image::ldlm-enqueue-request.png["LDLM_ENQUEUE Request Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-request.png diagram resembles this text art:
+
+ LDLM_ENQUEUE:
+ --request---------------------
+ | ptlrpc_body | ldlm_request |
+ ------------------------------
+//////////////////////////////////////////////////////////////////////
+
+4 - The OST invokes a glimpse lock callback on Client2.
+
+Client2 previously had a lock on the desired resource, and the glimpse
+induces Client2 to flush its buffers, if needed, and update the OST
+size and time attributes. The LDLM_GL_CALLBACK asks for a write lock
+on the entire extent of the resource.
+
+.LDLM_GL_CALLBACK Request Packet Structure
+image::ldlm-gl-callback-request.png["LDLM_GL_CALLBACK Request Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-gl-callback-request.png diagram resembles this text art:
+
+ LDLM_GL_CALLBACK:
+ --request---------------------
+ | ptlrpc_body | ldlm_request |
+ ------------------------------
+//////////////////////////////////////////////////////////////////////
+
+5 - Client2 replies with LVB data for the OST.
+
+The OST is waiting to hear back from Client2 to update size and time
+attributes, if needed, due to Client2's cache being flushed to the
+OST. The glimpse allows the information to return to the OST, and
+thereby get passed to Client1, without taking the lock from Client2.
+
+.LDLM_GL_CALLBACK Reply Packet Structure
+image::ldlm-gl-callback-reply.png["LDLM_GL_CALLBACK Reply Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-gl-callback-reply.png diagram resembles this text art:
+
+ LDLM_GL_CALLBACK:
+ --reply------------------
+ | ptlrpc_body | ost_lvb |
+ -------------------------
+//////////////////////////////////////////////////////////////////////
+
+The 'ost_lvb' data from Client2 has attribute data to update the OST.
+
+6 - The OST replies with the updated attribute information.
+
+.LDLM_ENQUEUE Intent:LVB Reply Packet Structure
+image::ldlm-enqueue-intent-lvb-reply.png["LDLM_ENQUEUE Intent:LVB Reply Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-lvb-reply.png diagram resembles this text art:
+
+ LDLM_ENQUEUE:
+ --intent:lvb reply--------------------
+ | ptlrpc_body | ldlm_reply | ost_lvb |
+ --------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+The lock is not granted, but the attribute data has been updated.
--- /dev/null
+Getxattr
+~~~~~~~~
+
+The 'getxattr' VFS method queries extended attribute information for a
+resource given a path ('getxattr()' system call) or FID ('fgetxattr()'
+system call). If provided a path Lustre will go through a path lookup
+first (not shown [fixme: this should be a cross reference]) in order
+to determine the FID for the resource.
+
+Lustre maintains extended attribute information on the MDS, and a
+single RPC (request and reply) retrieves the information from the
+MDT. The RPC is an LDLM_ENQUEUE with the 'getxattr' intent. The RPC is
+shown in <<getxattr-rpcs>>.
+
+.Getxattr RPCs
+[[getxattr-rpcs]]
+image::getxattr_rpcs.png["getxattr RPCs",height=100]
+
+//////////////////////////////////////////////////////////////////////
+The getxattr_rpcs.png diagram resembles this text art:
+
+Time
+Step Client MDT OST
+ ------- ------- -------
+1 LDLM_ENQUEUE--->
+2 <---LDLM_ENQUEUE
+//////////////////////////////////////////////////////////////////////
+
+1 - The client issues an LDLM_ENQUEUE with a 'getxattr' intent request
+to the MDS.
+
+The LDLM_ENQUEUE request identifies the resource, and asks for a
+protected read lock on the inode (LDLM_IBITS = 13) with a 'getxattr'
+intent. It accompanies the request with a 'struct mdt_body' that
+provides the FID. The 'mbo_valid' field of the 'mdt_body' is
+0x3000000001 which corresponds to these flags:
+
+.Flags for 'mbo_valid' field of 'struct mdt_body'
+[options="header"]
+|====
+| Flag | Meaning
+| OBD_MD_FLID | FID
+| OBD_MD_FLXATTR | extended attributes
+| OBD_MD_FLXATTRLS | extended attributes list
+|====
+
+The inclusion of OBD_MD_FLXATTR and OBD_MD_FLXATTRLS indicates that the
+extended attributes are the information being requested for the FID.
+
+.LDLM_ENQUEUE Intent:Getxattr Request Packet Structure
+image::ldlm-enqueue-intent-getxattr-request.png["LDLM_ENQUEUE Intent:Getxattr Request Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getxattr-request.png diagram resembles this
+text art:
+
+ LDLM_ENQUEUE:
+ --intent:getxattr request------------------------------
+ | ptlrpc_body | ldlm_request | ldlm_intent | mdt_body |
+ -------------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+2 - The MDT replies with an LDLM_ENQUEUE with the extended attributes
+data.
+
+The LDLM_ENQUEUE reply grants the protected read lock on the inode
+bits. A 'struct mdt_body' (see <<struct-mdt-body>>) is present with no
+valid metadata ('mbo_valid' = 0), but the 'mbo_eadatasize',
+'mbo_aclsize', and 'mbo_max_mdsize' fields are valid. Those values give
+the sizes of the 'EA data' and 'EA vals' buffers, respectively, and
+the number of (__u32) entries in the 'EA lens' array. The 'struct
+lov_mds_md' and the ACL data are also present but carry no
+information.
+
+.LDLM_ENQUEUE Intent:Getxattr Reply Packet Structure
+image::ldlm-enqueue-intent-getxattr-reply.png["LDLM_ENQUEUE Intent:Getxattr Reply Packet Structure",height=80]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getxattr-reply.png diagram resembles this
+text art:
+
+ LDLM_ENQUEUE:
+ --intent:getxattr reply----------------------------------------
+ | ptlrpc_body | ldlm_reply | mdt_body | lov_mds_md | ACL data |
+ | EA data | EA vals | EA lens |
+ ---------------------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+The last three sections of the message convey the extended attributes
+information. They are formed as follows: The "EA data" section lists a
+sequence of null-terminated strings, where each string identifies an
+extended attribute. The "EA vals" section is a block of data holding
+the values of the listed extended attributes. The "EA lens" section is
+an array of sizes (__u32). Each of those sizes gives the length of the
+EA value, in order, in the "EA vals" block.
+
+
'ldlm_reply' descriptors of the desired and granted locks.
.LDLM_ENQUEUE Generic Packet Structure
-image::ldlm-enqueue-generic.png["LDLM_ENQUEUE Generic Packet Structure",height=150]
+image::ldlm-enqueue-generic.png["LDLM_ENQUEUE Generic Packet Structure",height=100]
//////////////////////////////////////////////////////////////////////
The ldlm-enqueue-generic.png diagram resembles this text
--reply---------------------
| ptlrpc_body | ldlm_reply |
----------------------------
- --extent lvb reply--------------------
- | ptlrpc_body | ldlm_reply | ost_lvb |
- --------------------------------------
//////////////////////////////////////////////////////////////////////
However, there are many variants of this RPC. A lock request may signal
-----------------------------------------
//////////////////////////////////////////////////////////////////////
+And in this example the intent is to get attribute information.
+
+.LDLM_ENQUEUE Intent:Getattr Generic Packet Structure
+image::ldlm-enqueue-intent-getattr-generic.png["LDLM_ENQUEUE Intent:Getattr Generic Packet Structure",height=150]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getattr-generic.png diagram resembles this text
+art:
+
+ LDLM_ENQUEUE:
+ --intent:getattr request-------------------------------
+ | ptlrpc_body | ldlm_request | ldlm_intent | mdt_body |
+ lustre_capa |name |
+ -------------------------------------------------------
+ --intent:getattr reply--------------------------
+ | ptlrpc_body | ldlm_reply | mdt_body | mdt_md |
+ ------------------------------------------------
+ --intent:lvb reply--------------------
+ | ptlrpc_body | ldlm_reply | ost_lvb |
+ --------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+Here is another example of an intent, in this case the 'getxattr' intent.
+
+.LDLM_ENQUEUE Intent:Getxattr Generic Packet Structure
+image::ldlm-enqueue-intent-getxattr-generic.png["LDLM_ENQUEUE Intent:Getxattr Generic Packet Structure",height=125]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-enqueue-intent-getxattr-generic.png diagram resembles this text
+art:
+
+ LDLM_ENQUEUE:
+ --intent:getxattr request------------------------------------
+ | ptlrpc_body | ldlm_request |ldlm_intent | mdt_body | capa |
+ -------------------------------------------------------------
+ --intent:getxattr reply----------------------------------------
+ | ptlrpc_body | ldlm_reply | mdt_body | lov_mds_md | ACL data |
+ | EA data | EA vals | EA lens |
+ ---------------------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
'ptlrpc_body'::
RPC descriptor.
Description of the layout information that is the subject of a layout
intent.
+'mdt_body'::
+In a request, an indication (in the 'mbo_valid' field) of what
+attributes the requester would like. In a reply, (again based on
+'mbo_valid') the values being returned.
+
+'lustre_capa'::
+So called "capabilities" structure. This is deprecated in recent
+versions of Lustre, and commonly appears in the packet header as a zero
+length buffer.
+
+'name'::
+A text field supplying the name of the desired resource.
+
'ldlm_reply'::
Resembling the 'ldlm_request', but in this case indicating what the
LDLM actually granted as well as relevant policy data.
+'mdt_md'::
+Layout data for the resource. This buffer is optional and will appear
+as zero length in some packets.
+
+'mdt_body'::
+Metadata about a given resource.
+
+'ACL data'::
+Access Control List data associated with a resource.
+
+'EA data'::
+The names of any extended attributes associated with the resource. The
+names are null-terminated strings concatenated into a single sequence.
+
+'EA vals'::
+A block of data concatenating the values for the extended attributes
+listed in "EA data".
+
+'EA lens'::
+The sizes of the extended attribute values. This is a sequence of
+32-bit unsigned integers, one for each extended
+attribute. The sizes give the length of the corresponding extended
+attribute in the "EA vals" block of data. Thus the sum of those sizes
+equals the length of the "EA vals".
+
Lock Value Block::
A Lock Value Block (LVB) is included in the LDLM_ENQUEUE reply message
when one of three things needs to be communicated back to the
returned from an MDT to a client requesting a lock with a
layout intent. In an intent request (as opposed to a reply and as yet
unimplemented) it will modify the layout. It will not be included
-(zero length) in requests in current releases.
+(zero length) in requests in current releases.
--- /dev/null
+Command 104: LDLM_GL_CALLBACK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[ldlm-gl-callback-rpc]]
+
+An RPC that assists with getting a lock back from an entity that has
+it.
+
+.LDLM_GL_CALLBACK Generic Packet Structure
+image::ldlm-gl-callback-generic.png["LDLM_GL_CALLBACK Generic Packet Structure",height=100]
+
+//////////////////////////////////////////////////////////////////////
+The ldlm-gl-callback-generic.png diagram resembles this text
+art:
+
+ LDLM_GL_CALLBACK:
+ --request---------------------
+ | ptlrpc_body | ldlm_request |
+ ------------------------------
+ --reply------------------
+ | ptlrpc_body | ost_lvb |
+ -------------------------
+//////////////////////////////////////////////////////////////////////
+
+The request RPC resembles the simplest LDLM_ENQUEUE RPC, but only
+identifies the relevant resource that the destination entity already
+had a lock on. It asks the recipient to flush its dirty write cache,
+and notify the requester of size and time attributes once that is
+done. The reply updates the attributes on the requester.
+
+'ptlrpc_body'::
+RPC descriptor.
+
+'ldlm_request'::
+Description of the lock being requested. Which resource is the target,
+what lock is current, and what lock desired.
+
+'ost_lvb'::
+Attribute data associated with a resource on an OST.
+
^^^^^^^^^^
[[struct-ldlm-reply]]
-The 'ldlm_reply' structure is the reciprocal of the 'ldlm_request'.
+The 'ldlm_reply' structure is the reciprocal of the 'ldlm_request'.
----
struct ldlm_reply {
The 'lmm_magic' field is filled in with the value LOV_MAGIC
(0x0BD10BD0) when the structure is in use. If the structure is in a
buffer of an RPC without the magic number in place, then the rest of
-the structure is ignored.
+the structure is ignored.
The 'lmm_pattern' field is only ever set to LOV_PATTERN_RAID0
-(0x001).
+(0x001).
The 'lmm_oi' field gives the LOV object ID for the first OST of the
layout. This is the OST where striping will begin.
The 'lmm_stripe_size' field give the stripe size for the object. This
is how many bytes will be on a particular OST before going to the next
-stripe.
+stripe.
The 'lmm_stripe_count' field gives how many OSTs the file is striped
over.
The 'lmm_layout_gen' field ges updated as the layout of the obeject is
updated. This way out-of-date references to the layout can be
-recognized.
+recognized.
The 'lmm_objects' array gives per-stripe data for more complex
(non-uniform) layouts.
A Lustre message is a sequence of bytes. The message begins with a
<<lustre-message-header,Lustre Message Header>> and has between one
-and nine subsequences called "buffers". Each buffer has a structure
+and nine sub-sequences called "buffers". Each buffer has a structure
(the size and meaning of the bytes) that corresponds to the 'struct'
entities in the <<data-structs,Data Structures and Defines Section>>
Section. The header gives the number of buffers in its 'lm_buffcount'
field. The first buffer in any message is always the
-<<lustre-message-preamble,Lustre Message Preamble>>. The operation
+<<struct-ptlrpc-body,Lustre RPC Descriptor>>. The operation
code ('pb_opc' field) and the message type ('pb_type' field: request
-or reply?) in the preamble together specify the "format" of the
+or reply?) in the descriptor together specify the "format" of the
message, where the format is the number and content of the remaining
buffers. As a shorthand, it is useful to name each of these formats,
and the following list gives all of the formats along with the number
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| llogd_body | LLog description
| string | The name of the desired log
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| llogd_body | LLog description
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| llog_log_hdr | LLog log header info
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| llogd_body | LLog description
| eadata | variable length field for extended attributes
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| mdt_body | Information about the MDT
| MDT_MD | OST stripe and index info
| ACL | ACLs for the fid
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| mdt_body | Information about the MDT
| lustre_capa | security capa info
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| mdt_body | Information about the MDT
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| mgs_config_body | Information about the MGS supporting the request
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| mgs_config_body | Information about the MGS supporting the request
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| obd_uuid | UUID of the target
| obd_uuid | UUID of the client
| lustre_handle | connection handle
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| obd_connect_data | connection data
|====
[options="header"]
|====
| structure | meaning
-| ptlrpc_body | message preamble
+| ptlrpc_body | RPC descriptor
| obd_statfs | statfs system call info
|====
[[lustre-operations]]
Lustre operations are denoted by the 'pb_opc' op-code field of the
-message preamble. Each operation is implemented as a pair of messages,
+RPC descriptor. Each operation is implemented as a pair of messages,
with the 'pb_type' field set to PTLRPC_MSG_REQUEST for requests
initiating the operation, and PTLRPC_MSG_REPLY for replies. Note that
-as a general matter, the receipt by a client of the rply message only
+as a general matter, the receipt by a client of the reply message only
assures the client hat the server has initiated the action, if
any. See the discussion on <<transno,transaction numbers>>
and <<recovery>> for how the client is given confirmation that a
Command 8: OST CONNECT - Client connection to an OST
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[ost-connect-rpc]]
.OST_CONNECT (8)
[options="header"]
capabilities appropriate to the client. The 'ocd_brw_size' is set to the
largest value for the size of an RPC that the client can handle. The
'ocd_ibits_known' and 'ocd_checksum_types' values are set to what the client
-considers appropriate. Other fields in the preamble and
+considers appropriate. Other fields in the descriptor and
'obd_connect_data' structures are zero, as is the 'lustre_handle'
element.
The target maintains the last committed transaction for a client in
its export for that client. If this is the first connection, then that
-last transactiion value would just be zero. If there were previous
+last transaction value would just be zero. If there were previous
transactions for the client, then the transaction number for the last
such committed transaction is put in the 'pb_last_committed' field.
Command 9: OST DISCONNECT - Disconnect client from an OST
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[ost-disconnect-rpc]]
.OST_DISCONNECT (9)
[options="header"]
|====
The information exchanged in a DISCONNECT message is that normally
-conveyed in the mesage preamble.
+conveyed in the RPC descriptor.
include::ost_punch.txt[]
+include::ost_statfs.txt[]
+
Command 33: MDS GETATTR - Get MDS Attributes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mds-getattr-rpc]]
.MDS_GETATTR (33)
[options="header"]
Command 38: MDS CONNECT - Client connection to an MDS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mds-connect-rpc]]
.MDS_CONNECT (38)
[options="header"]
N.B. This is nearly identical to the explanation for OST_CONNECT and
for MGS_CONNECT. We may want to simplify and/or unify the discussion
-and only call out how this one differees from a generic CONNECT
+and only call out how this one differs from a generic CONNECT
operation.
When a client initiates a connection to a specific target on an MDS,
capabilities appropriate to the client. The 'ocd_brw_size' is set to the
largest value for the size of an RPC that the client can handle. The
'ocd_ibits_known' and 'ocd_checksum_types' values are set to what the client
-considers appropriate. Other fields in the preamble and
+considers appropriate. Other fields in the descriptor and
'obd_connect_data' structures are zero, as is the 'lustre_handle'
element.
The target maintains the last committed transaction for a client in
its export for that client. If this is the first connection, then that
-last transactiion value would just be zero. If there were previous
+last transaction value would just be zero. If there were previous
transactions for the client, then the transaction number for the last
such committed transaction is put in the 'pb_last_committed' field.
Command 39: MDS DISCONNECT - Disconnect client from an MDS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mds-disconnect-rpc]]
.MDS_DISCONNECT (39)
[options="header"]
|====
The information exchanged in a DISCONNECT message is that normally
-conveyed in the mesage preamble.
+conveyed in the RPC descriptor.
Command 40: MDS GETSTATUS - get the status from a target
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mds-getstatus-rpc]]
The MDS_GETSTATUS command targets a specific MDT. If there are several,
the client will need to send a separate message for each.
In the reply there is additional information about the MDT's
capabilities.
-Command 41: MDS STATFS - get statfs data from the server
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.MDS_STATFS (41)
-[options="header"]
-|====
-| request | reply
-| empty | obd_statfs_server
-|====
-
-The 'empty' request message is one that only has the 'ptlrpc_body'
-data encoded. The fields have thier generic values for a request from
-this client, with 'pb_opc' being set to MDS_STATFS (41).
-
-In the reply there is, in addition to the 'ptlrpc_body', data relevant
-to a 'statfs' system call.
+include::mds_statfs.txt[]
include::mds_getxattr.txt[]
include::ldlm_cp_callback.txt[]
+include::ldlm_gl_callback.txt[]
+
Command 250: MGS CONNECT - Client connection to an MGS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mgs-connect-rpc]]
.MGS_CONNECT (250)
[options="header"]
capabilities appropriate to the client. The 'ocd_brw_size' is set to the
largest value for the size of an RPC that the client can handle. The
'ocd_ibits_known' and 'ocd_checksum_types' values are set to what the client
-considers appropriate. Other fields in the preamble and
+considers appropriate. Other fields in the descriptor and
'obd_connect_data' structures are zero.
Once the server receives the 'obd_connect_client' message on behalf of
The target maintains the last committed transaction for a client in
its export for that client. If this is the first connection, then that
-last transactiion value would just be zero. If there were previous
+last transaction value would just be zero. If there were previous
transactions for the client, then the transaction number for the last
such committed transaction is put in the 'pb_last_committed' field.
Command 251: MGS DISCONNECT - Disconnect client from an MGS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mgs-disconnect-rpc]]
.MGS_DISCONNECT (251)
[options="header"]
it also has 'empty' messages gong back and forth.
The information exchanged in a DISCONNECT message is that normally
-conveyed in the mesage preamble.
+conveyed in the RPC descriptor.
Command 256: MGS CONFIG READ - Read MGS configuration info
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[mgs-config-read-rpc]]
.MGS_CONFIG_READ (256)
[options="header"]
Command 501: LLOG ORIGIN HANDLE CREATE - Create llog handle
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[llog-origin-handle-create-rpc]]
.LLOG_ORIGIN_HANDLE_CREATE (510)
[options="header"]
Command 502: LLOG ORIGIN HANDLE NEXT BLOCK - Read the next block
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[llog-origin-handle-next-block-rpc]]
.LLOG_ORIGIN_HANDLE_NEXT_BLOCK (502)
[options="header"]
Command 503: LLOG ORIGIN HANDLE READ HEADER - Read handle header
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+[[llog-origin-handle-read-header-rpc]]
.LLOG_ORIGIN_HANDLE_READ_HEADER (503)
[options="header"]
| llogd_body_only | llog_log_hdr_only
|====
-LLOG_ORIGIN_HANDLE_NEXT_BLOCK
[[mds-reint-rpm]]
An RPC that implements an operation that will change the information
-on an MDT.
+on an MDT. There are a variety of operations all gathered under the
+MDS_REINT 'opcode'.
+
+----
+typedef enum {
+ REINT_SETATTR = 1,
+ REINT_CREATE = 2,
+ REINT_LINK = 3,
+ REINT_UNLINK = 4,
+ REINT_RENAME = 5,
+ REINT_OPEN = 6,
+ REINT_SETXATTR = 7,
+ REINT_RMENTRY = 8,
+ REINT_MIGRATE = 9,
+ REINT_MAX
+} mds_reint_t, mdt_reint_t;
+----
REINT_SETATTR
^^^^^^^^^^^^^
-------------------------------------------------------
//////////////////////////////////////////////////////////////////////
+The second buffer ('mdt_rec_setattr' in the above) is one of the
+variants specific to the particular REINT as given by the
+'mdt_reint_t' opcode. Each such variant has the same number and size
+of fields, but how the fields are interpreted varies slightly between
+variants. For all the variant structures refer to
+<<mds-reint-structs>>.
+
+REINT_SETXATTR
+^^^^^^^^^^^^^^
+[[mds-reint-setxattr-rpc]]
+
+An RPC that implements the 'setxattr' sub-command of the MDS_REINT.
+
+.MDS_REINT:REINT_SETXATTR Generic Packet Structure
+image::mds-reint-setxattr-generic.png["MDS_REINT:REINT_SETXATTR Generic Packet Structure",height=100]
+
+//////////////////////////////////////////////////////////////////////
+The mds-reint-setxattr-generic.png diagram resembles this text art:
+
+ MDS_REINT:
+ --REINT_SETXATTR-request-------------------------------------
+ | ptlrpc_body | mdt_rec_setxattr | lustre_capa | mdt_ioepoc |
+ eadata | llog_cookie | ldlm_request |
+ -------------------------------------------------------------
+
+ --REINT_SETXATTR-reply---------------------------------
+ | ptlrpc_body | mdt_body | mdt_md | acl | lustre_capa |
+ lustre_capa |
+ -------------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
'ptlrpc_body'::
RPC descriptor.
'mdt_rec_setattr'::
Information pertinent to setting attributes on the MDT.
+'mdt_rec_setxattr'::
+Information pertinent to setting extended attributes on the MDT.
+
'lustre_capa'::
So called "capabilities" structure. This is deprecated in recent
versions of Lustre, and commonly appears in the packet header as a zero
include::mdt_rec_reint.txt[]
include::mdt_rec_setattr.txt[]
+
+include::mdt_rec_setxattr.txt[]
--- /dev/null
+Command 41: MDS_STATFS
+~~~~~~~~~~~~~~~~~~~~~~
+[[mds-statfs-rpc]]
+
+MDS_STATFS is an RPC that queries data about the underlying file
+system for a given MDT. It is generated in response to an explicit
+call for 'statfs' information from the VFS via the 'statfs(2)'
+function.
+
+The MDS_STATFS request message is a so-called "empty" message in that
+it only has a buffer for the 'ptlrpc_body' with the 'pb_opc' value
+MDS_STATFS (41).
+
+The reply message conveys 'statfs' data when it succeeds, and an error
+code if it doesn't.
+
+.MDS_STATFS Generic Packet Structure
+
+:frame: none
+:grid: none
+[width="50%", cols="2a"]
+|====
+| request
+[cols="1"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> !
+!===================
+| reply
+[cols="2"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> ! <<struct-obd-statfs,obd_statfs>> !
+!===================
+|====
+
+'ptlrpc_body':: RPC descriptor. Only the 'pb_opc' value (MDS_STATFS =
+41) is directly relevant to the MDS_STATFS request message. The rest
+of the 'ptlrpc_body' fields handle generic information about the
+RPC, as discussed in <<struct-ptlrpc-body>>, including generic error
+conditions. In a normal reply ('pb_type' = PTL_RPC_MSG_REPLY) the
+'pb_status' field is 0. The one error that can be returned in
+'pb_status' that is specifically from MDS_STATFS handling is -ENOMEM,
+which occurs if there is not enough memory to allocate a temporary
+buffer for the 'statfs' data.
+
+'obd_statfs'::
+File system wide statistics corresponding to 'struct statfs' as well
+as Lustre-specific information. See <<struct-obd-statfs>> for a
+detailed discussion.
--- /dev/null
+REINT_SETXATTR
+^^^^^^^^^^^^^^
+[[mdt-rec-setxattr]]
+
+The variant of the 'mdt_rec_reint' for the 'setxattr' operation is:
+
+----
+struct mdt_rec_setxattr {
+ __u32 sx_opcode;
+ __u32 sx_cap;
+ __u32 sx_fsuid;
+ __u32 sx_fsuid_h;
+ __u32 sx_fsgid;
+ __u32 sx_fsgid_h;
+ __u32 sx_suppgid1;
+ __u32 sx_suppgid1_h;
+ __u32 sx_suppgid2;
+ __u32 sx_suppgid2_h;
+ struct lu_fid sx_fid;
+ __u64 sx_padding_1;
+ __u32 sx_padding_2;
+ __u32 sx_padding_3;
+ __u64 sx_valid;
+ __s64 sx_time;
+ __u64 sx_padding_5;
+ __u64 sx_padding_6;
+ __u64 sx_padding_7;
+ __u32 sx_size;
+ __u32 sx_flags;
+ __u32 sx_padding_8;
+ __u32 sx_padding_9;
+ __u32 sx_padding_10;
+ __u32 sx_padding_11;
+};
+----
+
+The 'setxattr' variant modifies the semantics of the generic REINT
+fields as follows:
+
+There is only one FID to be operated upon ('sx_fid'), and the space
+for the second 'struct lu_fid' is just "padding".
+
+The 'sx_valid' field identifies which of the other fields in the
+structure are to be honored. If the corresponding flag bit is not set
+then the value of the corresponding field is to be ignored. The flag
+values draw from the same set of definitions as <<mdt-rec-setattr>>.
+
+.Flags for 'sx_valid' field of 'struct mdt_rec_setxattr'
+[options="header"]
+|====
+| Flag | Meaning
+| OBD_MD_FLCTIME | ctime attribute
+|====
+
+The 'sx_time' field is set to the 'ctime' value for the update and the
+OBD_MD_FLCTIME value is used in the 'sx_valid' field to indicate the
+value is to be honored. Fixme: The other flag values for 'sx_valid'
+are not clear in the code, so I need to dig into them more
+deeply. Similarly, the use and possible values for the 'sx_flags'
+field are not obvious.
+
[[mdt-structs]]
These structures convey information to or from an MDT concerning the
-metadata about a resource.
+metadata about a resource.
include::mdt_body.txt[]
--- /dev/null
+OBD Statfs
+^^^^^^^^^^
+[[struct-obd-statfs]]
+
+An 'obd_statfs' structure conveys file-system-wide information for the
+back-end file system of a given target (MDT or OST).
+
+----
+struct obd_statfs {
+ __u64 os_type;
+ __u64 os_blocks;
+ __u64 os_bfree;
+ __u64 os_bavail;
+ __u64 os_files;
+ __u64 os_ffree;
+ __u8 os_fsid[40];
+ __u32 os_bsize;
+ __u32 os_namelen;
+ __u64 os_maxbytes;
+ __u32 os_state;
+ __u32 os_fprecreated;
+ __u32 os_spare2;
+ __u32 os_spare3;
+ __u32 os_spare4;
+ __u32 os_spare5;
+ __u32 os_spare6;
+ __u32 os_spare7;
+ __u32 os_spare8;
+ __u32 os_spare9;
+};
+----
+
+Most of the fields correspond directly to the 'struct statfs' fields
+returned by a 'statfs()' system call and have the same meaning. The
+values are for the back-end storage of the target in question (MDT or
+OST).
+
+The 'os_type' field gives the type of the target's back-end file
+system:
+
+.Back-end file types ('os_type') for Lustre targets
+[options="header"]
+|====
+| f_type | value
+| EXT?_SUPER_MAGIC (ldiskfs) | 0xEF53
+| ZFS_SUPER_MAGIC | 0x2fc12fc1
+|====
+
+The 'os_blocks' field is the total number of blocks in the target, in
+units of os_bsize.
+
+The 'os_bfree' field is the number of blocks not currently in use.
+
+The 'os_bavail' is the number of blocks available to be allocated to
+new files.
+
+The 'os_files' field is the total number of files on the target, both
+allocated and free. For some OSD types this is a static number, and
+for others this is dynamic based on os_ffree and the amount of free
+space.
+
+The 'os_ffree' field is the current number of files that could be
+created based on current usage. For some OSD types this is a function
+of the free space in the device and may increase or decrease as free
+space changes.
+
+The 'os_fsid' is intended to be the target backing device UUID in
+ASCII format. The current osd-ldiskfs and osd-zfs implementations
+don't fill this in.
+
+The 'os_bsize' field is the block size in bytes. This is for computing
+the total, free, and available space in combination with os_blocks,
+os_bfree, and os_bavail respectively. It does not necessarily
+represent the minimum or optimal IO size.
+
+The 'os_namelen' field gives the maximum name length for files on the
+back-end file system.
+
+The 'os_maxbytes' field is the maximum size of a single object
+(i.e. the maximum byte offset that can be written to). This is the
+same value as the 'ocd_maxbytes' field of the 'obd_connect_data'
+structure.
+
+The 'os_state' field encodes the status of the underlying back-end
+file system. It can be:
+
+.Back-end file system state
+[options="header"]
+|====
+| os_state flag | value
+| OS_STATE_DEGRADED | 0x1
+| OS_STATE_READONLY | 0x2
+|====
+
+In normal operation the 'os_state' value is returned as 0x0. If the
+back-end file system has a RAID configuration that is degraded or
+rebuilding the state is returned with the OS_STATE_DEGRADED (0x1) flag
+set. If the file system has been set to read-only, for whatever
+reason, then the state is returned with the OS_STATE_READONLY (0x2)
+flag set, for example if it was explicitly mounted read-only, or
+corruption has been detected at runtime in the backing filesystem.
+
+The 'os_fprecreated' field counts the number of pre-created objects
+available on an OST. The 'os_fprecreated' value counts as "used"
+inodes, so it reduces 'os_ffree'. Lustre places a hard limit of
+2*OST_MAX_PRECREATE (2*20000) on the number of precreates it will
+allow for an OST. There are currently no precreated objects on an MDT
+so it is just 0 in that case.
~~~~~~~~~~~~~~~~~~~~~~
[[ost-setattr-rpc]]
-An RPC that sets resource attributes.
+OST_SETATTR (pb_opc = 2) is an RPC that sets resource attributes.
.OST_SETATTR Generic Packet Structure
image::ost-setattr-generic.png["OST_SETATTR Generic Packet Structure",height=100]
^^^^
[[struct-obdo]]
-The 'obdo' structure conveys metadata about a resource on an OST.
+The 'obdo' structure conveys metadata about a resource on an OST.
----
struct obdo {
^^^^^^
[[struct-ost-id]]
-The 'ost_id' identifies an object on a particular OST.
+The 'ost_id' identifies an object on a particular OST.
----
struct ost_id {
--- /dev/null
+Command 13: OST_STATFS
+~~~~~~~~~~~~~~~~~~~~~~
+[[ost-statfs-rpc]]
+
+OST_STATFS ('pb_opc' = 13) is an RPC that queries data about the
+underlying file system for a given OST. Its form and use are nearly
+identical to the MDS_STATFS RPC. Refer to <<mds-statfs-rpc>> for
+details. The only differences in OST_STATFS are that it has a distinct
+'pb_opc' value, it carries information about an OST (instead of an
+MDT), and it can be generated in this one additional way: There is a
+regularly scheduled process that 'pings' each OST from each MDT using
+OST_STATFS, so that the MDT can remain current on the available space
+on the OSTs.
+
--- /dev/null
+Path Lookup
+-----------
+[[path-lookup]]
+
+When an operation is to be performed on a file or directory (the
+"target"), say a 'stat' command, the target is specified with a path.
+The Lustre client host maintains a cache of information about
+directories and files. The first time a target is visited the cache
+must be loaded with that information. To get the information the
+client requests a 'concurrent read: LOOKUP IBITS' lock on each path
+element from the root (of the Lustre file system) up to the target.
+The type of the lock requested for the target will depend on the
+operation being performed. The client does not need to send a new lock
+request for a path element if it is currently holding a compatible
+lock in its cache.
+
+Each lock request is accompanied by an "intent" that characterizes
+the intended operation for which the lock is being acquired. For path
+elements before the target the intent is a 'lookup' and for the target
+the intent corresponds to an operation on the target. For example, in
+a 'stat' command the lock's intent for the target will be a getattr.
+
include::llog.txt[]
+include::path_lookup.txt[]
+
include::recovery.txt[]
include::security.txt[]
--- /dev/null
+Ptlrpc_body - The Lustre RPC Descriptor
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+[[struct-ptlrpc-body]]
+
+Every Lustre message starts with both the above header and an
+additional set of fields (in its first "buffer") given by the 'struct
+ptlrpc_body_v3' structure. This descriptor has information
+relevant to every RPC type. In particular, the RPC type is itself
+encoded in the 'pb_opc' Lustre operation number. The value of that
+opcode, as well as whether it is an RPC 'request' or 'reply',
+determines what else will be in the message following the descriptor.
+----
+#define PTLRPC_NUM_VERSIONS 4
+#define JOBSTATS_JOBID_SIZE 32
+struct ptlrpc_body {
+ struct lustre_handle pb_handle;
+ __u32 pb_type;
+ __u32 pb_version;
+ __u32 pb_opc;
+ __u32 pb_status;
+ __u64 pb_last_xid;
+ __u64 pb_last_seen;
+ __u64 pb_last_committed;
+ __u64 pb_transno;
+ __u32 pb_flags;
+ __u32 pb_op_flags;
+ __u32 pb_conn_cnt;
+ __u32 pb_timeout;
+ __u32 pb_service_time;
+ __u32 pb_limit;
+ __u64 pb_slv;
+ __u64 pb_pre_versions[PTLRPC_NUM_VERSIONS];
+ __u64 pb_padding[4];
+ char pb_jobid[JOBSTATS_JOBID_SIZE];
+};
+----
+
+In a connection request, sent by a client to a server and regarding a
+specific target, the 'pb_handle' is 0. In the reply to a connection
+request, sent by the target, the handle is a value uniquely
+identifying the target. Subsequent messages between this client and
+this target will use this handle to gain access to their shared
+state. The handle is persistent across client reconnects to the same
+instance of the server, but if the client unmounts the filesystem or
+is evicted then it must re-connect as a new client.
+
+The 'pb_type' is PTL_RPC_MSG_REQUEST in messages when they are
+initiated, it is PTL_RPC_MSG_REPLY in a reply, and it is
+PTL_RPC_MSG_ERR in a reply to convey that a message was received that
+could not be interpreted, that is, if it was corrupt or
+incomplete. The encoding of those type values is given by:
+----
+#define PTL_RPC_MSG_REQUEST 4711
+#define PTL_RPC_MSG_ERR 4712
+#define PTL_RPC_MSG_REPLY 4713
+----
+The 'pb_type' = PTL_RPC_MSG_ERR is only for message handling errors.
+This may be a message that failed to be interpreted as an actual
+message, or it may indicate some more general problem with the
+connection between the client and the target. Note that other errors,
+such as those that emerge from processing the actual message content,
+do not use the PTL_RPC_MSG_ERR type.
+
+The 'pb_status' field provides an error return code, if any, for the
+RPC. When 'pb_type' = PTL_RPC_MSG_ERR the 'pb_status' will also be
+set to one of the following message handling errors:
+
+.format
+[options="header"]
+|====
+| pb_type | pb_status | if
+| PTL_RPC_MSG_ERR | -ENOMEM | No memory for reply buffer
+| PTL_RPC_MSG_ERR | -ENOTSUPP | Invalid opcode
+| PTL_RPC_MSG_ERR | -EINVAL | Bad magic or version
+| PTL_RPC_MSG_ERR | -EPROTO | Request is malformed or cannot be
+ processed in current context
+|====
+
+A PTL_RPC_MSG_ERR message does not need to allocate memory, so it
+should normally be sent as a reply even if there is not enough memory
+to allocate the normal reply buffer, unless the underlying network
+transport itself cannot allocate memory to send it. (fixme: and what
+happens then?)
+
+In most cases there is a reply with 'pb_type' = PTL_RPC_MSG_REPLY,
+indicating that the request was processed, but it may still have
+'pb_status' set to a non-zero value to indicate that the request
+encountered an error during processing (see below). This may indicate
+something very specific to the particular RPC, but it may also be a
+very general sort of error. Those that are specific to particular RPCs
+will be documented with the respective RPCs, and those that are more
+generic are listed here:
+
+.format
+[options="header"]
+|====
+| pb_type | pb_status | meaning
+| PTL_RPC_MSG_REPLY | -ENOTCONN | Client is not connected to the
+ target, typically meaning the
+ server was restarted or the
+ client was evicted, and the
+ client needs to reconnect.
+| PTL_RPC_MSG_REPLY | -EINPROGRESS | The request cannot be processed
+ currently due to some other
+ factor, such as during initial
+ mount, a delay contacting the
+ quota master during a write, or
+ LFSCK rebuilding the OI table,
+ but the client should continue
+ to retry after a delay until
+ interrupted or successful. This
+ avoids blocking the server
+ threads with client requests
+ that cannot currently be
+ processed, but other requests
+ might be processed in the
+ meantime.
+| PTL_RPC_MSG_REPLY | -ESHUTDOWN | The server is being stopped and
+ no new connections are allowed.
+|====
+
+The significance of -ENOTCONN is discussed more fully in
+<<connection>>, but a brief comment may be useful here. The networking
+layers supporting the exchange of RPCs can be in good working order
+when 'pb_status' = -ENOTCONN is returned in an RPC reply message. The
+connection referred to by that status is the Lustre connection. That
+connection is part of the shared state between Lustre clients and
+servers that gets established via MDS_CONNECT and OST_CONNECT RPCs,
+and can be lost due to an 'eviction'. So, even when that Lustre
+connection is lost (or has not been established, yet), RPC messages
+can be exchanged.
+
+The 'pb_version' identifies the version of the Lustre protocol and is
+derived from the following constants. The lower two bytes give the
+version of PtlRPC being employed in the message, and the upper two
+bytes encode the role of the host for the service being
+requested. That role is one of OBD, MDS, OST, DLM, LOG, or MGS.
+----
+#define PTLRPC_MSG_VERSION 0x00000003
+#define LUSTRE_VERSION_MASK 0xffff0000
+#define LUSTRE_OBD_VERSION 0x00010000
+#define LUSTRE_MDS_VERSION 0x00020000
+#define LUSTRE_OST_VERSION 0x00030000
+#define LUSTRE_DLM_VERSION 0x00040000
+#define LUSTRE_LOG_VERSION 0x00050000
+#define LUSTRE_MGS_VERSION 0x00060000
+----
+
+The 'pb_opc' value (operation code) gives the actual Lustre operation
+that is the subject of this message. For example, MDS_CONNECT is a
+Lustre operation (number 38). The following list gives the name used
+and the value for each operation.
+----
+typedef enum {
+ OST_REPLY = 0,
+ OST_GETATTR = 1,
+ <<ost-setattr-rpc,OST_SETATTR>> = 2,
+ OST_READ = 3,
+ OST_WRITE = 4,
+ OST_CREATE = 5,
+ OST_DESTROY = 6,
+ OST_GET_INFO = 7,
+ <<ost-connect-rpc,OST_CONNECT>> = 8,
+ <<ost-disconnect-rpc,OST_DISCONNECT>> = 9,
+ OST_PUNCH = 10,
+ OST_OPEN = 11,
+ OST_CLOSE = 12,
+ OST_STATFS = 13,
+ OST_SYNC = 16,
+ OST_SET_INFO = 17,
+ OST_QUOTACHECK = 18,
+ OST_QUOTACTL = 19,
+ OST_QUOTA_ADJUST_QUNIT = 20,
+
+ <<mds-getattr-rpc,MDS_GETATTR>> = 33,
+ MDS_GETATTR_NAME = 34,
+ MDS_CLOSE = 35,
+ MDS_REINT = 36,
+ MDS_READPAGE = 37,
+ <<mds-connect-rpc,MDS_CONNECT>> = 38,
+ <<mds-disconnect-rpc,MDS_DISCONNECT>> = 39,
+ <<mds-getstatus-rpc,MDS_GETSTATUS>> = 40,
+ <<mds-statfs-rpc,MDS_STATFS>> = 41,
+ MDS_PIN = 42,
+ MDS_UNPIN = 43,
+ MDS_SYNC = 44,
+ MDS_DONE_WRITING = 45,
+ MDS_SET_INFO = 46,
+ MDS_QUOTACHECK = 47,
+ MDS_QUOTACTL = 48,
+ <<mds-getxattr-rpc,MDS_GETXATTR>> = 49,
+ MDS_SETXATTR = 50,
+ MDS_WRITEPAGE = 51,
+ MDS_IS_SUBDIR = 52,
+ MDS_GET_INFO = 53,
+ MDS_HSM_STATE_GET = 54,
+ MDS_HSM_STATE_SET = 55,
+ MDS_HSM_ACTION = 56,
+ MDS_HSM_PROGRESS = 57,
+ MDS_HSM_REQUEST = 58,
+ MDS_HSM_CT_REGISTER = 59,
+ MDS_HSM_CT_UNREGISTER = 60,
+ MDS_SWAP_LAYOUTS = 61,
+
+ <<ldlm-enqueue-rpc,LDLM_ENQUEUE>> = 101,
+ LDLM_CONVERT = 102,
+ <<ldlm-cancel-rpc,LDLM_CANCEL>> = 103,
+ <<ldlm-bl-callback-rpc,LDLM_BL_CALLBACK>> = 104,
+ <<ldlm-cp-callback-rpc,LDLM_CP_CALLBACK>> = 105,
+ <<ldlm-gl-callback-rpc,LDLM_GL_CALLBACK>> = 106,
+ LDLM_SET_INFO = 107,
+
+ <<mgs-connect-rpc,MGS_CONNECT>> = 250,
+ <<mgs-disconnect-rpc,MGS_DISCONNECT>> = 251,
+ MGS_EXCEPTION = 252,
+ MGS_TARGET_REG = 253,
+ MGS_TARGET_DEL = 254,
+ MGS_SET_INFO = 255,
+ <<mgs-config-read-rpc,MGS_CONFIG_READ>> = 256,
+
+ OBD_PING = 400,
+ OBD_LOG_CANCEL = 401,
+ OBD_QC_CALLBACK = 402,
+ OBD_IDX_READ = 403,
+
+ <<llog-origin-handle-create-rpc,LLOG_ORIGIN_HANDLE_CREATE>> = 501,
+ <<llog-origin-handle-next-block-rpc,LLOG_ORIGIN_HANDLE_NEXT_BLOCK>> = 502,
+ <<llog-origin-handle-read-header-rpc,LLOG_ORIGIN_HANDLE_READ_HEADER>> = 503,
+ LLOG_ORIGIN_HANDLE_WRITE_REC = 504,
+ LLOG_ORIGIN_HANDLE_CLOSE = 505,
+ LLOG_ORIGIN_CONNECT = 506,
+ LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508,
+ LLOG_ORIGIN_HANDLE_DESTROY = 509,
+
+ QUOTA_DQACQ = 601,
+ QUOTA_DQREL = 602,
+
+ SEQ_QUERY = 700,
+
+ SEC_CTX_INIT = 801,
+ SEC_CTX_INIT_CONT = 802,
+ SEC_CTX_FINI = 803,
+
+ FLD_QUERY = 900,
+ FLD_READ = 901,
+
+ UPDATE_OBJ = 1000
+} cmd_t;
+----
+The symbols and values above identify the operations Lustre uses in
+its protocol. They are examined in detail in the
+<<lustre-operations,Lustre Operations>> section. Lustre carries out
+each of these operations via the exchange of a pair of messages: a
+request and a reply. The details of each message are specific to each
+operation. The <<lustre-messages,Lustre Messages>> chapter discusses
+each message and its contents.
+
+The 'pb_status' field was already mentioned above in conjunction with
+the 'pb_type' field in replies. In a request message 'pb_status' is
+set to the 'pid' of the process making the request. In a reply
+message, a zero indicates that the service successfully initiated the
+requested operation. When an error is being reported the value will
+encode a standard Linux kernel (POSIX) error code as initially
+defined for the i386/x86_64 architecture. The 'pb_status' value is
+returned as a negative number, so for example, a permissions error
+would be indicated as -EPERM.
+
+'pb_last_xid' and 'pb_last_seen' are not used.
+
+The 'pb_last_committed' value is always zero in a request. In a reply
+it is the highest transaction number that has been committed to
+storage. The transaction numbers are maintained on a per-target basis
+and each series of transaction numbers is a strictly increasing
+sequence for modifications originating from any client. This field is
+set in any kind of reply message including pings and non-modifying
+transactions. If 'pb_last_committed' is larger than, or equal to, any
+of the client's uncommitted requests (see 'pb_transno' below) then the
+server is confirming those requests have been committed to stable
+storage. At that point the client will free the request structures.
+
+The 'pb_transno' value is always zero in a new request. It is also
+zero for replies to operations that do not modify the file system. For
+replies to operations that do modify the file system it is the
+target-unique, server-assigned transaction number for the client
+request. The 'pb_transno' assigned to each modifying request is in
+strictly increasing order, but may not be sequential for a single
+client, and the client may receive replies in a different order than
+they were processed by the server. Upon receipt of the reply, the
+client copies this transaction number from 'pb_transno' of the reply
+to 'pb_transno' of the saved request. If 'pb_transno' is larger than
+'pb_last_committed' (above) then the request has only been processed at
+the target and is not yet committed to stable storage. The client
+must save the request for later resend to the server in case the
+target fails before the modification can be committed to disk. If the
+request has to be replayed it will include the transaction number.
+
+The 'pb_flags' value governs the client state machine. Fixme: document
+what the states and transitions are of this state machine. Currently,
+only the bottom two bytes are used, and they encode state according to
+the following values:
+----
+#define MSG_GEN_FLAG_MASK 0x0000ffff
+#define MSG_LAST_REPLAY 0x0001
+#define MSG_RESENT 0x0002
+#define MSG_REPLAY 0x0004
+#define MSG_DELAY_REPLAY 0x0010
+#define MSG_VERSION_REPLAY 0x0020
+#define MSG_REQ_REPLAY_DONE 0x0040
+#define MSG_LOCK_REPLAY_DONE 0x0080
+----
+
+MSG_LAST_REPLAY is currently unused. It had been used to indicate that
+this is the last RPC request to be replayed by this client during
+recovery. MSG_LAST_REPLAY has been replaced by MSG_REQ_REPLAY_DONE
+and MSG_LOCK_REPLAY_DONE.
+
+MSG_RESENT is set when this RPC request is being resent because no
+reply was received.
+
+MSG_REPLAY indicates this RPC request is being replayed after the
+client received a reply but before it was committed to storage. The
+'pb_transno' field holds the server-assigned transaction number.
+
+MSG_DELAY_REPLAY is currently unused.
+
+MSG_VERSION_REPLAY indicates that a replayed request has
+pb_pre_versions[] filled with the prior object versions and can be
+used with Version Based Recovery.
+
+MSG_LOCK_REPLAY_DONE indicates the client has completed lock replay,
+and is ready to finish recovery.
+
+The 'pb_op_flags' values are specific to a particular 'pb_opc', but
+are currently only used by the *_CONNECT RPCs. The 'pb_op_flags' value
+for connect operations governs the client connection status state
+machine.
+
+----
+#define MSG_CONNECT_RECOVERING 0x00000001
+#define MSG_CONNECT_RECONNECT 0x00000002
+#define MSG_CONNECT_REPLAYABLE 0x00000004
+#define MSG_CONNECT_LIBCLIENT 0x00000010
+#define MSG_CONNECT_INITIAL 0x00000020
+#define MSG_CONNECT_ASYNC 0x00000040
+#define MSG_CONNECT_NEXT_VER 0x00000080
+#define MSG_CONNECT_TRANSNO 0x00000100
+----
+
+MSG_CONNECT_RECOVERING indicates the server is in recovery.
+
+MSG_CONNECT_RECONNECT indicates the client is reconnecting after
+non-responsiveness from the server.
+
+MSG_CONNECT_REPLAYABLE indicates the server connection supports RPC
+replay (only OSTs support non-recoverable connections, but that is not
+the default).
+
+The MSG_CONNECT_LIBCLIENT flag is for a 'liblustre' client. It is
+currently unused.
+
+The client sends MSG_CONNECT_INITIAL the first time the client is
+connecting to the server. MSG_CONNECT_INITIAL connections are not
+allowed during server recovery.
+
+MSG_CONNECT_ASYNC is currently unused.
+
+MSG_CONNECT_NEXT_VER indicates that the client can understand the next
+higher protocol version, and the server can reply to the connect with
+that RPC version if it is supported, otherwise it will reply with the
+same RPC version as the request. This allows RPC protocol versions to
+be negotiated during a transition period (e.g. upgrading the RPC protocol from
+LUSTRE_MSG_MAGIC_V1 to LUSTRE_MSG_MAGIC_V2).
+
+In normal operation an initial request to connect will set
+'pb_op_flags' to MSG_CONNECT_INITIAL (in some earlier versions
+MSG_CONNECT_NEXT_VER was mistakenly included, though it did no
+harm). The reply to that connection request (and all other,
+non-connect, requests and replies) will set 'pb_op_flags' to 0.
+
+The 'pb_conn_cnt' (connection count) value in a request message
+reports the client's "era", which is part of the client and server's
+shared state. The value of the era is initialized to one when it is
+first connected to the MDT. Each subsequent connection (after an
+eviction) increments the era for the client. Since the 'pb_conn_cnt'
+reflects the client's era at the time the message was composed the
+server can use this value to discard late-arriving messages requesting
+operations on out-of-date shared state.
+
+The 'pb_timeout' value in a request indicates how long (in seconds)
+the requester plans to wait before timing out the operation. That is,
+the corresponding reply for this message should arrive within this
+time frame. The service may extend this time frame via an "early
+reply", which is a reply to this message that notifies the requester
+that it should extend its timeout interval by the value of the
+'pb_timeout' field in the reply. The "early reply" does not indicate
+the operation has actually been initiated. Clients maintain multiple
+request queues, called "portals", and each type of operation is
+assigned to one of these queues. There is a timeout value associated
+with each queue, and the timeout update affects all the messages
+associated with the given queue, not just the specific message that
+initiated the request. Finally, in a reply message (one that does
+indicate the operation has been initiated) the timeout value updates
+the timeout interval for the queue. Is this last point different from
+the "early reply" update?
+
+The 'pb_service_time' value is zero in a request. In a reply it
+indicates how long this particular operation actually took from the
+time it first arrived in the request queue (at the service) to the
+time the server replied. Note that the client can use this value and
+the local elapsed time for the operation to calculate network latency.
+
+The 'pb_limit' value is zero in a request. In a reply it is a value
+sent from a lock service to a client to set the maximum number of
+locks available to the client. When dynamic lock LRU's are enabled
+this allows for managing the size of the LRU.
+
+The 'pb_slv' value is zero in a request. On a DLM service, the "server
+lock volume" is a value that characterizes (estimates) the amount of
+traffic, or load, on that lock service. It is calculated as the
+product of the number of locks and their age. In a reply, the 'pb_slv'
+value indicates to the client the available share of the total lock
+load on the server that the client is allowed to consume. The client
+is then responsible for reducing its number (or age) of locks to
+stay within this limit.
+
+The array of 'pb_pre_versions' values has four entries. They are
+always zero in a new request message. They are also zero in replies to
+operations that do not modify the file system. For an operation that
+does modify the file system, the reply encodes the most recent
+transaction numbers for the objects modified by this operation, and
+the 'pb_pre_versions' values are copied into the original request when
+the reply arrives. If the request needs to be replayed then the
+updated 'pb_pre_versions' values accompany the replayed request.
+
+'pb_padding' is reserved for future use.
+
+The 'pb_jobid' (string) value gives a unique identifier associated
+with the process on behalf of which this message was generated. The
+identifier is assigned to the user process by a job scheduler, if any.
1 - Client1 issues an MDS_REINT with the REINT_SETATTR sub-operation.
-In addition to the 'ptlrpc_body' (RPC message header), the MDS_REINT
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the MDS_REINT
request RPC from the client has the REINT structure 'mdt_rec_setattr', and a
lock request 'ldlm_request'. For a detailed discussion of all the fields in
the 'mdt_rec_setattr' and 'ldlm_request' refer to <<mdt-rec-setattr>>
2 - The MDS_REINT reply acknowledges the updated attributes.
-In addition to the 'ptlrpc_body' (RPC message header), the MDS_REINT
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the MDS_REINT
reply RPC to the client has the 'mdt_body' structure. For a detailed
discussion of the fields in the 'mdt_body' refer to <<struct-mdt-body>>.
Having received an LDLM_BL_CALLBACK Client2 must finish up with its
lock. Once it does it sends an LDLM_CANCEL request to the OST to
-signal that it is done.
+signal that it is done.
.LDLM_CANCEL Request Packet Structure
image::ldlm-cancel-request.png["LDLM_CANCEL Request Packet Structure",height=50]
--- /dev/null
+Setxattr
+~~~~~~~~
+
+The 'setxattr' VFS method is used to modify the extended attributes
+associated with a resource. The 'fsetxattr()' system call induces the
+'setxattr' variant of the MDS_REINT RPC.
+
+.Setxattr RPCs for Modifying a Resource's Extended Attributes
+[[setxattr-rpcs]]
+image::setxattr_rpcs.png["setxattr RPCs for modifying a resource's Extended Attributes",height=100]
+
+//////////////////////////////////////////////////////////////////////
+The setxattr_rpcs.png diagram resembles this text art:
+
+Time
+Step Client MDT OST
+ ------- ------- -------
+1 MDS_REINT------->
+2 <-------MDS_REINT
+//////////////////////////////////////////////////////////////////////
+
+1 - The client issues an MDS_REINT with the REINT_SETXATTR
+sub-operation.
+
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the MDS_REINT
+request RPC from the client has the REINT structure
+'mdt_rec_setxattr', the name of the extended attribute in question,
+the extended attribute data to put in place, and a lock request
+'ldlm_request'. For a detailed discussion of all the fields in the
+'mdt_rec_setxattr' and 'ldlm_request' refer to <<mdt-rec-setxattr>>
+and <<struct-ldlm-request>>.
+
+.MDS_REINT:REINT_SETXATTR Request Packet Structure
+image::mds-reint-setxattr-request.png["MDS_REINT:REINT_SETXATTR Request Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The mds-reint-setxattr-request.png diagram resembles this text art:
+
+ MDS_REINT:
+ --REINT_SETXATTR-request-------------------------
+ | ptlrpc_body | mdt_rec_setxattr | ldlm_request |
+ -------------------------------------------------
+//////////////////////////////////////////////////////////////////////
+
+The 'setxattr' wants to set the 'ctime' associated with the EA update
+as well, so the one 'sx_valid' flag it uses is OBD_MD_FLCTIME
+(0x00008). The 'sx_valid' field also has the OBD_MD_FLXATTR
+(0x01000000000) flag set, though that is never checked or used.
+
+The 'ldlm_request' structure encompasses an early lock cancellation
+(see <<early-lock-cancellation>>) on the lock that the client had
+previously acquired for the target resource. The lock handle
+identifies this lock. Only lock_count and lock_handle are used, and
+the rest of the ldlm_request is cleared, i.e. all fields set to
+zero. In some instances the ldlm_request is empty (all zeros)
+indicating early lock cancellation is not being used. This may be a
+bug in the code for that instance.
+
+2 - The MDS_REINT reply acknowledges the updated attributes.
+
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the MDS_REINT
+reply RPC to the client has the 'mdt_body' structure. For a detailed
+discussion of the fields in the 'mdt_body' refer to <<struct-mdt-body>>.
+
+.MDS_REINT:REINT_SETXATTR Reply Packet Structure
+image::mds-reint-setxattr-reply.png["MDS_REINT:REINT_SETXATTR Reply Packet Structure",height=50]
+
+//////////////////////////////////////////////////////////////////////
+The mds-reint-setxattr-reply.png diagram resembles this text art:
+
+ --REINT_SETXATTR-reply-----
+ | ptlrpc_body | mdt_body |
+ --------------------------
+//////////////////////////////////////////////////////////////////////
+
+The reply from the MDT after the setxattr operation has no valid
+'mdt_body' fields, 'mbo_valid' = 0x0.
--- /dev/null
+
+Stat
+~~~~
+
+The POSIX file system operation 'stat()' returns 'struct stat'
+information about a resource (file or directory), and in Lustre that
+information is obtained via the 'll_getattr()' function on the
+client. How this is accomplished within Lustre depends on the state of
+the client's directory entry cache as well as the locks it holds on
+the relevant resources. If the client has no information about the
+resource or any of the directories on the path leading to it (the
+cache is "cold") then it has to get that information first. Refer to
+<<path-lookup>> for the details. If the client cache already has all
+the lookup information about the path, and it has a valid read lock
+on the resource then the information can come entirely from cache: no
+Lustre messages flow at all. If the client does need to acquire a lock
+on the resource (for each relevant MDT and OST) then the client issues
+an LDLM_ENQUEUE request to the MDT (respectively OST) with the
+'getattr' intent, see <<intents>>. The getattr intent to the MDT
+returns UID, GID and mode, while the intent to the OST(s) returns the
+size and the file timestamps. The responses to the LDLM_ENQUEUE with
+intents provide all the required stat information without any
+additional messages.
+
--- /dev/null
+Statfs
+~~~~~~
+
+The 'statfs' VFS method queries file-system-wide space and inode
+usage. For details about the MDS_STATFS RPC, including possible faults
+and error return codes, see <<mds-statfs-rpc>>, and for OST_STATFS see
+<<ost-statfs-rpc>>.
+
+A client gets 'statfs' information for the file system as a whole by
+first querying the individual storage targets for the 'statfs'
+information on each back-end file system. The RPCs that flow are as
+shown in <<statfs-rpcs>>. The values returned in the VFS call are
+built from the results of the *_STATFS RPCs as follows:
+
+.Statfs values
+[options="header"]
+|====
+| field | VFS value
+| 'f_type' | 0x0bd00bd0
+| 'f_bsize' | OST 'os_bsize'
+| 'f_blocks' | sum over OSTs 'os_blocks'
+| 'f_bfree' | sum over OSTs 'os_bfree'
+| 'f_bavail' | sum over OSTs 'os_bavail'
+| 'f_files' | sum over MDTs 'os_files'
+| 'f_ffree' | sum over MDTs 'os_ffree'
+| 'f_fsid' | UUID
+| 'f_namelen' | maximum filename length
+|====
+
+See the discussion in <<struct-obd-statfs>> for details about the
+'obd_statfs' fields 'os_*'. In particular, the total and free objects
+counts would normally be determined by the corresponding values on the
+MDTs, but if the total objects available across all OSTs (respectively
+free objects) happens to be fewer than the number on the MDTs, then
+the smaller value is used. The number of free objects is modified as
+described below to ensure that at least this many new files can be
+created. The total number of objects is changed to preserve the
+difference 'f_files' - 'f_ffree', which is the current number of
+used objects. This is what "df" displays.
+
+The number of OST free objects is divided by the filesystem-wide
+default stripe count (i.e. the expected number of OST objects used per
+MDT file), so that 'f_ffree' represents the expected minimum number of
+files that can be created at the current time.
+
+The 'os_{bfree,bavail,blocks}' values are scaled down (in power-of-two
+increments) to match the maximum 'os_bsize' returned from any OST so
+that the sum of these fields makes sense, and potentially scaled down
+again (also in power-of-two increments) to increase 'f_bsize' to fit
+into the 'statfs' structure so that the 'f_blocks' field will fit into the
+available field width (which may only be 2^32 blocks on a 32-bit
+client).
+
+For most possible error conditions directly related to processing
+'statfs' information (ENOMEM, for instance) the error is propagated
+back to the VFS and the 'struct statfs' does not have valid
+information. In the case of an EAGAIN during an exchange of RPC
+messages, that error is handled transparently to the 'statfs' itself.
+
+.Statfs RPCs
+[[statfs-rpcs]]
+image::statfs_rpcs.png["statfs RPCs",height=150]
+
+//////////////////////////////////////////////////////////////////////
+The statfs_rpcs.png diagram resembles this text art:
+
+Time
+Step Client MDT OST
+ ------- ------- -------
+1 MDS_STATFS------>
+2 <------MDS_STATFS
+3 OST_STATFS------>
+4 <------OST_STATFS
+//////////////////////////////////////////////////////////////////////
+
+1 - The client issues an MDS_STATFS request to each MDT.
+
+The MDS_STATFS request is a so-called 'empty' RPC in that it consists
+only of the 'ptlrpc_body' (Lustre RPC descriptor) with the opcode set to
+MDS_STATFS (41). Refer to the discussion of <<mds-statfs-rpc>> for
+more general information about the MDS_STATFS RPC's request and reply
+messages.
+
+.MDS_STATFS Request Packet Structure
+
+:frame: none
+:grid: none
+[width="50%", cols="2a"]
+|====
+| request
+[cols="1"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> !
+!===================
+|====
+
+2 - The MDS_STATFS reply returns 'statfs' info
+
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the
+MDS_STATFS reply to the client has the 'obd_statfs' structure. For a
+detailed discussion of the fields in the 'obd_statfs' refer to
+<<struct-obd-statfs>>.
+
+.MDS_STATFS Reply Packet Structure
+
+:frame: none
+:grid: none
+[width="50%", cols="2a"]
+|====
+| reply
+[cols="2"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> ! <<struct-obd-statfs,obd_statfs>> !
+!===================
+|====
+
+3 - The client issues an OST_STATFS request to each OST.
+
+The OST_STATFS RPC looks just like the MDS_STATFS, except the opcode
+is 13 instead of 41 and the target of the RPC is an OST instead of an
+MDT.
+
+.OST_STATFS Request Packet Structure
+
+:frame: none
+:grid: none
+[width="50%", cols="2a"]
+|====
+| request
+[cols="1"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> !
+!===================
+|====
+
+
+4 - The OST_STATFS reply returns 'statfs' info
+
+In addition to the 'ptlrpc_body' (Lustre RPC descriptor), the OST_STATFS
+reply to the client has the 'obd_statfs' structure. For a detailed
+discussion of the fields in the 'obd_statfs' refer to
+<<struct-obd-statfs>>.
+
+.OST_STATFS Reply Packet Structure
+
+:frame: none
+:grid: none
+[width="50%", cols="2a"]
+|====
+| reply
+[cols="2"]
+!===================
+! <<struct-ptlrpc-body,ptlrpc_body>> ! <<struct-obd-statfs,obd_statfs>> !
+!===================
+|====
--- /dev/null
+Statfs Structures
+~~~~~~~~~~~~~~~~~
+[[statfs-structs]]
+
+These structures convey statfs information to or from MDTs and OSTs.
+
+include::obd_statfs.txt[]
+
Every request message indicates a timeout value and every reply answers
with the value the service will honor. Initial connection requests
propose a value for the timeout, and subsequent requests and replies
-pass that value back and forth as part of the message header
+pass that value back and forth as part of the RPC descriptor
('pb_timeout').
Service Times
Closely related to the timeouts in Lustre are the service times that
are expect and observed for each connection class. A request will
-always list the service time as 0 in the message header
+always list the service time as 0 in the RPC descriptor
('pb_service_time'). The reply lists the time the server actual to
send the reply.