X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=import.txt;fp=import.txt;h=555e12c13f065461e1d18a3936c8e88fe61b7e31;hb=75ebff14a4d8714411bf755755b0823a2987dc44;hp=09817942948ee9e06df2e0fbf64714f66b7b8645;hpb=5a5b3e4d0db84c3dc3486627e66f37f2e7da2ee5;p=doc%2Fprotocol.git diff --git a/import.txt b/import.txt index 0981794..555e12c 100644 --- a/import.txt +++ b/import.txt @@ -1,83 +1,101 @@ Import ^^^^^^ +[[obd-import]] + +The 'obd_import' structure holds the connection state between each +client and each target it is connected to. ---- +struct obd_import { + enum lustre_imp_state imp_state; + int imp_generation; + __u32 imp_conn_cnt; + struct lustre_handle imp_remote_handle; + struct obd_connect_data imp_connect_data; +}; +---- + +////////////////////////////////////////////////////////////////////// +This is the rest of the info associated with obd_import: + #define IMP_STATE_HIST_LEN 16 struct import_state_hist { enum lustre_imp_state ish_state; time_t ish_time; }; struct obd_import { - struct portals_handle imp_handle; - atomic_t imp_refcount; - struct lustre_handle imp_dlm_handle; - struct ptlrpc_connection *imp_connection; - struct ptlrpc_client *imp_client; - cfs_list_t imp_pinger_chain; - cfs_list_t imp_zombie_chain; - cfs_list_t imp_replay_list; - cfs_list_t imp_sending_list; - cfs_list_t imp_delayed_list; - cfs_list_t imp_committed_list; - cfs_list_t *imp_replay_cursor; - struct obd_device *imp_obd; - struct ptlrpc_sec *imp_sec; - struct mutex imp_sec_mutex; - cfs_time_t imp_sec_expire; - wait_queue_head_t imp_recovery_waitq; - atomic_t imp_inflight; - atomic_t imp_unregistering; - atomic_t imp_replay_inflight; - atomic_t imp_inval_count; - atomic_t imp_timeouts; - enum lustre_imp_state imp_state; - struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; - int imp_state_hist_idx; - int imp_generation; - __u32 imp_conn_cnt; - int imp_last_generation_checked; - __u64 imp_last_replay_transno; - __u64 imp_peer_committed_transno; - __u64 
imp_last_transno_checked; - struct lustre_handle imp_remote_handle; - cfs_time_t imp_next_ping; - __u64 imp_last_success_conn; - cfs_list_t imp_conn_list; - struct obd_import_conn *imp_conn_current; - spinlock_t imp_lock; - /* flags */ - unsigned long - imp_no_timeout:1, - imp_invalid:1, - imp_deactive:1, - imp_replayable:1, - imp_dlm_fake:1, - imp_server_timeout:1, - imp_delayed_recovery:1, - imp_no_lock_replay:1, - imp_vbr_failed:1, - imp_force_verify:1, - imp_force_next_verify:1, - imp_pingable:1, - imp_resend_replay:1, - imp_no_pinger_recover:1, - imp_need_mne_swab:1, - imp_force_reconnect:1, - imp_connect_tried:1; - __u32 imp_connect_op; - struct obd_connect_data imp_connect_data; - __u64 imp_connect_flags_orig; - int imp_connect_error; - __u32 imp_msg_magic; - __u32 imp_msghdr_flags; /* adjusted based on server capability */ - struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */ - struct imp_at imp_at; /* adaptive timeout data */ - time_t imp_last_reply_time; /* for health check */ + struct portals_handle imp_handle; + atomic_t imp_refcount; + struct lustre_handle imp_dlm_handle; + struct ptlrpc_connection *imp_connection; + struct ptlrpc_client *imp_client; + cfs_list_t imp_pinger_chain; + cfs_list_t imp_zombie_chain; + cfs_list_t imp_replay_list; + cfs_list_t imp_sending_list; + cfs_list_t imp_delayed_list; + cfs_list_t imp_committed_list; + cfs_list_t *imp_replay_cursor; + struct obd_device *imp_obd; + struct ptlrpc_sec *imp_sec; + struct mutex imp_sec_mutex; + cfs_time_t imp_sec_expire; + wait_queue_head_t imp_recovery_waitq; + atomic_t imp_inflight; + atomic_t imp_unregistering; + atomic_t imp_replay_inflight; + atomic_t imp_inval_count; + atomic_t imp_timeouts; + enum lustre_imp_state imp_state; + struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; + int imp_state_hist_idx; + int imp_generation; + __u32 imp_conn_cnt; + int imp_last_generation_checked; + __u64 imp_last_replay_transno; + __u64 imp_peer_committed_transno; + __u64 
imp_last_transno_checked; + struct lustre_handle imp_remote_handle; + cfs_time_t imp_next_ping; + __u64 imp_last_success_conn; + cfs_list_t imp_conn_list; + struct obd_import_conn *imp_conn_current; + spinlock_t imp_lock; + /* flags */ + unsigned long + imp_no_timeout:1, + imp_invalid:1, + imp_deactive:1, + imp_replayable:1, + imp_dlm_fake:1, + imp_server_timeout:1, + imp_delayed_recovery:1, + imp_no_lock_replay:1, + imp_vbr_failed:1, + imp_force_verify:1, + imp_force_next_verify:1, + imp_pingable:1, + imp_resend_replay:1, + imp_no_pinger_recover:1, + imp_need_mne_swab:1, + imp_force_reconnect:1, + imp_connect_tried:1; + __u32 imp_connect_op; + struct obd_connect_data imp_connect_data; + __u64 imp_connect_flags_orig; + int imp_connect_error; + __u32 imp_msg_magic; + __u32 imp_msghdr_flags; /* adjusted based on server capability */ + struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */ + struct imp_at imp_at; /* adaptive timeout data */ + time_t imp_last_reply_time; /* for health check */ }; ----- +////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////// +////vvvv The 'imp_handle' value is the unique id for the import, and is used as -a hash key to gain access to it. It is not used in any of the Lustre +a hash key to it. It is not used in any of the Lustre protocol messages, but rather is just for internal reference. The 'imp_refcount' is also for internal use. The value is incremented @@ -166,6 +184,8 @@ multiple threads waiting on this process to complete. The 'imp_timeouts' field is a counter that is incremented every time there is a timeout in communication with the target. +////^^^^ +////////////////////////////////////////////////////////////////////// The 'imp_state' tracks the state of the import. 
It draws from the enumerated set of values: @@ -185,12 +205,17 @@ enumerated set of values: | LUSTRE_IMP_FULL | 9 | LUSTRE_IMP_EVICTED | 10 |===== + +////////////////////////////////////////////////////////////////////// +////vvvv fixme: what are the transitions between these states? The 'imp_state_hist' array maintains a list of the last 16 (IMP_STATE_HIST_LEN) states the import was in, along with the time it entered each (fixme: or is it when it left that state?). The list is maintained in a circular manner, so the 'imp_state_hist_idx' points to the entry in the list for the most recently visited state. +////^^^^ +////////////////////////////////////////////////////////////////////// The 'imp_generation' and 'imp_conn_cnt' fields are monotonically increasing counters. Every time a connection request is sent to the @@ -198,6 +223,8 @@ target the 'imp_conn_cnt' counter is incremented, and every time a reply is received for the connection request the 'imp_generation' counter is incremented. +////////////////////////////////////////////////////////////////////// +////vvvv The 'imp_last_generation_checked' implements an optimization. When a replay process has successfully traversed the replay list the 'imp_generation' value is noted here. If the generation has not @@ -205,27 +232,32 @@ incremented then the replay list does not need to be traversed again. During replay the 'imp_last_replay_transno' is set to the transaction number of the last request being replayed, and -'imp_peer_committed_transno is set to the 'pb_last_committed' value -(of the 'ptlrpc_body') from replies if that value is higher than the +'imp_peer_committed_transno' is set to the 'pb_last_committed' value +(of the 'ptlrpc_body') from replies if that value is higher than the previous 'imp_peer_committed_transno'. The 'imp_last_transno_checked' field implements an optimization. It is set to the -'imp_last_replay_transno' as its replay is initiated. 
If -'imp_last_transno_checked' is still 'imp_last_replay_transno' and -'imp_generation' is still 'imp_last_generation_checked' then there +'imp_last_replay_transno' as its replay is initiated. + +If 'imp_last_transno_checked' is still 'imp_last_replay_transno' and +'imp_generation' is still 'imp_last_generation_checked' then there are no additional requests ready to be removed from the replay list. Furthermore, 'imp_last_transno_checked' may no longer be needed, since the committed transactions are now maintained on a separate list. +////^^^^ +////////////////////////////////////////////////////////////////////// The 'imp_remote_handle' is the handle sent by the target in a connection reply message to uniquely identify the export for this target and client that is maintained on the server. This is the handle used in all subsequent messages to the target. -There are two separate ping intervals (fixme: what are the -values?). If there are no uncommitted messages for the target then the -default ping interval is used to set the 'imp_next_ping' to the time +////////////////////////////////////////////////////////////////////// +////vvvv +There are two separate ping intervals. If there are no uncommitted +messages for the target then the default ping interval, based on the +Adaptive Timeout value, is used to set the 'imp_next_ping' to the time the next ping needs to be sent. If there are uncommitted requests then -a "short interval" is used to set the time for the next ping. +a "short interval" of 7s is used to set the time for the next ping. The 'imp_last_success_conn' value is set to the time of the last successful connection. fixme: The source says it is in 64 bit @@ -289,3 +321,5 @@ or 'imp_pingable' flags? During recovery, the client sets the the current value of 'imp_last_replay_transno'. The 'imp_need_mne_swab' flag indicates a version dependent circumstance where swabbing was inadvertently left out of one processing step. 
+////^^^^ +//////////////////////////////////////////////////////////////////////