X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flustre_export.h;h=f6e136afad37886ca8dd8b11095c6fe9c514a6cd;hp=72a83afc7471182828fa42a95efffa8eac9679a9;hb=5315db3f1066619d6effe4f778d2df3ad1ba738f;hpb=d51f4e19e640a5601297599775f8835b12cdb1dc diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 72a83af..f6e136a 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,121 +15,158 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ * Lustre is a trademark of Sun Microsystems, Inc. */ +/** \defgroup obd_export PortalRPC export definitions + * + * @{ + */ #ifndef __EXPORT_H #define __EXPORT_H +/** \defgroup export export + * + * @{ + */ + +#include +#include + #include -#include +#include #include -#include struct mds_client_data; struct mdt_client_data; struct mds_idmap_table; struct mdt_idmap_table; -struct lu_export_data { - /** Protects led_lcd below */ - struct semaphore led_lcd_lock; - /** Per-client data for each export */ - struct lsd_client_data *led_lcd; - /** Offset of record in last_rcvd file */ - loff_t led_lr_off; - /** Client index in last_rcvd file */ - int led_lr_idx; -}; +/** + * Target-specific export data + */ +struct tg_export_data { + /** Protects ted_lcd, ted_reply_* and + * ted_release_* fields below */ + struct mutex ted_lcd_lock; + /** Per-client data for each export */ + struct lsd_client_data *ted_lcd; + /** Offset of record in last_rcvd file */ + loff_t ted_lr_off; + /** Client index in last_rcvd file */ + int ted_lr_idx; -struct mdt_export_data { - struct lu_export_data med_led; - struct list_head med_open_head; - spinlock_t med_open_lock; /* lock med_open_head, mfd_list*/ - __u64 med_ibits_known; - struct semaphore med_idmap_sem; - struct lustre_idmap_table *med_idmap; + /** + * ted_nodemap_lock is used to ensure that the nodemap is not destroyed + * between the time that ted_nodemap is checked for NULL, and a + * reference is taken. Modifications to ted_nodemap require that the + * active_config_lock and the nodemap(s)'s nm_member_list_lock be + * taken, as well as ted_nodemap_lock, so the export can be properly + * added to or removed from the nodemap's member list. When an export + * is added to a nodemap, a reference on that nodemap must be taken. + * That reference can be put only after ted_nodemap no longer refers to + * it. + */ + spinlock_t ted_nodemap_lock; + struct lu_nodemap *ted_nodemap; + struct list_head ted_nodemap_member; + + /** last version of nodemap config sent to client */ + __u64 ted_nodemap_version; + + /* Every reply data fields below are + * protected by ted_lcd_lock */ + /** List of reply data */ + struct list_head ted_reply_list; + int ted_reply_cnt; + /** Reply data with highest transno is retained */ + struct tg_reply_data *ted_reply_last; + /* Statistics */ + int ted_reply_max; /* high water mark */ + int ted_release_xid; + int ted_release_tag; + /* grants */ + long ted_dirty; /* in bytes */ + long ted_grant; /* in bytes */ + long ted_pending; /* bytes just being written */ + __u8 ted_pagebits; /* log2 of client page size */ + + /** + * File Modification Data (FMD) tracking + */ + spinlock_t ted_fmd_lock; /* protects ted_fmd_list */ + struct list_head ted_fmd_list; /* FIDs being modified */ + int ted_fmd_count;/* items in ted_fmd_list */ }; -#define med_lcd_lock med_led.led_lcd_lock -#define med_lcd med_led.led_lcd -#define med_lr_off med_led.led_lr_off -#define med_lr_idx med_led.led_lr_idx - -struct osc_creator { - spinlock_t oscc_lock; - struct list_head oscc_wait_create_list; - struct obd_device *oscc_obd; - obd_id oscc_last_id;//last available pre-created object - obd_id oscc_next_id;// what object id to give out next - int oscc_grow_count; - /** - * Limit oscc_grow_count value, can be changed via proc fs - */ - int oscc_max_grow_count; - struct obdo oscc_oa; - int oscc_flags; - cfs_waitq_t oscc_waitq; /* creating procs wait on this */ +/** + * MDT-specific export data + */ +struct mdt_export_data { + struct tg_export_data med_ted; + /** List of all files opened by client on this MDT */ + struct list_head med_open_head; + spinlock_t med_open_lock; /* med_open_head, mfd_list */ }; struct ec_export_data { /* echo client */ - struct list_head eced_locks; + struct list_head eced_locks; }; /* In-memory access to client data from OST struct */ +/** Filter (oss-side) specific import data */ struct filter_export_data { - struct lu_export_data fed_led; - spinlock_t fed_lock; /**< protects fed_mod_list */ - long fed_dirty; /* in bytes */ - long fed_grant; /* in bytes */ - struct list_head fed_mod_list; /* files being modified */ - int fed_mod_count;/* items in fed_writing list */ - long fed_pending; /* bytes just being written */ - __u32 fed_group; + struct tg_export_data fed_ted; + __u64 fed_lastid_gen; + /* count of SOFT_SYNC RPCs, which will be reset after + * ofd_soft_sync_limit number of RPCs, and trigger a sync. */ + atomic_t fed_soft_sync_count; + __u32 fed_group; }; -#define fed_lcd_lock fed_led.led_lcd_lock -#define fed_lcd fed_led.led_lcd -#define fed_lr_off fed_led.led_lr_off -#define fed_lr_idx fed_led.led_lr_idx - -typedef struct nid_stat { - lnet_nid_t nid; - struct hlist_node nid_hash; - struct list_head nid_list; - struct obd_device *nid_obd; - struct proc_dir_entry *nid_proc; - struct lprocfs_stats *nid_stats; - struct lprocfs_stats *nid_ldlm_stats; - struct brw_stats *nid_brw_stats; - atomic_t nid_exp_ref_count; /* for obd_nid_stats_hash - exp_nid_stats */ -}nid_stat_t; +struct mgs_export_data { + struct list_head med_clients; /* mgc fs client via this exp */ + spinlock_t med_lock; /* protect med_clients */ +}; + +/** + * per-NID statistics structure. + * It tracks access patterns to this export on a per-client-NID basis + */ +struct nid_stat { + lnet_nid_t nid; + struct hlist_node nid_hash; + struct list_head nid_list; + struct obd_device *nid_obd; + struct proc_dir_entry *nid_proc; + struct lprocfs_stats *nid_stats; + struct lprocfs_stats *nid_ldlm_stats; + atomic_t nid_exp_ref_count; /* for obd_nid_stats_hash + exp_nid_stats */ +}; #define nidstat_getref(nidstat) \ do { \ - atomic_inc(&(nidstat)->nid_exp_ref_count); \ + atomic_inc(&(nidstat)->nid_exp_ref_count); \ } while(0) #define nidstat_putref(nidstat) \ do { \ - atomic_dec(&(nidstat)->nid_exp_ref_count); \ - LASSERTF(atomic_read(&(nidstat)->nid_exp_ref_count) >= 0, \ - "stat %p nid_exp_ref_count < 0\n", nidstat); \ + atomic_dec(&(nidstat)->nid_exp_ref_count); \ + LASSERTF(atomic_read(&(nidstat)->nid_exp_ref_count) >= 0, \ + "stat %p nid_exp_ref_count < 0\n", nidstat); \ } while(0) enum obd_option { @@ -140,74 +175,142 @@ enum obd_option { OBD_OPT_ABORT_RECOV = 0x0004, }; +/** + * Export structure. Represents target-side of connection in portals. + * Also used in Lustre to connect between layers on the same node when + * there is no network-connection in-between. + * For every connected client there is an export structure on the server + * attached to the same obd device. + */ struct obd_export { - struct portals_handle exp_handle; - atomic_t exp_refcount; + /** + * Export handle, it's id is provided to client on connect + * Subsequent client RPCs contain this handle id to identify + * what export they are talking to. + */ + struct portals_handle exp_handle; + /** + * Set of counters below is to track where export references are + * kept. The exp_rpc_count is used for reconnect handling also, + * the cb_count and locks_count are for debug purposes only for now. + * The sum of them should be less than exp_handle.href by 3 + */ + atomic_t exp_rpc_count; /* RPC references */ + atomic_t exp_cb_count; /* Commit callback references */ + /** Number of queued replay requests to be processes */ + atomic_t exp_replay_count; + atomic_t exp_locks_count; /** Lock references */ +#if LUSTRE_TRACKS_LOCK_EXP_REFS + struct list_head exp_locks_list; + spinlock_t exp_locks_list_guard; +#endif + /** UUID of client connected to this export */ + struct obd_uuid exp_client_uuid; + /** To link all exports on an obd device */ + struct list_head exp_obd_chain; + /** work_struct for destruction of export */ + struct work_struct exp_zombie_work; + /* Unlinked export list */ + struct list_head exp_stale_list; + struct rhash_head exp_uuid_hash; /** uuid-export hash */ + struct hlist_node exp_nid_hash; /** nid-export hash */ + struct hlist_node exp_gen_hash; /** last_rcvd clt gen hash */ /** - * Set of counters below is to track where export references are - * kept. The exp_rpc_count is used for reconnect handling also, - * the cb_count and locks_count are for debug purposes only for now. - * The sum of them should be less than exp_refcount by 3 + * All exports eligible for ping evictor are linked into a list + * through this field in "most time since last request on this export" + * order + * protected by obd_dev_lock */ - atomic_t exp_rpc_count; /** RPC references */ - atomic_t exp_cb_count; /** Commit callback references */ - atomic_t exp_locks_count; /** Lock references */ - - atomic_t exp_replay_count; - struct obd_uuid exp_client_uuid; - struct list_head exp_obd_chain; - struct hlist_node exp_uuid_hash; /* uuid-export hash*/ - struct hlist_node exp_nid_hash; /* nid-export hash */ - /* exp_obd_chain_timed fo ping evictor, protected by obd_dev_lock */ - struct list_head exp_obd_chain_timed; - struct obd_device *exp_obd; - struct obd_import *exp_imp_reverse; /* to make RPCs backwards */ + struct list_head exp_obd_chain_timed; + /** Obd device of this export */ + struct obd_device *exp_obd; + /** + * "reverse" import to send requests (e.g. from ldlm) back to client + * exp_lock protect its change + */ + struct obd_import *exp_imp_reverse; struct nid_stat *exp_nid_stats; - struct lprocfs_stats *exp_md_stats; + /** Active connetion */ struct ptlrpc_connection *exp_connection; - __u32 exp_conn_cnt; - lustre_hash_t *exp_lock_hash; /* existing lock hash */ - spinlock_t exp_lock_hash_lock; - struct list_head exp_outstanding_replies; - struct list_head exp_uncommitted_replies; - spinlock_t exp_uncommitted_replies_lock; - __u64 exp_last_committed; - cfs_time_t exp_last_request_time; - struct list_head exp_req_replay_queue; - spinlock_t exp_lock; /* protects flags int below */ - /* ^ protects exp_outstanding_replies too */ - __u64 exp_connect_flags; - enum obd_option exp_flags; - unsigned long exp_failed:1, - exp_in_recovery:1, - exp_disconnected:1, - exp_connecting:1, - /** VBR: export missed recovery */ - exp_delayed:1, - /** VBR: failed version checking */ - exp_vbr_failed:1, - exp_req_replay_needed:1, - exp_lock_replay_needed:1, - exp_need_sync:1, - exp_flvr_changed:1, - exp_flvr_adapt:1, - exp_libclient:1, /* liblustre client? */ - /* client timed out and tried to reconnect, - * but couldn't because of active rpcs */ - exp_abort_active_req:1; - struct list_head exp_queued_rpc; /* RPC to be handled */ - /* also protected by exp_lock */ - enum lustre_sec_part exp_sp_peer; - struct sptlrpc_flavor exp_flvr; /* current */ - struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */ - cfs_time_t exp_flvr_expire[2]; /* seconds */ + /** Connection count value from last successful reconnect rpc */ + __u32 exp_conn_cnt; + /** Hash list of all ldlm locks granted on this export */ + struct cfs_hash *exp_lock_hash; + /** + * Hash list for Posix lock deadlock detection, added with + * ldlm_lock::l_exp_flock_hash. + */ + struct cfs_hash *exp_flock_hash; + struct list_head exp_outstanding_replies; + struct list_head exp_uncommitted_replies; + spinlock_t exp_uncommitted_replies_lock; + /** Last committed transno for this export */ + __u64 exp_last_committed; + /** When was last request received */ + time64_t exp_last_request_time; + /** On replay all requests waiting for replay are linked here */ + struct list_head exp_req_replay_queue; + /** + * protects exp_flags, exp_outstanding_replies and the change + * of exp_imp_reverse + */ + spinlock_t exp_lock; + /** Compatibility flags for this export are embedded into + * exp_connect_data */ + struct obd_connect_data exp_connect_data; + enum obd_option exp_flags; + unsigned long exp_failed:1, + exp_in_recovery:1, + exp_disconnected:1, + exp_connecting:1, + /** VBR: export missed recovery */ + exp_delayed:1, + /** VBR: failed version checking */ + exp_vbr_failed:1, + exp_req_replay_needed:1, + exp_lock_replay_needed:1, + exp_need_sync:1, + exp_flvr_changed:1, + exp_flvr_adapt:1, + /* if to swap nidtbl entries for 2.2 clients. + * Only used by the MGS to fix LU-1644. */ + exp_need_mne_swab:1, + /* The export already got final replay ping + * request. */ + exp_replay_done:1, + /* local client with recovery disabled */ + exp_no_recovery:1; + /* also protected by exp_lock */ + enum lustre_sec_part exp_sp_peer; + struct sptlrpc_flavor exp_flvr; /* current */ + struct sptlrpc_flavor exp_flvr_old[2]; /* about-to-expire */ + time64_t exp_flvr_expire[2]; /* seconds */ + + /** protects exp_hp_rpcs */ + spinlock_t exp_rpc_lock; + struct list_head exp_hp_rpcs; /* (potential) HP RPCs */ + struct list_head exp_reg_rpcs; /* RPC being handled */ + /** blocking dlm lock list, protected by exp_bl_list_lock */ + struct list_head exp_bl_list; + spinlock_t exp_bl_list_lock; + + /** Target specific data */ union { - struct lu_export_data eu_target_data; + struct tg_export_data eu_target_data; struct mdt_export_data eu_mdt_data; struct filter_export_data eu_filter_data; struct ec_export_data eu_ec_data; + struct mgs_export_data eu_mgs_data; } u; + + struct adaptive_timeout exp_bl_lock_at; + + /** highest XID received by export client that has no + * unreceived lower-numbered XID + */ + __u64 exp_last_xid; + long *exp_used_slots; }; #define exp_target_data u.eu_target_data @@ -215,50 +318,64 @@ struct obd_export { #define exp_filter_data u.eu_filter_data #define exp_ec_data u.eu_ec_data -static inline int exp_expired(struct obd_export *exp, cfs_duration_t age) +static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp) { - LASSERT(exp->exp_delayed); - return cfs_time_before(cfs_time_add(exp->exp_last_request_time, age), - cfs_time_current_sec()); + return &exp->exp_connect_data.ocd_connect_flags; } -static inline int exp_connect_cancelset(struct obd_export *exp) +static inline __u64 exp_connect_flags(struct obd_export *exp) { - LASSERT(exp != NULL); - return !!(exp->exp_connect_flags & OBD_CONNECT_CANCELSET); + return *exp_connect_flags_ptr(exp); } -static inline int exp_connect_lru_resize(struct obd_export *exp) +static inline __u64 *exp_connect_flags2_ptr(struct obd_export *exp) { - LASSERT(exp != NULL); - return !!(exp->exp_connect_flags & OBD_CONNECT_LRU_RESIZE); + return &exp->exp_connect_data.ocd_connect_flags2; } -static inline int exp_connect_rmtclient(struct obd_export *exp) +static inline __u64 exp_connect_flags2(struct obd_export *exp) { - LASSERT(exp != NULL); - return !!(exp->exp_connect_flags & OBD_CONNECT_RMT_CLIENT); + if (exp_connect_flags(exp) & OBD_CONNECT_FLAGS2) + return *exp_connect_flags2_ptr(exp); + return 0; } -static inline int client_is_remote(struct obd_export *exp) +static inline int exp_max_brw_size(struct obd_export *exp) { - struct obd_import *imp = class_exp2cliimp(exp); + LASSERT(exp != NULL); + if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE) + return exp->exp_connect_data.ocd_brw_size; + + return ONE_MB_BRW_SIZE; +} - return !!(imp->imp_connect_data.ocd_connect_flags & - OBD_CONNECT_RMT_CLIENT); +static inline int exp_connect_multibulk(struct obd_export *exp) +{ + return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE; +} + +static inline int exp_connect_cancelset(struct obd_export *exp) +{ + LASSERT(exp != NULL); + return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET); +} + +static inline int exp_connect_lru_resize(struct obd_export *exp) +{ + LASSERT(exp != NULL); + return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE); } static inline int exp_connect_vbr(struct obd_export *exp) { - LASSERT(exp != NULL); - LASSERT(exp->exp_connection); - return !!(exp->exp_connect_flags & OBD_CONNECT_VBR); + LASSERT(exp != NULL); + LASSERT(exp->exp_connection); + return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR); } -static inline int exp_connect_som(struct obd_export *exp) +static inline int exp_connect_umask(struct obd_export *exp) { - LASSERT(exp != NULL); - return !!(exp->exp_connect_flags & OBD_CONNECT_SOM); + return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK); } static inline int imp_connect_lru_resize(struct obd_import *imp) @@ -270,7 +387,108 @@ static inline int imp_connect_lru_resize(struct obd_import *imp) return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE); } +static inline int exp_connect_layout(struct obd_export *exp) +{ + return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK); +} + +static inline bool exp_connect_lvb_type(struct obd_export *exp) +{ + LASSERT(exp != NULL); + if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE) + return true; + else + return false; +} + +static inline bool imp_connect_lvb_type(struct obd_import *imp) +{ + struct obd_connect_data *ocd; + + LASSERT(imp != NULL); + ocd = &imp->imp_connect_data; + if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE) + return true; + else + return false; +} + +static inline bool imp_connect_disp_stripe(struct obd_import *imp) +{ + struct obd_connect_data *ocd; + + LASSERT(imp != NULL); + ocd = &imp->imp_connect_data; + return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE; +} + +static inline bool imp_connect_shortio(struct obd_import *imp) +{ + struct obd_connect_data *ocd = &imp->imp_connect_data; + + return ocd->ocd_connect_flags & OBD_CONNECT_SHORTIO; +} + +static inline __u64 exp_connect_ibits(struct obd_export *exp) +{ + struct obd_connect_data *ocd; + + ocd = &exp->exp_connect_data; + return ocd->ocd_ibits_known; +} + +static inline int exp_connect_large_acl(struct obd_export *exp) +{ + return !!(exp_connect_flags(exp) & OBD_CONNECT_LARGE_ACL); +} + +static inline int exp_connect_lockahead(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_LOCKAHEAD); +} + +static inline int exp_connect_overstriping(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_OVERSTRIPING); +} + +static inline int exp_connect_flr(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_FLR); +} + +static inline int exp_connect_lock_convert(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_LOCK_CONVERT); +} + extern struct obd_export *class_conn2export(struct lustre_handle *conn); -extern struct obd_device *class_conn2obd(struct lustre_handle *conn); + +static inline int exp_connect_archive_id_array(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_ARCHIVE_ID_ARRAY); +} + +static inline int exp_connect_sepol(struct obd_export *exp) +{ + return !!(exp_connect_flags2(exp) & OBD_CONNECT2_SELINUX_POLICY); +} + +enum { + /* archive_ids in array format */ + KKUC_CT_DATA_ARRAY_MAGIC = 0x092013cea, + /* archive_ids in bitmap format */ + KKUC_CT_DATA_BITMAP_MAGIC = 0x082018cea, +}; + + +struct kkuc_ct_data { + __u32 kcd_magic; + __u32 kcd_nr_archives; + __u32 kcd_archives[0]; +}; + +/** @} export */ #endif /* __EXPORT_H */ +/** @} obd_export */