X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Flustre_import.h;h=8f52c66b4905ab5b41aa81e1cda6a9949289d546;hb=59729e4c08679061491b47c2adbc5208bbef0e7a;hp=17c36dd1de2e9f8d3421a59f5ce5fcf28900de0a;hpb=a37738fcbd79e283c687a630d9ccb7d7a9fedb3e;p=fs%2Flustre-release.git diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 17c36dd..8f52c66 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,17 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2016, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -46,10 +42,15 @@ * * @{ */ - -#include -#include - +#include +#include +#include +#include +#include +#include +#include +#include +#include /** * Adaptive Timeout stuff @@ -61,20 +62,25 @@ #define AT_FLG_NOHIST 0x1 /* use last reported value only */ struct adaptive_timeout { - time_t at_binstart; /* bin start time */ - unsigned int at_hist[AT_BINS]; /* timeout history bins */ - unsigned int at_flags; - unsigned int at_current; /* current timeout value */ - unsigned int at_worst_ever; /* worst-ever timeout value */ - time_t at_worst_time; /* worst-ever timeout timestamp */ - cfs_spinlock_t at_lock; + time64_t at_binstart; /* bin start time */ + unsigned int at_hist[AT_BINS]; /* timeout history bins */ + unsigned int at_flags; + unsigned int at_current; /* current timeout value */ + unsigned int at_worst_ever; /* worst-ever timeout value */ + time64_t at_worst_time; /* worst-ever timeout timestamp */ + spinlock_t at_lock; +}; + +enum lustre_at_flags { + LATF_SKIP = 0x0, + LATF_STATS = 0x1, }; struct ptlrpc_at_array { - cfs_list_t *paa_reqs_array; /** array to hold requests */ + struct list_head *paa_reqs_array; /** array to hold requests */ __u32 paa_size; /** the size of array */ __u32 paa_count; /** the total count of reqs */ - time_t paa_deadline; /** the earliest deadline of reqs */ + time64_t paa_deadline; /** the earliest deadline of reqs */ __u32 *paa_reqs_count; /** the count of reqs in each entry */ }; @@ -100,19 +106,21 @@ enum lustre_imp_state { LUSTRE_IMP_RECOVER = 8, LUSTRE_IMP_FULL = 9, LUSTRE_IMP_EVICTED = 10, + LUSTRE_IMP_IDLE = 11, + LUSTRE_IMP_LAST }; /** Returns test string representation of numeric import state \a state */ static inline char * ptlrpc_import_state_name(enum lustre_imp_state state) { - static char* import_state_names[] = { - "", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", - "RECOVER", "FULL", "EVICTED", - }; - - LASSERT (state <= LUSTRE_IMP_EVICTED); - return import_state_names[state]; + static char *import_state_names[] = { + "", "CLOSED", "NEW", "DISCONN", + "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", + "RECOVER", "FULL", "EVICTED", "IDLE", + }; + + LASSERT(state < LUSTRE_IMP_LAST); + return import_state_names[state]; } /** @@ -132,23 +140,23 @@ enum obd_import_event { * Definition of import connection structure */ struct obd_import_conn { - /** Item for linking connections together */ - cfs_list_t oic_item; - /** Pointer to actual PortalRPC connection */ + /** Item for linking connections together */ + struct list_head oic_item; + /** Pointer to actual PortalRPC connection */ struct ptlrpc_connection *oic_conn; /** uuid of remote side */ struct obd_uuid oic_uuid; /** - * Time (64 bit jiffies) of last connection attempt on this connection + * Time (64 bit seconds) of last connection attempt on this connection */ - __u64 oic_last_attempt; + time64_t oic_last_attempt; }; /* state history */ #define IMP_STATE_HIST_LEN 16 struct import_state_hist { - enum lustre_imp_state ish_state; - time_t ish_time; + enum lustre_imp_state ish_state; + time64_t ish_time; }; /** @@ -156,62 +164,81 @@ struct import_state_hist { * Imports are representing client-side view to remote target. */ struct obd_import { - /** Local handle (== id) for this import. */ - struct portals_handle imp_handle; - /** Reference counter */ - cfs_atomic_t imp_refcount; - struct lustre_handle imp_dlm_handle; /* client's ldlm export */ - /** Currently active connection */ - struct ptlrpc_connection *imp_connection; + /** Reference counter */ + atomic_t imp_refcount; + struct lustre_handle imp_dlm_handle; /* client's ldlm export */ + /** Currently active connection */ + struct ptlrpc_connection *imp_connection; /** PortalRPC client structure for this import */ struct ptlrpc_client *imp_client; - /** List element for linking into pinger chain */ - cfs_list_t imp_pinger_chain; - /** List element for linking into chain for destruction */ - cfs_list_t imp_zombie_chain; + /** List element for linking into pinger chain */ + struct list_head imp_pinger_chain; + /** work struct for destruction of import */ + struct work_struct imp_zombie_work; /** * Lists of requests that are retained for replay, waiting for a reply, * or waiting for recovery to complete, respectively. * @{ */ - cfs_list_t imp_replay_list; - cfs_list_t imp_sending_list; - cfs_list_t imp_delayed_list; + struct list_head imp_replay_list; + struct list_head imp_sending_list; + struct list_head imp_delayed_list; /** @} */ - /** obd device for this import */ - struct obd_device *imp_obd; + /** + * List of requests that are retained for committed open replay. Once + * open is committed, open replay request will be moved from the + * imp_replay_list into the imp_committed_list. + * The imp_replay_cursor is for accelerating searching during replay. + * @{ + */ + struct list_head imp_committed_list; + struct list_head *imp_replay_cursor; + /** @} */ + + /** List of not replied requests */ + struct list_head imp_unreplied_list; + /** Known maximal replied XID */ + __u64 imp_known_replied_xid; + + /** obd device for this import */ + struct obd_device *imp_obd; /** * some seciruty-related fields * @{ */ - struct ptlrpc_sec *imp_sec; - cfs_semaphore_t imp_sec_mutex; - cfs_time_t imp_sec_expire; + struct ptlrpc_sec *imp_sec; + struct mutex imp_sec_mutex; + time64_t imp_sec_expire; + pid_t imp_sec_refpid; /** @} */ - /** Wait queue for those who need to wait for recovery completion */ - cfs_waitq_t imp_recovery_waitq; - - /** Number of requests currently in-flight */ - cfs_atomic_t imp_inflight; - /** Number of requests currently unregistering */ - cfs_atomic_t imp_unregistering; - /** Number of replay requests inflight */ - cfs_atomic_t imp_replay_inflight; - /** Number of currently happening import invalidations */ - cfs_atomic_t imp_inval_count; - /** Numbner of request timeouts */ - cfs_atomic_t imp_timeouts; - /** Current import state */ + /** Wait queue for those who need to wait for recovery completion */ + wait_queue_head_t imp_recovery_waitq; + + /** Number of requests currently in-flight */ + atomic_t imp_inflight; + /** Number of requests currently unregistering */ + atomic_t imp_unregistering; + /** Number of replay requests inflight */ + atomic_t imp_replay_inflight; + /** Number of currently happening import invalidations */ + atomic_t imp_inval_count; + /** Numbner of request timeouts */ + atomic_t imp_timeouts; + /** Current import state */ enum lustre_imp_state imp_state; + /** Last replay state */ + enum lustre_imp_state imp_replay_state; /** History of import states */ struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; int imp_state_hist_idx; /** Current import generation. Incremented on every reconnect */ int imp_generation; + /** Idle connection initiated at this generation */ + int imp_initiated_at; /** Incremented every time we send reconnection request */ __u32 imp_conn_cnt; /** @@ -236,71 +263,69 @@ struct obd_import { */ struct lustre_handle imp_remote_handle; /** When to perform next ping. time in jiffies. */ - cfs_time_t imp_next_ping; - /** When we last succesfully connected. time in 64bit jiffies */ - __u64 imp_last_success_conn; + time64_t imp_next_ping; + /** When we last successfully connected. time in 64bit jiffies */ + time64_t imp_last_success_conn; /** List of all possible connection for import. */ - cfs_list_t imp_conn_list; + struct list_head imp_conn_list; /** * Current connection. \a imp_connection is imp_conn_current->oic_conn */ struct obd_import_conn *imp_conn_current; /** Protects flags, level, generation, conn_cnt, *_list */ - cfs_spinlock_t imp_lock; - - /* flags */ - unsigned long imp_no_timeout:1, /* timeouts are disabled */ - imp_invalid:1, /* evicted */ - imp_deactive:1, /* administratively disabled */ - imp_replayable:1, /* try to recover the import */ - imp_dlm_fake:1, /* don't run recovery (timeout instead) */ - imp_server_timeout:1, /* use 1/2 timeout on MDS' OSCs */ - imp_delayed_recovery:1, /* VBR: imp in delayed recovery */ - imp_no_lock_replay:1, /* VBR: if gap was found then no lock replays */ - imp_vbr_failed:1, /* recovery by versions was failed */ - imp_force_verify:1, /* force an immidiate ping */ - imp_pingable:1, /* pingable */ - imp_resend_replay:1, /* resend for replay */ - imp_no_pinger_recover:1,/* disable normal recovery, for test only. */ - imp_force_reconnect:1; /* import must be reconnected instead of chouse new connection */ - __u32 imp_connect_op; - struct obd_connect_data imp_connect_data; - __u64 imp_connect_flags_orig; - int imp_connect_error; - - __u32 imp_msg_magic; - __u32 imp_msghdr_flags; /* adjusted based on server capability */ - - struct ptlrpc_request_pool *imp_rq_pool; /* emergency request pool */ - - struct imp_at imp_at; /* adaptive timeout data */ - time_t imp_last_reply_time; /* for health check */ -}; - -typedef void (*obd_import_callback)(struct obd_import *imp, void *closure, - int event, void *event_arg, void *cb_data); - -/** - * Structure for import observer. - * It is possible to register "observer" on an import and every time - * something happens to an import (like connect/evict/disconnect) - * obderver will get its callback called with event type - */ -struct obd_import_observer { - cfs_list_t oio_chain; - obd_import_callback oio_cb; - void *oio_cb_data; + spinlock_t imp_lock; + + /* flags */ + unsigned long imp_no_timeout:1, /* timeouts are disabled */ + imp_invalid:1, /* evicted */ + /* administratively disabled */ + imp_deactive:1, + /* try to recover the import */ + imp_replayable:1, + /* don't run recovery (timeout instead) */ + imp_dlm_fake:1, + /* use 1/2 timeout on MDS' OSCs */ + imp_server_timeout:1, + /* VBR: imp in delayed recovery */ + imp_delayed_recovery:1, + /* recovery by versions was failed */ + imp_vbr_failed:1, + /* force an immidiate ping */ + imp_force_verify:1, + /* force a scheduled ping */ + imp_force_next_verify:1, + /* pingable */ + imp_pingable:1, + /* resend for replay */ + imp_resend_replay:1, + /* disable normal recovery, for test only. */ + imp_no_pinger_recover:1, + /* import must be reconnected instead of + * chouse new connection */ + imp_force_reconnect:1, + /* import has tried to connect with server */ + imp_connect_tried:1, + /* connected but not FULL yet */ + imp_connected:1; + u32 imp_connect_op; + u32 imp_idle_timeout; + u32 imp_idle_debug; + struct obd_connect_data imp_connect_data; + __u64 imp_connect_flags_orig; + __u64 imp_connect_flags2_orig; + int imp_connect_error; + + enum lustre_msg_magic imp_msg_magic; + /* adjusted based on server capability */ + enum lustre_msghdr imp_msghdr_flags; + + /* adaptive timeout data */ + struct imp_at imp_at; + time64_t imp_last_reply_time; /* for health check */ }; -void class_observe_import(struct obd_import *imp, obd_import_callback cb, - void *cb_data); -void class_unobserve_import(struct obd_import *imp, obd_import_callback cb, - void *cb_data); -void class_notify_import_observers(struct obd_import *imp, int event, - void *event_arg); - /* import.c */ static inline unsigned int at_est2timeout(unsigned int val) { @@ -315,14 +340,37 @@ static inline unsigned int at_timeout2est(unsigned int val) return (max((val << 2) / 5, 5U) - 4); } -static inline void at_init(struct adaptive_timeout *at, int val, int flags) { - memset(at, 0, sizeof(*at)); +static inline void at_reset_nolock(struct adaptive_timeout *at, int val) +{ at->at_current = val; at->at_worst_ever = val; - at->at_worst_time = cfs_time_current_sec(); - at->at_flags = flags; - cfs_spin_lock_init(&at->at_lock); + at->at_worst_time = ktime_get_real_seconds(); } + +static inline void at_reset(struct adaptive_timeout *at, int val) +{ + spin_lock(&at->at_lock); + at_reset_nolock(at, val); + spin_unlock(&at->at_lock); +} + +static inline void at_init(struct adaptive_timeout *at, int val, int flags) { + memset(at, 0, sizeof(*at)); + spin_lock_init(&at->at_lock); + at->at_flags = flags; + at_reset(at, val); +} + +static inline void at_reinit(struct adaptive_timeout *at, int val, int flags) +{ + spin_lock(&at->at_lock); + at->at_binstart = 0; + memset(at->at_hist, 0, sizeof(at->at_hist)); + at->at_flags = flags; + at_reset_nolock(at, val); + spin_unlock(&at->at_lock); +} + extern unsigned int at_min; static inline int at_get(struct adaptive_timeout *at) { return (at->at_current > at_min) ? at->at_current : at_min; @@ -335,7 +383,6 @@ extern unsigned int at_max; /* genops.c */ struct obd_export; extern struct obd_import *class_exp2cliimp(struct obd_export *); -extern struct obd_import *class_conn2cliimp(struct lustre_handle *); /** @} import */