Whamcloud - gitweb
LU-1757 brw: add short io osc/ost transfer.
[fs/lustre-release.git] / lustre / include / lustre_export.h
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2016, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  */
32 /** \defgroup obd_export PortalRPC export definitions
33  *
34  * @{
35  */
36
37 #ifndef __EXPORT_H
38 #define __EXPORT_H
39
40 /** \defgroup export export
41  *
42  * @{
43  */
44
45 #include <lprocfs_status.h>
46 #include <uapi/linux/lustre/lustre_idl.h>
47 #include <lustre_dlm.h>
48
/*
 * Forward declarations: only the struct tags are introduced here, the
 * definitions are not part of this header.
 */
struct mds_client_data;
struct mdt_client_data;
struct mds_idmap_table;
struct mdt_idmap_table;
53
54 /**
55  * Target-specific export data
56  */
57 struct tg_export_data {
58         /** Protects ted_lcd, ted_reply_* and
59          * ted_release_* fields below */
60         struct mutex            ted_lcd_lock;
61         /** Per-client data for each export */
62         struct lsd_client_data  *ted_lcd;
63         /** Offset of record in last_rcvd file */
64         loff_t                  ted_lr_off;
65         /** Client index in last_rcvd file */
66         int                     ted_lr_idx;
67
68         /**
69          * ted_nodemap_lock is used to ensure that the nodemap is not destroyed
70          * between the time that ted_nodemap is checked for NULL, and a
71          * reference is taken. Modifications to ted_nodemap require that the
72          * active_config_lock and the nodemap(s)'s nm_member_list_lock be
73          * taken, as well as ted_nodemap_lock, so the export can be properly
74          * added to or removed from the nodemap's member list. When an export
75          * is added to a nodemap, a reference on that nodemap must be taken.
76          * That reference can be put only after ted_nodemap no longer refers to
77          * it.
78          */
79         spinlock_t              ted_nodemap_lock;
80         struct lu_nodemap       *ted_nodemap;
81         struct list_head        ted_nodemap_member;
82
83         /** last version of nodemap config sent to client */
84         __u64                   ted_nodemap_version;
85
86         /* Every reply data fields below are
87          * protected by ted_lcd_lock */
88         /** List of reply data */
89         struct list_head        ted_reply_list;
90         int                     ted_reply_cnt;
91         /** Reply data with highest transno is retained */
92         struct tg_reply_data    *ted_reply_last;
93         /* Statistics */
94         int                     ted_reply_max; /* high water mark */
95         int                     ted_release_xid;
96         int                     ted_release_tag;
97         /* grants */
98         long                    ted_dirty;    /* in bytes */
99         long                    ted_grant;    /* in bytes */
100         long                    ted_pending;  /* bytes just being written */
101         __u8                    ted_pagebits; /* log2 of client page size */
102 };
103
104 /**
105  * MDT-specific export data
106  */
107 struct mdt_export_data {
108         struct tg_export_data   med_ted;
109         /** List of all files opened by client on this MDT */
110         struct list_head        med_open_head;
111         spinlock_t              med_open_lock; /* med_open_head, mfd_list */
112 };
113
114 struct ec_export_data { /* echo client */
115         struct list_head        eced_locks;
116 };
117
118 /* In-memory access to client data from OST struct */
119 /** Filter (oss-side) specific import data */
120 struct filter_export_data {
121         struct tg_export_data   fed_ted;
122         spinlock_t              fed_lock;       /**< protects fed_mod_list */
123         __u64                   fed_lastid_gen;
124         struct list_head        fed_mod_list; /* files being modified */
125         /* count of SOFT_SYNC RPCs, which will be reset after
126          * ofd_soft_sync_limit number of RPCs, and trigger a sync. */
127         atomic_t                fed_soft_sync_count;
128         int                     fed_mod_count;/* items in fed_writing list */
129         __u32                   fed_group;
130 };
131
132 struct mgs_export_data {
133         struct list_head        med_clients;    /* mgc fs client via this exp */
134         spinlock_t              med_lock;       /* protect med_clients */
135 };
136
137 /**
138  * per-NID statistics structure.
139  * It tracks access patterns to this export on a per-client-NID basis
140  */
141 struct nid_stat {
142         lnet_nid_t               nid;
143         struct hlist_node        nid_hash;
144         struct list_head         nid_list;
145         struct obd_device       *nid_obd;
146         struct proc_dir_entry   *nid_proc;
147         struct lprocfs_stats    *nid_stats;
148         struct lprocfs_stats    *nid_ldlm_stats;
149         atomic_t                 nid_exp_ref_count; /* for obd_nid_stats_hash
150                                                        exp_nid_stats */
151 };
152
/**
 * Take a reference on a per-NID statistics entry.
 *
 * \param nidstat  pointer to the struct nid_stat whose nid_exp_ref_count
 *                 is incremented
 */
#define nidstat_getref(nidstat)						\
do {									\
	atomic_inc(&(nidstat)->nid_exp_ref_count);			\
} while (0)

/**
 * Drop a reference on a per-NID statistics entry and assert that the
 * counter did not become negative.
 *
 * The argument is evaluated exactly once, and the value that is checked is
 * the one returned by the decrement itself (atomic_dec_return()) rather
 * than a separate, later read of the counter.
 *
 * \param nidstat  pointer to the struct nid_stat whose nid_exp_ref_count
 *                 is decremented
 */
#define nidstat_putref(nidstat)						\
do {									\
	struct nid_stat *_ns = (nidstat);				\
	int _refs = atomic_dec_return(&_ns->nid_exp_ref_count);		\
									\
	LASSERTF(_refs >= 0, "stat %p nid_exp_ref_count < 0\n", _ns);	\
} while (0)
164
/** Option bits; each enumerator occupies its own bit */
enum obd_option {
	OBD_OPT_FORCE		= 1 << 0,	/* 0x0001 */
	OBD_OPT_FAILOVER	= 1 << 1,	/* 0x0002 */
	OBD_OPT_ABORT_RECOV	= 1 << 2,	/* 0x0004 */
};
170
171 /**
172  * Export structure. Represents target-side of connection in portals.
173  * Also used in Lustre to connect between layers on the same node when
174  * there is no network-connection in-between.
175  * For every connected client there is an export structure on the server
176  * attached to the same obd device.
177  */
178 struct obd_export {
179         /**
180          * Export handle, it's id is provided to client on connect
181          * Subsequent client RPCs contain this handle id to identify
182          * what export they are talking to.
183          */
184         struct portals_handle   exp_handle;
185         atomic_t                exp_refcount;
186         /**
187          * Set of counters below is to track where export references are
188          * kept. The exp_rpc_count is used for reconnect handling also,
189          * the cb_count and locks_count are for debug purposes only for now.
190          * The sum of them should be less than exp_refcount by 3
191          */
192         atomic_t                exp_rpc_count; /* RPC references */
193         atomic_t                exp_cb_count; /* Commit callback references */
194         /** Number of queued replay requests to be processes */
195         atomic_t                exp_replay_count;
196         atomic_t                exp_locks_count; /** Lock references */
197 #if LUSTRE_TRACKS_LOCK_EXP_REFS
198         struct list_head        exp_locks_list;
199         spinlock_t              exp_locks_list_guard;
200 #endif
201         /** UUID of client connected to this export */
202         struct obd_uuid         exp_client_uuid;
203         /** To link all exports on an obd device */
204         struct list_head        exp_obd_chain;
205         /* Unlinked export list */
206         struct list_head        exp_stale_list;
207         struct hlist_node       exp_uuid_hash;  /** uuid-export hash*/
208         struct hlist_node       exp_nid_hash;   /** nid-export hash */
209         struct hlist_node       exp_gen_hash;   /** last_rcvd clt gen hash */
210         /**
211          * All exports eligible for ping evictor are linked into a list
212          * through this field in "most time since last request on this export"
213          * order
214          * protected by obd_dev_lock
215          */
216         struct list_head        exp_obd_chain_timed;
217         /** Obd device of this export */
218         struct obd_device      *exp_obd;
219         /**
220          * "reverse" import to send requests (e.g. from ldlm) back to client
221          * exp_lock protect its change
222          */
223         struct obd_import        *exp_imp_reverse;
224         struct nid_stat          *exp_nid_stats;
225         /** Active connetion */
226         struct ptlrpc_connection *exp_connection;
227         /** Connection count value from last successful reconnect rpc */
228         __u32                     exp_conn_cnt;
229         /** Hash list of all ldlm locks granted on this export */
230         struct cfs_hash          *exp_lock_hash;
231         /**
232          * Hash list for Posix lock deadlock detection, added with
233          * ldlm_lock::l_exp_flock_hash.
234          */
235         struct cfs_hash        *exp_flock_hash;
236         struct list_head        exp_outstanding_replies;
237         struct list_head        exp_uncommitted_replies;
238         spinlock_t              exp_uncommitted_replies_lock;
239         /** Last committed transno for this export */
240         __u64                   exp_last_committed;
241         /** When was last request received */
242         time64_t                exp_last_request_time;
243         /** On replay all requests waiting for replay are linked here */
244         struct list_head        exp_req_replay_queue;
245         /**
246          * protects exp_flags, exp_outstanding_replies and the change
247          * of exp_imp_reverse
248          */
249         spinlock_t              exp_lock;
250         /** Compatibility flags for this export are embedded into
251          *  exp_connect_data */
252         struct obd_connect_data exp_connect_data;
253         enum obd_option         exp_flags;
254         unsigned long           exp_failed:1,
255                                 exp_in_recovery:1,
256                                 exp_disconnected:1,
257                                 exp_connecting:1,
258                                 /** VBR: export missed recovery */
259                                 exp_delayed:1,
260                                 /** VBR: failed version checking */
261                                 exp_vbr_failed:1,
262                                 exp_req_replay_needed:1,
263                                 exp_lock_replay_needed:1,
264                                 exp_need_sync:1,
265                                 exp_flvr_changed:1,
266                                 exp_flvr_adapt:1,
267                                 /* if to swap nidtbl entries for 2.2 clients.
268                                  * Only used by the MGS to fix LU-1644. */
269                                 exp_need_mne_swab:1,
270                                 /* The export already got final replay ping
271                                  * request. */
272                                 exp_replay_done:1;
273         /* also protected by exp_lock */
274         enum lustre_sec_part    exp_sp_peer;
275         struct sptlrpc_flavor   exp_flvr;               /* current */
276         struct sptlrpc_flavor   exp_flvr_old[2];        /* about-to-expire */
277         time64_t                exp_flvr_expire[2];     /* seconds */
278
279         /** protects exp_hp_rpcs */
280         spinlock_t              exp_rpc_lock;
281         struct list_head        exp_hp_rpcs;    /* (potential) HP RPCs */
282         struct list_head        exp_reg_rpcs;  /* RPC being handled */
283
284         /** blocking dlm lock list, protected by exp_bl_list_lock */
285         struct list_head        exp_bl_list;
286         spinlock_t              exp_bl_list_lock;
287
288         /** Target specific data */
289         union {
290                 struct tg_export_data     eu_target_data;
291                 struct mdt_export_data    eu_mdt_data;
292                 struct filter_export_data eu_filter_data;
293                 struct ec_export_data     eu_ec_data;
294                 struct mgs_export_data    eu_mgs_data;
295         } u;
296
297         struct adaptive_timeout    exp_bl_lock_at;
298
299         /** highest XID received by export client that has no
300          * unreceived lower-numbered XID
301          */
302         __u64                     exp_last_xid;
303 };
304
/*
 * Shorthands for members of the target-specific union "u" of
 * struct obd_export, so that exp->exp_mdt_data reads as exp->u.eu_mdt_data.
 */
#define exp_target_data		u.eu_target_data
#define exp_mdt_data		u.eu_mdt_data
#define exp_filter_data		u.eu_filter_data
#define exp_ec_data		u.eu_ec_data
309
310 static inline __u64 *exp_connect_flags_ptr(struct obd_export *exp)
311 {
312         return &exp->exp_connect_data.ocd_connect_flags;
313 }
314
315 static inline __u64 exp_connect_flags(struct obd_export *exp)
316 {
317         return *exp_connect_flags_ptr(exp);
318 }
319
320 static inline __u64 *exp_connect_flags2_ptr(struct obd_export *exp)
321 {
322         return &exp->exp_connect_data.ocd_connect_flags2;
323 }
324
325 static inline __u64 exp_connect_flags2(struct obd_export *exp)
326 {
327         return *exp_connect_flags2_ptr(exp);
328 }
329
330 static inline int exp_max_brw_size(struct obd_export *exp)
331 {
332         LASSERT(exp != NULL);
333         if (exp_connect_flags(exp) & OBD_CONNECT_BRW_SIZE)
334                 return exp->exp_connect_data.ocd_brw_size;
335
336         return ONE_MB_BRW_SIZE;
337 }
338
339 static inline int exp_connect_multibulk(struct obd_export *exp)
340 {
341         return exp_max_brw_size(exp) > ONE_MB_BRW_SIZE;
342 }
343
344 static inline int exp_connect_cancelset(struct obd_export *exp)
345 {
346         LASSERT(exp != NULL);
347         return !!(exp_connect_flags(exp) & OBD_CONNECT_CANCELSET);
348 }
349
350 static inline int exp_connect_lru_resize(struct obd_export *exp)
351 {
352         LASSERT(exp != NULL);
353         return !!(exp_connect_flags(exp) & OBD_CONNECT_LRU_RESIZE);
354 }
355
356 static inline int exp_connect_vbr(struct obd_export *exp)
357 {
358         LASSERT(exp != NULL);
359         LASSERT(exp->exp_connection);
360         return !!(exp_connect_flags(exp) & OBD_CONNECT_VBR);
361 }
362
363 static inline int exp_connect_umask(struct obd_export *exp)
364 {
365         return !!(exp_connect_flags(exp) & OBD_CONNECT_UMASK);
366 }
367
368 static inline int imp_connect_lru_resize(struct obd_import *imp)
369 {
370         struct obd_connect_data *ocd;
371
372         LASSERT(imp != NULL);
373         ocd = &imp->imp_connect_data;
374         return !!(ocd->ocd_connect_flags & OBD_CONNECT_LRU_RESIZE);
375 }
376
377 static inline int exp_connect_layout(struct obd_export *exp)
378 {
379         return !!(exp_connect_flags(exp) & OBD_CONNECT_LAYOUTLOCK);
380 }
381
382 static inline bool exp_connect_lvb_type(struct obd_export *exp)
383 {
384         LASSERT(exp != NULL);
385         if (exp_connect_flags(exp) & OBD_CONNECT_LVB_TYPE)
386                 return true;
387         else
388                 return false;
389 }
390
391 static inline bool imp_connect_lvb_type(struct obd_import *imp)
392 {
393         struct obd_connect_data *ocd;
394
395         LASSERT(imp != NULL);
396         ocd = &imp->imp_connect_data;
397         if (ocd->ocd_connect_flags & OBD_CONNECT_LVB_TYPE)
398                 return true;
399         else
400                 return false;
401 }
402
403 static inline bool imp_connect_disp_stripe(struct obd_import *imp)
404 {
405         struct obd_connect_data *ocd;
406
407         LASSERT(imp != NULL);
408         ocd = &imp->imp_connect_data;
409         return ocd->ocd_connect_flags & OBD_CONNECT_DISP_STRIPE;
410 }
411
412 static inline bool imp_connect_shortio(struct obd_import *imp)
413 {
414         struct obd_connect_data *ocd = &imp->imp_connect_data;
415
416         return ocd->ocd_connect_flags & OBD_CONNECT_SHORTIO;
417 }
418
419 static inline __u64 exp_connect_ibits(struct obd_export *exp)
420 {
421         struct obd_connect_data *ocd;
422
423         ocd = &exp->exp_connect_data;
424         return ocd->ocd_ibits_known;
425 }
426
427 static inline int exp_connect_large_acl(struct obd_export *exp)
428 {
429         return !!(exp_connect_flags(exp) & OBD_CONNECT_LARGE_ACL);
430 }
431
432 static inline int exp_connect_lockahead_old(struct obd_export *exp)
433 {
434         return !!(exp_connect_flags(exp) & OBD_CONNECT_LOCKAHEAD_OLD);
435 }
436
437 static inline int exp_connect_lockahead(struct obd_export *exp)
438 {
439         return !!(exp_connect_flags2(exp) & OBD_CONNECT2_LOCKAHEAD);
440 }
441
/* Handle-based lookups; both take a lustre_handle describing the connection. */
struct obd_export *class_conn2export(struct lustre_handle *conn);
struct obd_device *class_conn2obd(struct lustre_handle *conn);
444
/* Magic for struct kkuc_ct_data::kcd_magic; same value as 0x092013cea. */
#define KKUC_CT_DATA_MAGIC	0x92013cea
446 struct kkuc_ct_data {
447         __u32           kcd_magic;
448         struct obd_uuid kcd_uuid;
449         __u32           kcd_archive;
450 };
451
452 /** @} export */
453
454 #endif /* __EXPORT_H */
455 /** @} obd_export */