X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Flustre_lib.h;h=325fc9b26363b7e249380ef42f7f50a779374337;hp=610ac1547a793dcea331b78e140d91096d26f042;hb=c2b6030e9217e54e7153c0a33cce0c2ea4afa54c;hpb=f95393b0d0a59cf3dc2f29cffc35dcc4cc9d7728 diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index 610ac15..325fc9b 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,17 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,499 +45,48 @@ #include #include #include -#include -#if defined(__linux__) -#include -#elif defined(__APPLE__) -#include -#elif defined(__WINNT__) -#include -#else -#error Unsupported operating system. -#endif - -/* prng.c */ -unsigned int ll_rand(void); /* returns a random 32-bit integer */ -void ll_srand(unsigned int, unsigned int); /* seed the generator */ -void ll_get_random_bytes(void *buf, int size); +#include /* target.c */ struct ptlrpc_request; struct obd_export; struct lu_target; +struct l_wait_info; #include #include -#include -void target_client_add_cb(struct obd_device *obd, __u64 transno, void *cb_data, - int error); +#define LI_POISON 0x5a5a5a5a +#if BITS_PER_LONG > 32 +# define LL_POISON 0x5a5a5a5a5a5a5a5aL +#else +# define LL_POISON 0x5a5a5a5aL +#endif +#define LP_POISON ((void *)LL_POISON) + +#ifdef HAVE_SERVER_SUPPORT +int rev_import_init(struct obd_export *exp); int target_handle_connect(struct ptlrpc_request *req); int target_handle_disconnect(struct ptlrpc_request *req); void target_destroy_export(struct obd_export *exp); -int target_pack_pool_reply(struct ptlrpc_request *req); int target_handle_ping(struct ptlrpc_request *req); void target_committed_to_req(struct ptlrpc_request *req); -int do_set_info_async(struct obd_import *imp, - int opcode, int version, - obd_count keylen, void *key, - obd_count vallen, void *val, - struct ptlrpc_request_set *set); - -/* quotacheck callback, dqacq/dqrel callback handler */ -int target_handle_qc_callback(struct ptlrpc_request *req); -#ifdef HAVE_QUOTA_SUPPORT -int target_handle_dqacq_callback(struct ptlrpc_request *req); -#else -#define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP) -#endif - -#define OBD_RECOVERY_MAX_TIME (obd_timeout * 18) /* b13079 */ - void target_cancel_recovery_timer(struct obd_device *obd); void target_stop_recovery_thread(struct obd_device *obd); void target_cleanup_recovery(struct obd_device *obd); int target_queue_recovery_request(struct ptlrpc_request *req, struct obd_device *obd); -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); - -/* client.c */ - -int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); -struct client_obd *client_conn2cli(struct lustre_handle *conn); - -struct md_open_data; -struct obd_client_handle { - struct lustre_handle och_fh; - struct lu_fid och_fid; - struct md_open_data *och_mod; - __u32 och_magic; - int och_flags; -}; -#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed - -/* statfs_pack.c */ -void statfs_pack(struct obd_statfs *osfs, cfs_kstatfs_t *sfs); -void statfs_unpack(cfs_kstatfs_t *sfs, struct obd_statfs *osfs); - -/* l_lock.c */ -struct lustre_lock { - int l_depth; - cfs_task_t *l_owner; - cfs_semaphore_t l_sem; - cfs_spinlock_t l_spin; -}; - -void l_lock_init(struct lustre_lock *); -void l_lock(struct lustre_lock *); -void l_unlock(struct lustre_lock *); -int l_has_lock(struct lustre_lock *); - -/* - * OBD IOCTLS - */ -#define OBD_IOCTL_VERSION 0x00010004 - -struct obd_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - - union { - __u64 ioc_cookie; - __u64 ioc_u64_1; - }; - union { - __u32 ioc_conn1; - __u32 ioc_u32_1; - }; - union { - __u32 ioc_conn2; - __u32 ioc_u32_2; - }; - - struct obdo ioc_obdo1; - struct obdo ioc_obdo2; - - obd_size ioc_count; - obd_off ioc_offset; - __u32 ioc_dev; - __u32 ioc_command; - - __u64 ioc_nid; - __u32 ioc_nal; - __u32 ioc_type; - - /* buffers the kernel will treat as user pointers */ - __u32 ioc_plen1; - char *ioc_pbuf1; - __u32 ioc_plen2; - char *ioc_pbuf2; - - /* inline buffers for various arguments */ - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - __u32 ioc_inllen3; - char *ioc_inlbuf3; - __u32 ioc_inllen4; - char *ioc_inlbuf4; - - char ioc_bulk[0]; -}; - -struct obd_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) -{ - int len = cfs_size_round(sizeof(struct obd_ioctl_data)); - len += cfs_size_round(data->ioc_inllen1); - len += cfs_size_round(data->ioc_inllen2); - len += cfs_size_round(data->ioc_inllen3); - len += cfs_size_round(data->ioc_inllen4); - return len; -} - - -static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR("OBD ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen3 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen4 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf3 && !data->ioc_inllen3) { - CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf4 && !data->ioc_inllen4) { - CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR("OBD ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR("OBD ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR("OBD ioctl: plen1 set but NULL pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR("OBD ioctl: plen2 set but NULL pointer\n"); - return 1; - } - if (obd_ioctl_packlen(data) > data->ioc_len) { - CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n", - obd_ioctl_packlen(data), data->ioc_len); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - data->ioc_len = obd_ioctl_packlen(data); - data->ioc_version = OBD_IOCTL_VERSION; - - if (*pbuf && data->ioc_len > max) - return -EINVAL; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return -ENOMEM; - overlay = (struct obd_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - if (obd_ioctl_is_invalid(overlay)) - return -EINVAL; - - return 0; -} - -static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - - if (!pbuf) - return 1; - overlay = (struct obd_ioctl_data *)pbuf; - - /* Preserve the caller's buffer pointers */ - overlay->ioc_inlbuf1 = data->ioc_inlbuf1; - overlay->ioc_inlbuf2 = data->ioc_inlbuf2; - overlay->ioc_inlbuf3 = data->ioc_inlbuf3; - overlay->ioc_inlbuf4 = data->ioc_inlbuf4; - - memcpy(data, pbuf, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - - return 0; -} -#endif - -#include - -#ifdef __KERNEL__ -/* function defined in lustre/obdclass//-module.c */ -int obd_ioctl_getdata(char **buf, int *len, void *arg); -int obd_ioctl_popdata(void *arg, void *data, int len); -#else -/* buffer MUST be at least the size of obd_ioctl_hdr */ -static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) -{ - struct obd_ioctl_hdr hdr; - struct obd_ioctl_data *data; - int err; - int offset = 0; - ENTRY; - - err = cfs_copy_from_user(&hdr, (void *)arg, sizeof(hdr)); - if (err) - RETURN(err); - - if (hdr.ioc_version != OBD_IOCTL_VERSION) { - CERROR("Version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { - CERROR("User buffer len %d exceeds %d max buffer\n", - hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); - RETURN(-EINVAL); - } - - if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { - CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len); - RETURN(-EINVAL); - } - - /* XXX allocate this more intelligently, using kmalloc when - * appropriate */ - OBD_VMALLOC(*buf, hdr.ioc_len); - if (*buf == NULL) { - CERROR("Cannot allocate control buffer of len %d\n", - hdr.ioc_len); - RETURN(-EINVAL); - } - *len = hdr.ioc_len; - data = (struct obd_ioctl_data *)*buf; - - err = cfs_copy_from_user(*buf, (void *)arg, hdr.ioc_len); - if (err) { - OBD_VFREE(*buf, hdr.ioc_len); - RETURN(err); - } - - if (obd_ioctl_is_invalid(data)) { - CERROR("ioctl not correctly formatted\n"); - OBD_VFREE(*buf, hdr.ioc_len); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - offset += cfs_size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; - offset += cfs_size_round(data->ioc_inllen2); - } - - if (data->ioc_inllen3) { - data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; - offset += cfs_size_round(data->ioc_inllen3); - } - - if (data->ioc_inllen4) { - data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; - } - - RETURN(0); -} - -static inline int obd_ioctl_popdata(void *arg, void *data, int len) -{ - int err = cfs_copy_to_user(arg, data, len); - if (err) - err = -EFAULT; - return err; -} +int target_bulk_io(struct obd_export *exp, struct ptlrpc_bulk_desc *desc, + struct l_wait_info *lwi); #endif -static inline void obd_ioctl_freedata(char *buf, int len) -{ - ENTRY; - - OBD_VFREE(buf, len); - EXIT; - return; -} - -/* - * BSD ioctl description: - * #define IOC_V1 _IOR(g, n1, long) - * #define IOC_V2 _IOW(g, n2, long) - * - * ioctl(f, IOC_V1, arg); - * arg will be treated as a long value, - * - * ioctl(f, IOC_V2, arg) - * arg will be treated as a pointer, bsd will call - * copyin(buf, arg, sizeof(long)) - * - * To make BSD ioctl handles argument correctly and simplely, - * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data - * for us. Does this change affect Linux? (XXX Liang) - */ -#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE) -#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) -#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) - - -#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) -#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) -#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) -#define OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) -#define OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) -#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE) -#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE) -#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE) -#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PING_TARGET _IOW ('f', 136, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 ) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_ROOT_SQUASH _IOWR('f', 143, OBD_IOC_DATA_TYPE) - -#define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_GSS_SUPPORT _IOWR('f', 145, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_CHANGELOG_SEND _IOW ('f', 148, OBD_IOC_DATA_TYPE) -#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) -#define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_REG _IOW ('f', 151, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 152, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 153, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) -#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) -#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) - -#define OBD_IOC_MOUNTOPT _IOWR('f', 170, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) -#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) -#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) -#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) -#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE) -#define OBD_IOC_POOL _IOWR('f', 188, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE) -#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) - -#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) -#define ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) - -#define OBD_IOC_GET_OBJ_VERSION _IOR('f', 210, OBD_IOC_DATA_TYPE) - -/* XXX _IOWR('f', 250, long) has been defined in - * libcfs/include/libcfs/libcfs_private.h for debug, don't use it - */ - -/* Until such time as we get_info the per-stripe maximum from the OST, - * we define this to be 2T - 4k, which is the ext3 maxbytes. */ -#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL +int target_pack_pool_reply(struct ptlrpc_request *req); +int do_set_info_async(struct obd_import *imp, + int opcode, int version, + size_t keylen, void *key, + size_t vallen, void *val, + struct ptlrpc_request_set *set); -/* #define POISON_BULK 0 */ +void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); /* * l_wait_event is a flexible sleeping function, permitting simple caller @@ -664,68 +209,123 @@ struct l_wait_info { #define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) -#ifdef __KERNEL__ +#define LUSTRE_FATAL_SIGS \ + (sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGTERM) | \ + sigmask(SIGQUIT) | sigmask(SIGALRM)) + +/* + * Wait Queue + */ +#ifndef HAVE___ADD_WAIT_QUEUE_EXCLUSIVE +static inline void __add_wait_queue_exclusive(wait_queue_head_t *q, + wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wait); +} +#endif /* HAVE___ADD_WAIT_QUEUE_EXCLUSIVE */ + +/** + * wait_queue_t of Linux (version < 2.6.34) is a FIFO list for exclusively + * waiting threads, which is not always desirable because all threads will + * be waken up again and again, even user only needs a few of them to be + * active most time. This is not good for performance because cache can + * be polluted by different threads. + * + * LIFO list can resolve this problem because we always wakeup the most + * recent active thread by default. + * + * NB: please don't call non-exclusive & exclusive wait on the same + * waitq if add_wait_queue_exclusive_head is used. + */ +#define add_wait_queue_exclusive_head(waitq, link) \ +{ \ + unsigned long flags; \ + \ + spin_lock_irqsave(&((waitq)->lock), flags); \ + __add_wait_queue_exclusive(waitq, link); \ + spin_unlock_irqrestore(&((waitq)->lock), flags); \ +} /* * wait for @condition to become true, but no longer than timeout, specified * by @info. */ -#define __l_wait_event(wq, condition, info, ret, excl) \ +#define __l_wait_event(wq, condition, info, ret, l_add_wait) \ do { \ - cfs_waitlink_t __wait; \ - cfs_duration_t __timeout = info->lwi_timeout; \ - cfs_sigset_t __blocked; \ - int __allow_intr = info->lwi_allow_intr; \ - \ - ret = 0; \ - if (condition) \ - break; \ - \ - cfs_waitlink_init(&__wait); \ - if (excl) \ - cfs_waitq_add_exclusive(&wq, &__wait); \ - else \ - cfs_waitq_add(&wq, &__wait); \ - \ - /* Block all signals (just the non-fatal ones if no timeout). */ \ - if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr)) \ - __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - else \ - __blocked = l_w_e_set_sigs(0); \ - \ - for (;;) { \ - cfs_set_current_state(CFS_TASK_INTERRUPTIBLE); \ - \ - if (condition) \ - break; \ - \ - if (__timeout == 0) { \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - } else { \ - cfs_duration_t interval = info->lwi_interval? \ - min_t(cfs_duration_t, \ - info->lwi_interval,__timeout):\ - __timeout; \ - cfs_duration_t remaining = cfs_waitq_timedwait(&__wait,\ - CFS_TASK_INTERRUPTIBLE, \ - interval); \ - __timeout = cfs_time_sub(__timeout, \ - cfs_time_sub(interval, remaining));\ - if (__timeout == 0) { \ - if (info->lwi_on_timeout == NULL || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - /* Take signals after the timeout expires. */ \ - if (info->lwi_on_signal != NULL) \ - (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - } \ - } \ + wait_queue_t __wait; \ + cfs_duration_t __timeout = info->lwi_timeout; \ + sigset_t __blocked; \ + int __allow_intr = info->lwi_allow_intr; \ + \ + ret = 0; \ + if (condition) \ + break; \ + \ + init_waitqueue_entry(&__wait, current); \ + l_add_wait(&wq, &__wait); \ + \ + /* Block all signals (just the non-fatal ones if no timeout). */ \ + if (info->lwi_on_signal != NULL && (__timeout == 0 || __allow_intr)) \ + __blocked = cfs_block_sigsinv(LUSTRE_FATAL_SIGS); \ + else \ + __blocked = cfs_block_sigsinv(0); \ + \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + \ + /* To guarantee that the condition check will be done */ \ + /* after setting the thread state as TASK_INTERRUPTIBLE. */ \ + /* Otherwise, out-of-order execution may cause some race. */ \ + /* Consider the following real execution order: */ \ + \ + /* 1. Thread1 checks condition on CPU1, gets false. */ \ + /* 2. Thread2 sets condition on CPU2. */ \ + /* 3. Thread2 calls wake_up() on CPU2 to wake the threads */ \ + /* with state TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE. */ \ + /* But the Thread1's state is TASK_RUNNING at that time. */ \ + /* 4. Thread1 sets its state as TASK_INTERRUPTIBLE on CPU1, */ \ + /* then schedule. */ \ + \ + /* If the '__timeout' variable is zero, the Thread1 will */ \ + /* have no chance to check the condition again. */ \ + \ + /* Generally, the interval between out-of-ordered step1 and */ \ + /* step4 is very tiny, as to above step2 and step3 cannot */ \ + /* happen. On some degree, it can explain why we seldom hit */ \ + /* related trouble. But such race really exists, especially */ \ + /* consider that the step1 and step4 can be interruptible. */ \ + /* So add barrier to avoid Thread1 out-of-order execution. */ \ + smp_mb(); \ + \ + if (condition) \ + break; \ + \ + if (__timeout == 0) { \ + schedule(); \ + } else { \ + cfs_duration_t interval = info->lwi_interval? \ + min_t(cfs_duration_t, \ + info->lwi_interval,__timeout):\ + __timeout; \ + cfs_duration_t remaining = schedule_timeout(interval); \ + __timeout = cfs_time_sub(__timeout, \ + cfs_time_sub(interval, remaining));\ + if (__timeout == 0) { \ + if (info->lwi_on_timeout == NULL || \ + info->lwi_on_timeout(info->lwi_cb_data)) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + /* Take signals after the timeout expires. */ \ + if (info->lwi_on_signal != NULL) \ + (void)cfs_block_sigsinv(LUSTRE_FATAL_SIGS);\ + } \ + } \ \ if (condition) \ break; \ - if (cfs_signal_pending()) { \ + if (signal_pending(current)) { \ if (info->lwi_on_signal != NULL && \ (__timeout == 0 || __allow_intr)) { \ if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ @@ -733,100 +333,71 @@ do { \ ret = -EINTR; \ break; \ } \ - /* We have to do this here because some signals */ \ - /* are not blockable - ie from strace(1). */ \ - /* In these cases we want to schedule_timeout() */ \ - /* again, because we don't want that to return */ \ - /* -EINTR when the RPC actually succeeded. */ \ - /* the RECALC_SIGPENDING below will deliver the */ \ - /* signal properly. */ \ - cfs_clear_sigpending(); \ + /* We have to do this here because some signals */ \ + /* are not blockable - ie from strace(1). */ \ + /* In these cases we want to schedule_timeout() */ \ + /* again, because we don't want that to return */ \ + /* -EINTR when the RPC actually succeeded. */ \ + /* the recalc_sigpending() below will deliver the */ \ + /* signal properly. */ \ + cfs_clear_sigpending(); \ } \ } \ \ - cfs_block_sigs(__blocked); \ + cfs_restore_sigs(__blocked); \ \ - cfs_set_current_state(CFS_TASK_RUNNING); \ - cfs_waitq_del(&wq, &__wait); \ -} while (0) - -#else /* !__KERNEL__ */ -#define __l_wait_event(wq, condition, info, ret, excl) \ -do { \ - long __timeout = info->lwi_timeout; \ - long __now; \ - long __then = 0; \ - int __timed_out = 0; \ - int __interval = obd_timeout; \ - \ - ret = 0; \ - if (condition) \ - break; \ - \ - if (__timeout != 0) \ - __then = time(NULL); \ - \ - if (__timeout && __timeout < __interval) \ - __interval = __timeout; \ - if (info->lwi_interval && info->lwi_interval < __interval) \ - __interval = info->lwi_interval; \ - \ - while (!(condition)) { \ - liblustre_wait_event(__interval); \ - if (condition) \ - break; \ - \ - if (!__timed_out && info->lwi_timeout != 0) { \ - __now = time(NULL); \ - __timeout -= __now - __then; \ - __then = __now; \ - \ - if (__timeout > 0) \ - continue; \ - \ - __timeout = 0; \ - __timed_out = 1; \ - if (info->lwi_on_timeout == NULL || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - } \ - } \ + set_current_state(TASK_RUNNING); \ + remove_wait_queue(&wq, &__wait); \ } while (0) -#endif /* __KERNEL__ */ - #define l_wait_event(wq, condition, info) \ ({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, __ret, 0); \ - __ret; \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, \ + __ret, add_wait_queue); \ + __ret; \ }) #define l_wait_event_exclusive(wq, condition, info) \ ({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, __ret, 1); \ - __ret; \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, \ + __ret, add_wait_queue_exclusive); \ + __ret; \ +}) + +#define l_wait_event_exclusive_head(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, \ + __ret, add_wait_queue_exclusive_head); \ + __ret; \ }) -#define l_cfs_wait_event(wq, condition) \ +#define l_wait_condition(wq, condition) \ ({ \ struct l_wait_info lwi = { 0 }; \ l_wait_event(wq, condition, &lwi); \ }) -#ifdef __KERNEL__ -#define LIBLUSTRE_CLIENT (0) -#else -#define LIBLUSTRE_CLIENT (1) -#endif +#define l_wait_condition_exclusive(wq, condition) \ +({ \ + struct l_wait_info lwi = { 0 }; \ + l_wait_event_exclusive(wq, condition, &lwi); \ +}) + +#define l_wait_condition_exclusive_head(wq, condition) \ +({ \ + struct l_wait_info lwi = { 0 }; \ + l_wait_event_exclusive_head(wq, condition, &lwi); \ +}) /** @} lib */