X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Finclude%2Fobd_support.h;h=7fa06860b0f9aa1e58e57fcf764b5422461da469;hp=1b31dd4d2d11de56cad109f42428fa83c76a1729;hb=346a17e4d8b5c291d776387ace81a5b74bc24141;hpb=4e10baf258ecfd8acde1f426f9dc1a21477bdccc diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 1b31dd4..7fa0686 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -1,49 +1,159 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * GPL HEADER START * - * This file is part of Lustre, http://www.lustre.org. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. */ #ifndef _OBD_SUPPORT #define _OBD_SUPPORT -#include +#include +#include +#include + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif /* global variables */ -extern atomic_t obd_memory; -extern int obd_memmax; -extern unsigned int obd_fail_loc; +extern struct lprocfs_stats *obd_memory; +enum { + OBD_MEMORY_STAT = 0, + OBD_MEMORY_PAGES_STAT = 1, + OBD_STATS_NUM, +}; + extern unsigned int obd_debug_peer_on_timeout; extern unsigned int obd_dump_on_timeout; extern unsigned int obd_dump_on_eviction; +/* obd_timeout should only be used for recovery, not for + networking / disk / timings affected by load (use Adaptive Timeouts) */ extern unsigned int obd_timeout; /* seconds */ -#define PING_INTERVAL max(obd_timeout / 4, 1U) -#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) -#define LDLM_TIMEOUT_DEFAULT 20 -extern unsigned int ldlm_timeout; -extern unsigned int obd_health_check_timeout; +extern unsigned int ldlm_timeout; /* seconds */ +extern unsigned int at_min; +extern unsigned int at_max; +extern unsigned int at_history; +extern int at_early_margin; +extern int at_extra; extern unsigned int obd_sync_filter; extern unsigned int obd_max_dirty_pages; -extern atomic_t obd_dirty_pages; -extern cfs_waitq_t obd_race_waitq; -extern int obd_race_state; - +extern cfs_atomic_t obd_dirty_pages; +extern cfs_atomic_t obd_dirty_transit_pages; +extern unsigned int obd_alloc_fail_rate; + +/* lvfs.c */ +int obd_alloc_fail(const void *ptr, const char *name, const char *type, + size_t size, const char *file, int line); + +/* Some hash init argument constants */ +#define HASH_POOLS_BKT_BITS 3 +#define HASH_POOLS_CUR_BITS 3 +#define HASH_POOLS_MAX_BITS 7 +#define HASH_UUID_BKT_BITS 5 +#define HASH_UUID_CUR_BITS 7 +#define HASH_UUID_MAX_BITS 12 +#define HASH_NID_BKT_BITS 5 +#define HASH_NID_CUR_BITS 7 +#define HASH_NID_MAX_BITS 12 +#define HASH_NID_STATS_BKT_BITS 5 +#define HASH_NID_STATS_CUR_BITS 7 +#define HASH_NID_STATS_MAX_BITS 12 +#define HASH_LQS_BKT_BITS 5 +#define HASH_LQS_CUR_BITS 7 +#define HASH_LQS_MAX_BITS 12 +#define HASH_CONN_BKT_BITS 5 +#define HASH_CONN_CUR_BITS 5 +#define HASH_CONN_MAX_BITS 15 +#define HASH_EXP_LOCK_BKT_BITS 5 +#define HASH_EXP_LOCK_CUR_BITS 7 +#define HASH_EXP_LOCK_MAX_BITS 16 +#define HASH_CL_ENV_BKT_BITS 5 +#define HASH_CL_ENV_BITS 10 + +/* Timeout definitions */ +#define OBD_TIMEOUT_DEFAULT 100 +#define LDLM_TIMEOUT_DEFAULT 20 +#define MDS_LDLM_TIMEOUT_DEFAULT 6 +/* Time to wait for all clients to reconnect during recovery (hard limit) */ +#define OBD_RECOVERY_TIME_HARD (obd_timeout * 9) +/* Time to wait for all clients to reconnect during recovery (soft limit) */ +/* Should be very conservative; must catch the first reconnect after reboot */ +#define OBD_RECOVERY_TIME_SOFT (obd_timeout * 3) +/* Change recovery-small 26b time if you change this */ +#define PING_INTERVAL max(obd_timeout / 4, 1U) +/* a bit more than maximal journal commit time in seconds */ +#define PING_INTERVAL_SHORT min(PING_INTERVAL, 7U) +/* Client may skip 1 ping; we must wait at least 2.5. But for multiple + * failover targets the client only pings one server at a time, and pings + * can be lost on a loaded network. Since eviction has serious consequences, + * and there's no urgent need to evict a client just because it's idle, we + * should be very conservative here. */ +#define PING_EVICT_TIMEOUT (PING_INTERVAL * 6) +#define DISK_TIMEOUT 50 /* Beyond this we warn about disk speed */ +#define CONNECTION_SWITCH_MIN 5U /* Connection switching rate limiter */ + /* Max connect interval for nonresponsive servers; ~50s to avoid building up + connect requests in the LND queues, but within obd_timeout so we don't + miss the recovery window */ +#define CONNECTION_SWITCH_MAX min(50U, max(CONNECTION_SWITCH_MIN,obd_timeout)) +#define CONNECTION_SWITCH_INC 5 /* Connection timeout backoff */ +#ifndef CRAY_XT3 +/* In general this should be low to have quick detection of a system + running on a backup server. (If it's too low, import_select_connection + will increase the timeout anyhow.) */ +#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/20) +#else +/* ...but for very large systems (e.g. CRAY) we need to keep the initial + connect t.o. high (bz 10803), because they will nearly ALWAYS be doing the + connects for the first time (clients "reboot" after every process, so no + chance to generate adaptive timeout data. */ +#define INITIAL_CONNECT_TIMEOUT max(CONNECTION_SWITCH_MIN,obd_timeout/2) +#endif +/* The max delay between connects is SWITCH_MAX + SWITCH_INC + INITIAL */ +#define RECONNECT_DELAY_MAX (CONNECTION_SWITCH_MAX + CONNECTION_SWITCH_INC + \ + INITIAL_CONNECT_TIMEOUT) +#define LONG_UNLINK 300 /* Unlink should happen before now */ + +/** + * Time interval of shrink, if the client is "idle" more than this interval, + * then the ll_grant thread will return the requested grant space to filter + */ +#define GRANT_SHRINK_INTERVAL 1200/*20 minutes*/ #define OBD_FAIL_MDS 0x100 #define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 @@ -100,6 +210,32 @@ extern int obd_race_state; #define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 #define OBD_FAIL_MDS_FS_SETUP 0x135 #define OBD_FAIL_MDS_RESEND 0x136 +#define OBD_FAIL_MDS_LLOG_CREATE_FAILED 0x137 +#define OBD_FAIL_MDS_LOV_SYNC_RACE 0x138 +#define OBD_FAIL_MDS_OSC_PRECREATE 0x139 +#define OBD_FAIL_MDS_LLOG_SYNC_TIMEOUT 0x13a +#define OBD_FAIL_MDS_CLOSE_NET_REP 0x13b +#define OBD_FAIL_MDS_BLOCK_QUOTA_REQ 0x13c +#define OBD_FAIL_MDS_DROP_QUOTA_REQ 0x13d +#define OBD_FAIL_MDS_REMOVE_COMMON_EA 0x13e +#define OBD_FAIL_MDS_ALLOW_COMMON_EA_SETTING 0x13f +#define OBD_FAIL_MDS_FAIL_LOV_LOG_ADD 0x140 +#define OBD_FAIL_MDS_LOV_PREP_CREATE 0x141 +#define OBD_FAIL_MDS_REINT_DELAY 0x142 +#define OBD_FAIL_MDS_READLINK_EPROTO 0x143 +#define OBD_FAIL_MDS_OPEN_WAIT_CREATE 0x144 +#define OBD_FAIL_MDS_PDO_LOCK 0x145 +#define OBD_FAIL_MDS_PDO_LOCK2 0x146 +#define OBD_FAIL_MDS_OSC_CREATE_FAIL 0x147 + +/* CMD */ +#define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180 +#define OBD_FAIL_MDS_IS_SUBDIR_PACK 0x181 +#define OBD_FAIL_MDS_SET_INFO_NET 0x182 +#define OBD_FAIL_MDS_WRITEPAGE_NET 0x183 +#define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184 +#define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185 +#define OBD_FAIL_MDS_GET_INFO_NET 0x186 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -119,7 +255,7 @@ extern int obd_race_state; #define OBD_FAIL_OST_BRW_READ_BULK 0x20f #define OBD_FAIL_OST_SYNC_NET 0x210 #define OBD_FAIL_OST_ALL_REPLY_NET 0x211 -#define OBD_FAIL_OST_ALL_REQUESTS_NET 0x212 +#define OBD_FAIL_OST_ALL_REQUEST_NET 0x212 #define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 #define OBD_FAIL_OST_ENOSPC 0x215 @@ -132,6 +268,16 @@ extern int obd_race_state; #define OBD_FAIL_OST_BRW_SIZE 0x21c #define OBD_FAIL_OST_DROP_REQ 0x21d #define OBD_FAIL_OST_SETATTR_CREDITS 0x21e +#define OBD_FAIL_OST_HOLD_WRITE_RPC 0x21f +#define OBD_FAIL_OST_BRW_WRITE_BULK2 0x220 +#define OBD_FAIL_OST_LLOG_RECOVERY_TIMEOUT 0x221 +#define OBD_FAIL_OST_CANCEL_COOKIE_TIMEOUT 0x222 +#define OBD_FAIL_OST_PAUSE_CREATE 0x223 +#define OBD_FAIL_OST_BRW_PAUSE_PACK 0x224 +#define OBD_FAIL_OST_CONNECT_NET2 0x225 +#define OBD_FAIL_OST_NOMEM 0x226 +#define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 +#define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 @@ -149,6 +295,19 @@ extern int obd_race_state; #define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d #define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e #define OBD_FAIL_LDLM_GLIMPSE 0x30f +#define OBD_FAIL_LDLM_CANCEL_RACE 0x310 +#define OBD_FAIL_LDLM_CANCEL_EVICT_RACE 0x311 +#define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312 +#define OBD_FAIL_LDLM_CLOSE_THREAD 0x313 +#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE 0x314 +#define OBD_FAIL_LDLM_CP_CB_WAIT 0x315 +#define OBD_FAIL_LDLM_OST_FAIL_RACE 0x316 +#define OBD_FAIL_LDLM_INTR_CP_AST 0x317 +#define OBD_FAIL_LDLM_CP_BL_RACE 0x318 +#define OBD_FAIL_LDLM_NEW_LOCK 0x319 + +/* LOCKLESS IO */ +#define OBD_FAIL_LDLM_SET_CONTENTION 0x385 #define OBD_FAIL_OSC 0x400 #define OBD_FAIL_OSC_BRW_READ_BULK 0x401 @@ -160,6 +319,13 @@ extern int obd_race_state; #define OBD_FAIL_OSC_SHUTDOWN 0x407 #define OBD_FAIL_OSC_CHECKSUM_RECEIVE 0x408 #define OBD_FAIL_OSC_CHECKSUM_SEND 0x409 +#define OBD_FAIL_OSC_BRW_PREP_REQ2 0x40a +#define OBD_FAIL_OSC_CONNECT_CKSUM 0x40b +#define OBD_FAIL_OSC_CKSUM_ADLER_ONLY 0x40c +#define OBD_FAIL_OSC_DIO_PAUSE 0x40d +#define OBD_FAIL_OSC_OBJECT_CONTENTION 0x40e +#define OBD_FAIL_OSC_CP_CANCEL_RACE 0x40f +#define OBD_FAIL_OSC_CP_ENQ_RACE 0x410 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 @@ -168,12 +334,26 @@ extern int obd_race_state; #define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 #define OBD_FAIL_PTLRPC_DROP_RPC 0x505 #define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 +#define OBD_FAIL_PTLRPC_DELAY_RECOV 0x507 +#define OBD_FAIL_PTLRPC_CLIENT_BULK_CB 0x508 +#define OBD_FAIL_PTLRPC_PAUSE_REQ 0x50a +#define OBD_FAIL_PTLRPC_PAUSE_REP 0x50c +#define OBD_FAIL_PTLRPC_IMP_DEACTIVE 0x50d +#define OBD_FAIL_PTLRPC_DUMP_LOG 0x50e +#define OBD_FAIL_PTLRPC_LONG_REPL_UNLINK 0x50f +#define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510 +#define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 +#define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 +#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513 +#define OBD_FAIL_PTLRPC_FINISH_REPLAY 0x514 #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 #define OBD_FAIL_OBD_LOGD_NET 0x602 #define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 #define OBD_FAIL_OBD_DQACQ 0x604 +#define OBD_FAIL_OBD_LLOG_SETUP 0x605 +#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 @@ -181,96 +361,192 @@ extern int obd_race_state; #define OBD_FAIL_TGT_DELAY_CONNECT 0x703 #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 #define OBD_FAIL_TGT_DELAY_PRECREATE 0x705 +#define OBD_FAIL_TGT_TOOMANY_THREADS 0x706 +#define OBD_FAIL_TGT_REPLAY_DROP 0x707 +#define OBD_FAIL_TGT_FAKE_EXP 0x708 +#define OBD_FAIL_TGT_REPLAY_DELAY 0x709 +#define OBD_FAIL_TGT_LAST_REPLAY 0x710 +#define OBD_FAIL_TGT_CLIENT_ADD 0x711 +#define OBD_FAIL_TGT_RCVG_FLAG 0x712 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 +#define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 +#define OBD_FAIL_MDC_OLD_EXT_FLAGS 0x802 +#define OBD_FAIL_MDC_GETATTR_ENQUEUE 0x803 #define OBD_FAIL_MGS 0x900 #define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901 #define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 -#define OBD_FAIL_MGC_PROCESS_LOG 0x903 -#define OBD_FAIL_MGS_SLOW_REQUEST_NET 0x904 -#define OBD_FAIL_MGS_SLOW_TARGET_REG 0x905 +#define OBD_FAIL_MGC_PAUSE_PROCESS_LOG 0x903 +#define OBD_FAIL_MGS_PAUSE_REQ 0x904 +#define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 -#define OBD_FAIL_QUOTA_QD_COUNT_32BIT 0xA00 +#define OBD_FAIL_QUOTA_RET_QDATA 0xA02 +#define OBD_FAIL_QUOTA_DELAY_REL 0xA03 +#define OBD_FAIL_QUOTA_DELAY_SD 0xA04 #define OBD_FAIL_LPROC_REMOVE 0xB00 -/* preparation for a more advanced failure testbed (not functional yet) */ -#define OBD_FAIL_MASK_SYS 0x0000FF00 -#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) -#define OBD_FAIL_ONCE 0x80000000 -#define OBD_FAILED 0x40000000 - -#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) && \ - ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \ - (OBD_FAILED | OBD_FAIL_ONCE))) - -#define OBD_FAIL_CHECK_ONCE(id) \ -({ int _ret_ = 0; \ - if (OBD_FAIL_CHECK(id)) { \ - CERROR("*** obd_fail_loc=%x ***\n", id); \ - obd_fail_loc |= OBD_FAILED; \ - if ((id) & OBD_FAIL_ONCE) \ - obd_fail_loc |= OBD_FAIL_ONCE; \ - _ret_ = 1; \ - } \ - _ret_; \ -}) +#define OBD_FAIL_GENERAL_ALLOC 0xC00 + +#define OBD_FAIL_SEQ 0x1000 +#define OBD_FAIL_SEQ_QUERY_NET 0x1001 + +#define OBD_FAIL_FLD 0x1100 +#define OBD_FAIL_FLD_QUERY_NET 0x1101 + +#define OBD_FAIL_SEC_CTX 0x1200 +#define OBD_FAIL_SEC_CTX_INIT_NET 0x1201 +#define OBD_FAIL_SEC_CTX_INIT_CONT_NET 0x1202 +#define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 +#define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204 + +#define OBD_FAIL_LLOG 0x1300 +#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308 +#define OBD_FAIL_LLOG_CATINFO_NET 0x1309 +#define OBD_FAIL_MDS_SYNC_CAPA_SL 0x1310 +#define OBD_FAIL_SEQ_ALLOC 0x1311 + +/* Assign references to moved code to reduce code changes */ +#define OBD_FAIL_PRECHECK(id) CFS_FAIL_PRECHECK(id) +#define OBD_FAIL_CHECK(id) CFS_FAIL_CHECK(id) +#define OBD_FAIL_CHECK_ORSET(id, value) CFS_FAIL_CHECK_ORSET(id, value) +#define OBD_FAIL_CHECK_RESET(id, value) CFS_FAIL_CHECK_RESET(id, value) +#define OBD_FAIL_RETURN(id, ret) CFS_FAIL_RETURN(id, ret) +#define OBD_FAIL_TIMEOUT(id, secs) CFS_FAIL_TIMEOUT(id, secs) +#define OBD_FAIL_TIMEOUT_MS(id, ms) CFS_FAIL_TIMEOUT_MS(id, ms) +#define OBD_FAIL_TIMEOUT_ORSET(id, value, secs) CFS_FAIL_TIMEOUT_ORSET(id, value, secs) +#define OBD_RACE(id) CFS_RACE(id) +#define OBD_FAIL_ONCE CFS_FAIL_ONCE +#define OBD_FAILED CFS_FAILED -#define OBD_FAIL_RETURN(id, ret) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - RETURN(ret); \ - } \ -} while(0) +#ifdef __KERNEL__ +static inline void obd_fail_write(int id, struct super_block *sb) +{ + /* We set FAIL_ONCE because we never "un-fail" a device */ + if (OBD_FAIL_CHECK_ORSET(id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) { +#ifdef LIBCFS_DEBUG + BDEVNAME_DECLARE_STORAGE(tmp); + CERROR("cfs_fail_loc=%x, fail write operation on %s\n", + id, ll_bdevname(sb, tmp)); +#endif + /* TODO-CMD: fix getting jdev */ + __lvfs_set_rdonly(lvfs_sbdev(sb), (lvfs_sbdev_type)0); + } +} +#define OBD_FAIL_WRITE(id, sb) obd_fail_write(id, sb) +#endif -#define OBD_FAIL_TIMEOUT(id, secs) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - CERROR("obd_fail_timeout id %x sleeping for %d secs\n", \ - (id), (secs)); \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - cfs_schedule_timeout(CFS_TASK_UNINT, \ - cfs_time_seconds(secs)); \ - set_current_state(TASK_RUNNING); \ - CERROR("obd_fail_timeout id %x awake\n", (id)); \ - } \ -} while(0) +#define fixme() CDEBUG(D_OTHER, "FIXME\n"); + +extern cfs_atomic_t libcfs_kmemory; + +#ifdef LPROCFS +#define obd_memory_add(size) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sub(size) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_STAT, (long)(size)) +#define obd_memory_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) +#define obd_pages_add(order) \ + lprocfs_counter_add(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sub(order) \ + lprocfs_counter_sub(obd_memory, OBD_MEMORY_PAGES_STAT, \ + (long)(1 << (order))) +#define obd_pages_sum() \ + lprocfs_stats_collector(obd_memory, OBD_MEMORY_PAGES_STAT, \ + LPROCFS_FIELDS_FLAGS_SUM) + +extern void obd_update_maxusage(void); +extern __u64 obd_memory_max(void); +extern __u64 obd_pages_max(void); -#ifdef __KERNEL__ -/* The idea here is to synchronise two threads to force a race. The - * first thread that calls this with a matching fail_loc is put to - * sleep. The next thread that calls with the same fail_loc wakes up - * the first and continues. */ -#define OBD_RACE(id) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - obd_race_state = 0; \ - CERROR("obd_race id %x sleeping\n", (id)); \ - OBD_SLEEP_ON(obd_race_waitq, obd_race_state != 0); \ - CERROR("obd_fail_race id %x awake\n", (id)); \ - } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) { \ - CERROR("obd_fail_race id %x waking\n", (id)); \ - obd_race_state = 1; \ - wake_up(&obd_race_waitq); \ - } \ -} while(0) #else -/* sigh. an expedient fix until OBD_RACE is fixed up */ -#define OBD_RACE(foo) do {} while(0) + +extern __u64 obd_alloc; +extern __u64 obd_pages; + +extern __u64 obd_max_alloc; +extern __u64 obd_max_pages; + +static inline void obd_memory_add(long size) +{ + obd_alloc += size; + if (obd_alloc > obd_max_alloc) + obd_max_alloc = obd_alloc; +} + +static inline void obd_memory_sub(long size) +{ + obd_alloc -= size; +} + +static inline void obd_pages_add(int order) +{ + obd_pages += 1<< order; + if (obd_pages > obd_max_pages) + obd_max_pages = obd_pages; +} + +static inline void obd_pages_sub(int order) +{ + obd_pages -= 1<< order; +} + +#define obd_memory_sum() (obd_alloc) +#define obd_pages_sum() (obd_pages) + +#define obd_memory_max() (obd_max_alloc) +#define obd_pages_max() (obd_max_pages) + #endif -#define fixme() CDEBUG(D_OTHER, "FIXME\n"); +#define OBD_DEBUG_MEMUSAGE (1) + +#if OBD_DEBUG_MEMUSAGE +#define OBD_ALLOC_POST(ptr, size, name) \ + obd_memory_add(size); \ + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \ + (int)(size), ptr) + +#define OBD_FREE_PRE(ptr, size, name) \ + LASSERT(ptr); \ + obd_memory_sub(size); \ + CDEBUG(D_MALLOC, name " '" #ptr "': %d at %p.\n", \ + (int)(size), ptr); \ + POISON(ptr, 0x5a, size) -extern atomic_t libcfs_kmemory; +#else /* !OBD_DEBUG_MEMUSAGE */ + +#define OBD_ALLOC_POST(ptr, size, name) ((void)0) +#define OBD_FREE_PRE(ptr, size, name) ((void)0) + +#endif /* !OBD_DEBUG_MEMUSAGE */ + +#ifdef RANDOM_FAIL_ALLOC +#define HAS_FAIL_ALLOC_FLAG OBD_FAIL_CHECK(OBD_FAIL_GENERAL_ALLOC) +#else +#define HAS_FAIL_ALLOC_FLAG 0 +#endif + +#define OBD_ALLOC_FAIL_BITS 24 +#define OBD_ALLOC_FAIL_MASK ((1 << OBD_ALLOC_FAIL_BITS) - 1) +#define OBD_ALLOC_FAIL_MULT (OBD_ALLOC_FAIL_MASK / 100) #if defined(LUSTRE_UTILS) /* this version is for utils only */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ + if (unlikely((ptr) == NULL)) { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ } else { \ @@ -280,21 +556,22 @@ do { \ } \ } while (0) #else /* this version is for the kernel and liblustre */ +#define OBD_FREE_RTN0(ptr) \ +({ \ + cfs_free(ptr); \ + (ptr) = NULL; \ + 0; \ +}) #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = cfs_alloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ - } else { \ + if (likely((ptr) != NULL && \ + (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 || \ + !obd_alloc_fail(ptr, #ptr, "km", size, \ + __FILE__, __LINE__) || \ + OBD_FREE_RTN0(ptr)))){ \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "kmalloced"); \ } \ } while (0) #endif @@ -314,26 +591,55 @@ do { \ # define OBD_VMALLOC(ptr, size) \ do { \ (ptr) = cfs_alloc_large(size); \ - if ((ptr) == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + if (unlikely((ptr) == NULL)) { \ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed\n", \ + (int)(size)); \ + CERROR(LPU64" total bytes allocated by Lustre, %d by LNET\n", \ + obd_memory_sum(), cfs_atomic_read(&libcfs_kmemory)); \ } else { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "vmalloced"); \ } \ +} while(0) +#endif + +#ifdef __KERNEL__ + +/* Allocations above this size are considered too big and could not be done + * atomically. + * + * Be very careful when changing this value, especially when decreasing it, + * since vmalloc in Linux doesn't perform well on multi-cores system, calling + * vmalloc in critical path would hurt peformance badly. See LU-66. + */ +#define OBD_ALLOC_BIG (4 * CFS_PAGE_SIZE) + +#define OBD_ALLOC_LARGE(ptr, size) \ +do { \ + if (size > OBD_ALLOC_BIG) \ + OBD_VMALLOC(ptr, size); \ + else \ + OBD_ALLOC(ptr, size); \ } while (0) + +#define OBD_FREE_LARGE(ptr, size) \ +do { \ + if (size > OBD_ALLOC_BIG) \ + OBD_VFREE(ptr, size); \ + else \ + OBD_FREE(ptr, size); \ +} while (0) +#else +#define OBD_ALLOC_LARGE(ptr, size) OBD_ALLOC(ptr, size) +#define OBD_FREE_LARGE(ptr, size) OBD_FREE(ptr,size) #endif #ifdef CONFIG_DEBUG_SLAB #define POISON(ptr, c, s) do {} while (0) +#define POISON_PTR(ptr) ((void)0) #else #define POISON(ptr, c, s) memset(ptr, c, s) +#define POISON_PTR(ptr) (ptr) = (void *)0xdeadbeef #endif #ifdef POISON_BULK @@ -346,78 +652,127 @@ do { \ #ifdef __KERNEL__ #define OBD_FREE(ptr, size) \ do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "kfreed"); \ cfs_free(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) + + +#ifdef HAVE_RCU +# ifdef HAVE_CALL_RCU_PARAM +# define my_call_rcu(rcu, cb) call_rcu(rcu, cb, rcu) +# else +# define my_call_rcu(rcu, cb) call_rcu(rcu, cb) +# endif #else -#define OBD_FREE(ptr, size) ((void)(size), free((ptr))) +# define my_call_rcu(rcu, cb) (cb)(rcu) #endif +#define OBD_FREE_RCU_CB(ptr, size, handle, free_cb) \ +do { \ + struct portals_handle *__h = (handle); \ + LASSERT(handle); \ + __h->h_ptr = (ptr); \ + __h->h_size = (size); \ + __h->h_free_cb = (void (*)(void *, size_t))(free_cb); \ + my_call_rcu(&__h->h_rcu, class_handle_free_cb); \ + POISON_PTR(ptr); \ +} while(0) +#define OBD_FREE_RCU(ptr, size, handle) OBD_FREE_RCU_CB(ptr, size, handle, NULL) + +#else +#define OBD_FREE(ptr, size) ((void)(size), free((ptr))) +#define OBD_FREE_RCU(ptr, size, handle) (OBD_FREE(ptr, size)) +#define OBD_FREE_RCU_CB(ptr, size, handle, cb) ((*(cb))(ptr, size)) +#endif /* ifdef __KERNEL__ */ + #ifdef __arch_um__ # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) #else # define OBD_VFREE(ptr, size) \ do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "vfreed"); \ cfs_free_large(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) #endif /* we memset() the slab object to 0 when allocation succeeds, so DO NOT * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ +#define OBD_SLAB_FREE_RTN0(ptr, slab) \ +({ \ + cfs_mem_cache_free((slab), (ptr)); \ + (ptr) = NULL; \ + 0; \ +}) #define OBD_SLAB_ALLOC(ptr, slab, type, size) \ do { \ - LASSERT(!in_interrupt()); \ + LASSERT(ergo(type != CFS_ALLOC_ATOMIC, !cfs_in_interrupt())); \ (ptr) = cfs_mem_cache_alloc(slab, (type)); \ - if ((ptr) == NULL) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ - } else { \ + if (likely((ptr) != NULL && \ + (!HAS_FAIL_ALLOC_FLAG || obd_alloc_fail_rate == 0 || \ + !obd_alloc_fail(ptr, #ptr, "slab-", size, \ + __FILE__, __LINE__) || \ + OBD_SLAB_FREE_RTN0(ptr, slab)))) { \ memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ + OBD_ALLOC_POST(ptr, size, "slab-alloced"); \ } \ -} while (0) +} while(0) #define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr)) #define OBD_SLAB_FREE(ptr, slab, size) \ do { \ - LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ - POISON(ptr, 0x5a, size); \ + OBD_FREE_PRE(ptr, size, "slab-freed"); \ cfs_mem_cache_free(slab, ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) + POISON_PTR(ptr); \ +} while(0) -#define KEY_IS(str) (keylen >= strlen(key) && strcmp(key, str) == 0) +#define OBD_SLAB_ALLOC_PTR(ptr, slab) \ + OBD_SLAB_ALLOC((ptr), (slab), CFS_ALLOC_STD, sizeof *(ptr)) +#define OBD_SLAB_FREE_PTR(ptr, slab) \ + OBD_SLAB_FREE((ptr), (slab), sizeof *(ptr)) +#define OBD_SLAB_ALLOC_PTR_GFP(ptr, slab, gfp) \ + OBD_SLAB_ALLOC((ptr), (slab), (gfp), sizeof *(ptr)) -#if defined(__linux__) -#include -#elif defined(__APPLE__) -#include -#elif defined(__WINNT__) -#include -#else -#error Unsupported operating system. -#endif +#define KEY_IS(str) \ + (keylen >= (sizeof(str)-1) && memcmp(key, str, (sizeof(str)-1)) == 0) + +/* Wrapper for contiguous page frame allocation */ +#define OBD_PAGE_ALLOC(ptr, gfp_mask) \ +do { \ + (ptr) = cfs_alloc_page(gfp_mask); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("alloc_pages of '" #ptr "' %d page(s) / "LPU64" bytes "\ + "failed\n", (int)1, \ + (__u64)(1 << CFS_PAGE_SHIFT)); \ + CERROR(LPU64" total bytes and "LPU64" total pages " \ + "("LPU64" bytes) allocated by Lustre, " \ + "%d total bytes by LNET\n", \ + obd_memory_sum(), \ + obd_pages_sum() << CFS_PAGE_SHIFT, \ + obd_pages_sum(), \ + cfs_atomic_read(&libcfs_kmemory)); \ + } else { \ + obd_pages_add(0); \ + CDEBUG(D_MALLOC, "alloc_pages '" #ptr "': %d page(s) / " \ + LPU64" bytes at %p.\n", \ + (int)1, \ + (__u64)(1 << CFS_PAGE_SHIFT), ptr); \ + } \ +} while (0) + +#define OBD_PAGE_FREE(ptr) \ +do { \ + LASSERT(ptr); \ + obd_pages_sub(0); \ + CDEBUG(D_MALLOC, "free_pages '" #ptr "': %d page(s) / "LPU64" bytes " \ + "at %p.\n", \ + (int)1, (__u64)(1 << CFS_PAGE_SHIFT), \ + ptr); \ + cfs_free_page(ptr); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) #endif