From: adilger Date: Tue, 24 Aug 2004 06:33:07 +0000 (+0000) Subject: Land b1_2_smallfix onto b1_2 (20040824_0018) X-Git-Tag: v1_8_0_110~486^5~171 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=8b3830469e48bf4eec838b371202a8b0fd038b0f;p=fs%2Flustre-release.git Land b1_2_smallfix onto b1_2 (20040824_0018) - replace config semaphore with spinlock (3306) - be sure to send a reply for a CANCEL rpc with bad export (3863) - don't allow enqueue to complete on a destroyed export (3822) - down write_lock before checking llog header bitmap (3825) - recover from lock replay timeout (3764) - up llog sem before sending rpc (3652) - change a dlm LBUG to LASSERTF, to maybe learn something (4228) - fix NULL deref and obd_dev leak on setup error (3312) - replace some LBUG about llog ops with error handling (3841) - don't match INVALID dentries from d_lookup and spin (3784) - hold dcache_lock while marking dentries INVALID and hashing (4255) - add libwrap support for the TCP acceptor (3996) - add /proc/sys/portals/routes for non-root route listing (3994) - allow setting MDS UUID in .xml (2580) - print the stack of a process that LBUGs (4228) - add validity checks when grabbing inodes from l_ast_data (3599) --- diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 531a5bd..27704bd 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -97,7 +97,7 @@ if test x$enable_modules != xno ; then BACKINGFS="ldiskfs" ],[ KMODEXT=".o" - linux25="no" + linux25="no" ]) AC_MSG_CHECKING([if you are using Linux 2.6]) AC_MSG_RESULT([$linux25]) @@ -257,13 +257,13 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([$LINUXRELEASE]) AC_SUBST(LINUXRELEASE) - moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel + moduledir='/lib/modules/'$LINUXRELEASE/kernel modulefsdir='$(moduledir)/fs/$(PACKAGE)' - modulenetdir='$(moduledir)/net/$(PACKAGE)' + modulenetdir='$(moduledir)/net/$(PACKAGE)' AC_SUBST(moduledir) AC_SUBST(modulefsdir) - AC_SUBST(modulenetdir) + AC_SUBST(modulenetdir) # ------------ 
RELEASE -------------------------------- AC_MSG_CHECKING([for Lustre release]) diff --git a/lnet/build.m4 b/lnet/build.m4 index 114478c..e8a540a 100644 --- a/lnet/build.m4 +++ b/lnet/build.m4 @@ -96,3 +96,21 @@ else LIBEFENCE="" fi AC_SUBST(LIBEFENCE) + +# -------- enable acceptor libwrap (TCP wrappers) support? ------- +AC_MSG_CHECKING([if libwrap support is requested]) +AC_ARG_ENABLE([libwrap], + AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]), + [case "${enableval}" in + yes) enable_libwrap=yes ;; + no) enable_libwrap=no ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;; + esac],[enable_libwrap=no]) +AC_MSG_RESULT([$enable_libwrap]) +if test x$enable_libwrap = xyes ; then + LIBWRAP="-lwrap" + AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested]) +else + LIBWRAP="" +fi +AC_SUBST(LIBWRAP) diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 69246b2..4a88b88 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -65,14 +65,11 @@ extern void kportal_assertion_failed(char *expr, char *file, const char *func, const int line); #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ __FUNCTION__, __LINE__)) -/* it would be great to dump_stack() here, but some kernels - * export it as show_stack() and I can't be bothered to - * proprely engage in that dance right now */ #define LASSERTF(cond, fmt...) 
\ do { \ if (unlikely(!(cond))) { \ - portals_debug_msg(0, D_EMERG, __FILE__, __FUNCTION__,\ - __LINE__, CDEBUG_STACK, \ + portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\ + __FUNCTION__,__LINE__, CDEBUG_STACK,\ "ASSERTION(" #cond ") failed:" fmt);\ LBUG(); \ } \ @@ -101,6 +98,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ + CERROR("STACK: %s\n", portals_debug_dumpstack()); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ set_task_state(current, TASK_UNINTERRUPTIBLE); \ diff --git a/lnet/router/proc.c b/lnet/router/proc.c index dd65b34..b9d6234 100644 --- a/lnet/router/proc.c +++ b/lnet/router/proc.c @@ -24,55 +24,196 @@ #include "router.h" #define KPR_PROC_ROUTER "sys/portals/router" +#define KPR_PROC_ROUTES "sys/portals/routes" -int -kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data) +/* Used for multi-page route list book keeping */ +struct proc_route_data { + struct list_head *curr; + unsigned int generation; + off_t skip; +} kpr_read_routes_data; + +/* nal2name support re-used from utils/portals.c */ +struct name2num { + char *name; + int num; +} nalnames[] = { + { "any", 0}, + { "elan", QSWNAL}, + { "tcp", SOCKNAL}, + { "gm", GMNAL}, + { "ib", IBNAL}, + { NULL, -1} +}; + +static struct name2num *name2num_lookup_num(struct name2num *table, int num) +{ + while (table->name != NULL) + if (num == table->num) + return (table); + else + table++; + return (NULL); +} + +static char *nal2name(int nal) { - unsigned long long bytes = kpr_fwd_bytes; - unsigned long packets = kpr_fwd_packets; - unsigned long errors = kpr_fwd_errors; + struct name2num *e = name2num_lookup_num(nalnames, nal); + return ((e == NULL) ? "???" 
: e->name); +} + + +static int kpr_proc_router_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long long bytes = kpr_fwd_bytes; + unsigned long packets = kpr_fwd_packets; + unsigned long errors = kpr_fwd_errors; unsigned int qdepth = atomic_read (&kpr_queue_depth); - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); - - *start = page; - return (len); + int len; + + *eof = 1; + if (off != 0) + return (0); + + len = sprintf(page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); + + *start = page; + return (len); } -int -kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data) +static int kpr_proc_router_write(struct file *file, const char *ubuffer, + unsigned long count, void *data) { - /* Ignore what we've been asked to write, and just zero the stats counters */ - kpr_fwd_bytes = 0; - kpr_fwd_packets = 0; - kpr_fwd_errors = 0; + /* Ignore what we've been asked to write, and just zero the stats */ + kpr_fwd_bytes = 0; + kpr_fwd_packets = 0; + kpr_fwd_errors = 0; - return (count); + return (count); } -void -kpr_proc_init(void) +static int kpr_proc_routes_read(char *page, char **start, off_t off, + int count, int *eof, void *data) { - struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL); + struct proc_route_data *prd = data; + kpr_route_entry_t *re; + kpr_gateway_entry_t *ge; + int chunk_len = 0; + int line_len = 0; + int user_len = 0; + + *eof = 1; + *start = page; + + if (prd->curr == NULL) { + if (off != 0) + return 0; + + /* First pass, initialize our private data */ + prd->curr = kpr_routes.next; + prd->generation = kpr_routes_generation; + prd->skip = 0; + } else { + /* Abort route list generation change */ + if (prd->generation != kpr_routes_generation) { + prd->curr = NULL; + return sprintf(page, "\nError: Routes Changed\n"); + } + + /* All the routes have 
been walked */ + if (prd->curr == &kpr_routes) { + prd->curr = NULL; + return 0; + } + } + + read_lock(&kpr_rwlock); + *start = page + prd->skip; + user_len = -prd->skip; + + for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) { + re = list_entry(prd->curr, kpr_route_entry_t, kpre_list); + ge = re->kpre_gateway; + + line_len = sprintf(page + chunk_len, + "%12s "LPX64" : "LPX64" - "LPX64", %s\n", + nal2name(ge->kpge_nalid), ge->kpge_nid, + re->kpre_lo_nid, re->kpre_hi_nid, + ge->kpge_alive ? "up" : "down"); + chunk_len += line_len; + user_len += line_len; - if (entry == NULL) - { + /* The route table will exceed one page */ + if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) { + prd->curr = prd->curr->next; + break; + } + } + + *eof = 0; + + /* Caller received only a portion of the last entry, the + * remaining will be delivered in the next page if asked for. + */ + if (user_len > count) { + prd->curr = prd->curr->prev; + prd->skip = line_len - (user_len - count); + read_unlock(&kpr_rwlock); + return count; + } + + /* Not enough data to entirely satisfy caller's request */ + prd->skip = 0; + read_unlock(&kpr_rwlock); + return user_len; +} + +static int kpr_proc_routes_write(struct file *file, const char *ubuffer, + unsigned long count, void *data) +{ + /* no-op; lctl should be used to adjust the routes */ + return (count); +} + +void kpr_proc_init(void) +{ + struct proc_dir_entry *router_entry; + struct proc_dir_entry *routes_entry; + + /* Initialize KPR_PROC_ROUTER */ + router_entry = create_proc_entry (KPR_PROC_ROUTER, + S_IFREG | S_IRUGO | S_IWUSR, NULL); + + if (router_entry == NULL) { CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER); return; } - entry->data = NULL; - entry->read_proc = kpr_proc_read; - entry->write_proc = kpr_proc_write; + router_entry->data = NULL; + router_entry->read_proc = kpr_proc_router_read; + router_entry->write_proc = kpr_proc_router_write; + + /* Initialize KPR_PROC_ROUTES */ + routes_entry = 
create_proc_entry (KPR_PROC_ROUTES, + S_IFREG | S_IRUGO | S_IWUSR, NULL); + + if (routes_entry == NULL) { + CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES); + return; + } + + kpr_read_routes_data.curr = NULL; + kpr_read_routes_data.generation = 0; + kpr_read_routes_data.skip = 0; + + routes_entry->data = &kpr_read_routes_data; + routes_entry->read_proc = kpr_proc_routes_read; + routes_entry->write_proc = kpr_proc_routes_write; } -void -kpr_proc_fini(void) +void kpr_proc_fini(void) { remove_proc_entry(KPR_PROC_ROUTER, 0); + remove_proc_entry(KPR_PROC_ROUTES, 0); } diff --git a/lnet/router/router.c b/lnet/router/router.c index d0dbf0a..6fcd83a 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -27,6 +27,7 @@ LIST_HEAD(kpr_routes); LIST_HEAD(kpr_gateways); LIST_HEAD(kpr_nals); +unsigned int kpr_routes_generation; unsigned long long kpr_fwd_bytes; unsigned long kpr_fwd_packets; unsigned long kpr_fwd_errors; @@ -599,7 +600,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, list_for_each (e, &kpr_gateways) { kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t, kpge_list); - + if (ge2->kpge_nalid == gateway_nalid && ge2->kpge_nid == gateway_nid) { PORTAL_FREE (ge, sizeof (*ge)); @@ -611,7 +612,6 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, if (!dup) { /* Adding a new gateway... 
*/ - list_add (&ge->kpge_list, &kpr_gateways); /* ...zero all gateway weights so this one doesn't have to @@ -622,12 +622,12 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, kpge_list); atomic_set (&ge2->kpge_weight, 0); } - } re->kpre_gateway = ge; ge->kpge_refcount++; list_add (&re->kpre_list, &kpr_routes); + kpr_routes_generation++; write_unlock_irqrestore (&kpr_rwlock, flags); return (0); @@ -645,12 +645,12 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, ptl_nid_t lo, ptl_nid_t hi) { int specific = (lo != PTL_NID_ANY); - unsigned long flags; + unsigned long flags; int rc = -ENOENT; - struct list_head *e; - struct list_head *n; + struct list_head *e; + struct list_head *n; - CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", gw_nalid, gw_nid, lo, hi); LASSERT(!in_interrupt()); @@ -658,20 +658,19 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, /* NB Caller may specify either all routes via the given gateway * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are * actual NIDs) */ - if (specific ? 
(hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY)) return (-EINVAL); - write_lock_irqsave(&kpr_rwlock, flags); + write_lock_irqsave(&kpr_rwlock, flags); list_for_each_safe (e, n, &kpr_routes) { kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, kpre_list); kpr_gateway_entry_t *ge = re->kpre_gateway; - + if (ge->kpge_nalid != gw_nalid || ge->kpge_nid != gw_nid || - (specific && + (specific && (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid))) continue; @@ -689,7 +688,9 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, break; } + kpr_routes_generation++; write_unlock_irqrestore(&kpr_rwlock, flags); + return (rc); } @@ -751,6 +752,7 @@ kpr_initialise (void) CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); + kpr_routes_generation = 0; kpr_proc_init(); PORTAL_SYMBOL_REGISTER(kpr_router_interface); diff --git a/lnet/router/router.h b/lnet/router/router.h index 309025b3..611d808 100644 --- a/lnet/router/router.h +++ b/lnet/router/router.h @@ -102,9 +102,12 @@ extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when); +extern unsigned int kpr_routes_generation; extern unsigned long long kpr_fwd_bytes; extern unsigned long kpr_fwd_packets; extern unsigned long kpr_fwd_errors; extern atomic_t kpr_queue_depth; +extern struct list_head kpr_routes; +extern rwlock_t kpr_rwlock; #endif /* _KPLROUTER_H */ diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am index 851a8e1..051bcd9 100644 --- a/lnet/utils/Makefile.am +++ b/lnet/utils/Makefile.am @@ -20,6 +20,7 @@ lib_LIBRARIES = libptlctl.a endif acceptor_SOURCES = acceptor.c +acceptor_LDADD = $(LIBWRAP) wirecheck_SOURCES = wirecheck.c diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index f6367d4..daed215 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -13,6 +13,11 @@ #include #include #include +#ifdef HAVE_LIBWRAP +#include +#include +#include +#endif 
#include #include @@ -26,6 +31,12 @@ #define PROGNAME "acceptor" +#ifdef HAVE_LIBWRAP +/* needed because libwrap declares these as externs */ +int allow_severity = LOG_INFO; +int deny_severity = LOG_WARNING; +#endif + void create_pidfile(char *name, int port) { char pidfile[1024]; @@ -276,7 +287,11 @@ int main(int argc, char **argv) int cfd; struct portal_ioctl_data data; struct portals_cfg pcfg; - +#ifdef HAVE_LIBWRAP + struct request_info request; + char addrstr[INET_ADDRSTRLEN]; +#endif + cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); if ( cfd < 0 ) { perror("accept"); @@ -284,6 +299,19 @@ int main(int argc, char **argv) continue; } +#ifdef HAVE_LIBWRAP + /* libwrap access control */ + request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0); + sock_host(&request); + if (!hosts_access(&request)) { + inet_ntop(AF_INET, &clntaddr.sin_addr, + addrstr, INET_ADDRSTRLEN); + syslog(LOG_WARNING, "Unauthorized access from %s:%hd\n", + addrstr, ntohs(clntaddr.sin_port)); + close (cfd); + continue; + } +#endif show_connection (cfd, clntaddr.sin_addr.s_addr); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index d460919..e546aaf 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -371,7 +371,9 @@ int jt_dbg_debug_kernel(int argc, char **argv) fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); return 0; } - sprintf(filename, "%s-%ld.tmp", argv[1], random); + sprintf(filename, "%s.%lu.%u", argc > 1 ? argv[1] : "/tmp/lustre-log", + time(NULL), getpid()); + if (argc > 2) raw = atoi(argv[2]); unlink(filename); diff --git a/lustre/ChangeLog b/lustre/ChangeLog index c2c847c..22dd2fb 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,4 +1,4 @@ -tbd Cluster File Systems, Inc. +2004-08-24 Cluster File Systems, Inc. * version 1.2.5 * bug fixes - don't close LustreDB during write_conf until it is done (3860) @@ -7,7 +7,23 @@ tbd Cluster File Systems, Inc. 
- don't allow multiple threads in OSC recovery(3812) - fix debug_size parameters (3864) - fix mds_postrecov to initialize import for llog ctxt (3121) + - replace config semaphore with spinlock (3306) + - be sure to send a reply for a CANCEL rpc with bad export (3863) + - don't allow enqueue to complete on a destroyed export (3822) + - down write_lock before checking llog header bitmap (3825) + - recover from lock replay timeout (3764) + - up llog sem before sending rpc (3652) - reduce ns lock hold times when setting kms (3267) + - change a dlm LBUG to LASSERTF, to maybe learn something (4228) + - fix NULL deref and obd_dev leak on setup error (3312) + - replace some LBUG about llog ops with error handling (3841) + - don't match INVALID dentries from d_lookup and spin (3784) + - hold dcache_lock while marking dentries INVALID and hashing (4255) + * miscellania + - add libwrap support for the TCP acceptor (3996) + - add /proc/sys/portals/routes for non-root route listing (3994) + - allow setting MDS UUID in .xml (2580) + - print the stack of a process that LBUGs (4228) 2004-07-14 Cluster File Systems, Inc. * version 1.2.4 @@ -26,6 +42,7 @@ tbd Cluster File Systems, Inc. 
- return -ENOENT instead of asserting if ost getattr+unlink race (3558) - avoid deadlock after precreation failure (3758) - fix race and lock order deadlock in orphan handling (3450, 3750) + - add validity checks when grabbing inodes from l_ast_data (3599) * miscellania - add /proc/.../recovery_status to obdfilter (3428) - lightweight CDEBUG infrastructure, debug daemon (3668) diff --git a/lustre/configure.in b/lustre/configure.in index df41107..2a70dd6 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -5,7 +5,7 @@ AC_INIT AC_CANONICAL_SYSTEM -AM_INIT_AUTOMAKE(lustre, 1.2.4.1) +AM_INIT_AUTOMAKE(lustre, 1.2.5) # AM_MAINTAINER_MODE # Four main targets: lustre kernel modules, utilities, tests, and liblustre diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index b661662..ede6646 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -263,6 +263,12 @@ static inline int mapping_has_pages(struct address_space *mapping) #define ll_vfs_symlink(dir, dentry, path, mode) vfs_symlink(dir, dentry, path, mode) #endif +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + #ifdef HAVE_I_ALLOC_SEM #define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0) #define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0) diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 523e6e5..e2f74a5 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -956,6 +956,10 @@ struct llog_log_hdr { struct llog_rec_tail llh_tail; } __attribute__((packed)); +#define LLOG_BITMAP_SIZE(llh) ((llh->llh_hdr.lrh_len - \ + llh->llh_bitmap_offset - \ + sizeof(llh->llh_tail)) * 8) + /* log cookies are used to reference a specific log file and a record therein */ struct 
llog_cookie { struct llog_logid lgc_lgl; diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 1c66b53..69b8d51 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -71,10 +71,13 @@ struct ll_dentry_data { extern struct file_operations ll_pgcache_seq_fops; +#define LLI_INODE_MAGIC 0x111d0de5 +#define LLI_INODE_DEAD 0xdeadd00d #define LLI_F_HAVE_OST_SIZE_LOCK 0 #define LLI_F_HAVE_MDS_SIZE_LOCK 1 #define LLI_F_PREFER_EXTENDED_SIZE 2 struct ll_inode_info { + int lli_inode_magic; struct lov_stripe_md *lli_smd; char *lli_symlink_name; struct semaphore lli_open_sem; diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 3eb75da..8152647 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -191,7 +191,7 @@ struct llog_ctxt { struct llog_operations *loc_logops; struct llog_handle *loc_handle; struct llog_canceld_ctxt *loc_llcd; - struct semaphore loc_sem; /* protects loc_llcd */ + struct semaphore loc_sem; /* protects loc_llcd and loc_imp */ void *llog_proc_cb; }; diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 8f2f9e2..f75d9ea 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -46,6 +46,7 @@ /* OBD Device Declarations */ #define MAX_OBD_DEVICES 256 extern struct obd_device obd_dev[MAX_OBD_DEVICES]; +extern spinlock_t obd_dev_lock; /* OBD Operations Declarations */ extern struct obd_device *class_conn2obd(struct lustre_handle *); @@ -56,7 +57,8 @@ struct obd_export *class_conn2export(struct lustre_handle *); int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm); int class_unregister_type(char *nm); -struct obd_device *class_newdev(int *dev); +struct obd_device *class_newdev(struct obd_type *type); +void class_release_dev(struct obd_device *obd); int class_name2dev(char *name); struct obd_device *class_name2obd(char *name); diff 
--git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 3647c55..01f8b33 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -120,6 +120,8 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 #define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 #define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a +#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b +#define OBD_FAIL_LDLM_REPLY 0x30c #define OBD_FAIL_OSC 0x400 #define OBD_FAIL_OSC_BRW_READ_BULK 0x401 @@ -141,6 +143,8 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 +#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 + /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch index 6059aba..973e14b 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.24.patch @@ -138,11 +138,13 @@ Index: linux-2.4.24/fs/ext3/super.c void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -407,6 +529,7 @@ +@@ -407,6 +529,9 @@ kdev_t j_dev = sbi->s_journal->j_dev; int i; ++#ifdef EXT3_DELETE_THREAD + J_ASSERT(sbi->s_delete_inodes == 0); ++#endif ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { @@ -335,10 +337,11 @@ Index: linux-2.4.24/fs/ext3/namei.c =================================================================== --- linux-2.4.24.orig/fs/ext3/namei.c 2004-01-12 20:36:31.000000000 +0300 +++ linux-2.4.24/fs/ext3/namei.c 2004-01-12 20:36:32.000000000 +0300 -@@ -1936,6 +1936,36 @@ +@@ -1936,6 +1936,40 @@ return retval; } ++#ifdef EXT3_DELETE_THREAD +static int 
ext3_try_to_delay_deletion(struct inode *inode) +{ + struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); @@ -368,6 +371,9 @@ Index: linux-2.4.24/fs/ext3/namei.c + + return 0; +} ++#else ++#define ext3_try_to_delay_deletion(inode) do {} while (0) ++#endif + static int ext3_unlink(struct inode * dir, struct dentry *dentry) { diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch new file mode 100644 index 0000000..f198362 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-nlinks-2.4.24.patch @@ -0,0 +1,170 @@ +--- ./fs/ext3/namei.c.orig 2004-08-19 12:53:21.000000000 +0800 ++++ ./fs/ext3/namei.c 2004-08-19 12:44:18.000000000 +0800 +@@ -1541,11 +1541,16 @@ + static inline void ext3_inc_count(handle_t *handle, struct inode *inode) + { + inode->i_nlink++; ++ if (is_dx(inode) && inode->i_nlink > 1) { ++ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */ ++ inode->i_nlink = 1; ++ } + } + + static inline void ext3_dec_count(handle_t *handle, struct inode *inode) + { +- inode->i_nlink--; ++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) ++ inode->i_nlink--; + } + + static int ext3_add_nondir(handle_t *handle, +@@ -1646,7 +1651,7 @@ + struct ext3_dir_entry_2 * de; + int err; + +- if (dir->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(dir)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -1668,7 +1673,7 @@ + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { +- inode->i_nlink--; /* is this nlink == 0? */ ++ ext3_dec_count(handle, inode); /* is this nlink == 0? 
*/ + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; +@@ -1700,7 +1705,7 @@ + iput (inode); + goto out_stop; + } +- dir->i_nlink++; ++ ext3_inc_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +@@ -1761,10 +1766,11 @@ + } + de = (struct ext3_dir_entry_2 *) bh->b_data; + } +- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh, +- offset)) { +- brelse (bh); +- return 1; ++ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { ++ /* On error skip the de and offset to the next block. */ ++ de = (void *)(bh->b_data + sb->s_blocksize); ++ offset = (offset | (sb->s_blocksize - 1)) + 1; ++ continue; + } + if (le32_to_cpu(de->inode)) { + brelse (bh); +@@ -1957,14 +1963,14 @@ + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; +- if (inode->i_nlink != 2) +- ext3_warning (inode->i_sb, "ext3_rmdir", +- "empty directory has nlink!=2 (%d)", +- inode->i_nlink); ++ if (!EXT3_DIR_LINK_EMPTY(inode)) ++ ext3_warning(inode->i_sb, __FUNCTION__, ++ "empty directory has too many links (%d)", ++ inode->i_nlink); + inode->i_version = ++event; + inode->i_nlink = 0; + ext3_orphan_add(handle, inode); +- dir->i_nlink--; ++ ext3_dec_count(handle, dir); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); + ext3_update_dx_flag(dir); +@@ -2046,7 +2052,7 @@ + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); +- inode->i_nlink--; ++ ext3_dec_count(handle, inode); + if (!inode->i_nlink) { + ext3_try_to_delay_deletion(inode); + ext3_orphan_add(handle, inode); +@@ -2140,9 +2146,8 @@ + if (S_ISDIR(inode->i_mode)) + return -EPERM; + +- if (inode->i_nlink >= EXT3_LINK_MAX) { ++ if (EXT3_DIR_LINK_MAXED(inode)) + return -EMLINK; +- } + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + + EXT3_INDEX_EXTRA_TRANS_BLOCKS); +@@ -2226,8 +2231,8 @@ + if 
(le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; +- if (!new_inode && new_dir!=old_dir && +- new_dir->i_nlink >= EXT3_LINK_MAX) ++ if (!new_inode && new_dir != old_dir && ++ EXT3_DIR_LINK_MAXED(new_dir)) + goto end_rename; + } + if (!new_bh) { +@@ -2285,7 +2290,7 @@ + } + + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + new_inode->i_ctime = CURRENT_TIME; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; +@@ -2296,11 +2301,11 @@ + PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); +- old_dir->i_nlink--; ++ ext3_dec_count(handle, old_dir); + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + } else { +- new_dir->i_nlink++; ++ ext3_inc_count(handle, new_dir); + ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } +--- ./include/linux/ext3_fs.h.orig 2004-08-19 12:53:52.000000000 +0800 ++++ ./include/linux/ext3_fs.h 2004-08-19 11:06:33.000000000 +0800 +@@ -42,7 +42,7 @@ + /* + * Always enable hashed directories + */ +-#define CONFIG_EXT3_INDEX ++#define CONFIG_EXT3_INDEX 1 + + /* + * Debug code +@@ -581,14 +581,15 @@ + */ + + #ifdef CONFIG_EXT3_INDEX +- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ +- EXT3_FEATURE_COMPAT_DIR_INDEX) && \ ++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ ++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) +-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) +-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) ++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \ ++ (is_dx(dir) && (dir)->i_nlink == 1)) + #else + #define is_dx(dir) 0 +-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= 
EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) + #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) + #endif + diff --git a/lustre/kernel_patches/series/vanilla-2.4.24 b/lustre/kernel_patches/series/vanilla-2.4.24 index 5c4852e..d050da7 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.24 +++ b/lustre/kernel_patches/series/vanilla-2.4.24 @@ -36,3 +36,4 @@ ext3-xattr-ptr-arith-fix.patch procfs-ndynamic-2.4.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch +ext3-nlinks-2.4.24.patch diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 5483c78..8567977 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1048,14 +1048,8 @@ target_send_reply_msg (struct ptlrpc_request *req, int rc, int fail_id) if (rc) { DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc); - if (req->rq_reply_state == NULL) { - rc = lustre_pack_reply (req, 0, NULL, NULL); - if (rc != 0) { - CERROR ("can't allocate reply\n"); - return (rc); - } - } - req->rq_type = PTL_RPC_MSG_ERR; + req->rq_status = rc; + return (ptlrpc_error(req)); } else { DEBUG_REQ(D_NET, req, "sending reply"); } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index d4f2e8f..80f5bab 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -229,7 +229,7 @@ static void lock_handle_addref(void *lock) * usage: pass in a resource on which you have done ldlm_resource_get * pass in a parent lock on which you have done a ldlm_lock_get * after return, ldlm_*_put the resource and parent - * returns: lock with refcount 1 + * returns: lock with refcount 2 - one for current caller and one for remote */ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, struct ldlm_resource *resource) @@ -851,7 +851,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns, policy = ldlm_processing_policy_table[res->lr_type]; policy(lock, flags, 1, &rc); - EXIT; + GOTO(out, rc); out: l_unlock(&ns->ns_lock); return rc; diff 
--git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index ecc7d4a..214ef40 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -582,8 +582,15 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, LDLM_DEBUG(lock, "server-side enqueue handler, new lock created"); LASSERT(req->rq_export); - lock->l_export = class_export_get(req->rq_export); + + OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2); l_lock(&lock->l_resource->lr_namespace->ns_lock); + if (req->rq_export->exp_failed) { + LDLM_ERROR(lock,"lock on destroyed export %p\n",req->rq_export); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + GOTO(out, rc = -ENOTCONN); + } + lock->l_export = class_export_get(req->rq_export); list_add(&lock->l_export_chain, &lock->l_export->exp_ldlm_data.led_held_locks); l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -666,6 +673,8 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, size[1]); } up(&lock->l_resource->lr_lvb_sem); + } else { + ldlm_lock_destroy(lock); } if (!err && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK) @@ -1116,7 +1125,8 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) lustre_swab_ldlm_request); if (dlm_req != NULL) ldlm_lock_dump_handle(D_ERROR, &dlm_req->lock_handle1); - RETURN(-ENOTCONN); + ldlm_callback_reply(req, -ENOTCONN); + RETURN(0); } switch (req->rq_reqmsg->opc) { @@ -1132,7 +1142,7 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) default: CERROR("invalid opcode %d\n", req->rq_reqmsg->opc); - RETURN(-EINVAL); + ldlm_callback_reply(req, -EINVAL); } RETURN(0); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index d6e5359..bf77461 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -896,6 +896,10 @@ static int replay_lock_interpret(struct ptlrpc_request *req, LDLM_DEBUG(lock, "replayed lock:"); ptlrpc_import_recovery_state_machine(req->rq_import); out: + if (rc != ELDLM_OK) + ptlrpc_connect_import(req->rq_import, 
NULL); + + RETURN(rc); } diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 5105afc..ac406c7 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -455,10 +455,8 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, struct ldlm_resource *res; ENTRY; - if (type < LDLM_MIN_TYPE || type > LDLM_MAX_TYPE) { - LBUG(); - RETURN(NULL); - } + LASSERTF(type >= LDLM_MIN_TYPE && type <= LDLM_MAX_TYPE, + "type: %d", type); res = ldlm_resource_new(); if (!res) { diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 57d6389..1c08045 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -493,11 +493,11 @@ static int llu_iop_getattr(struct pnode *pno, static int null_if_equal(struct ldlm_lock *lock, void *data) { - if (data == lock->l_ast_data) + if (data == lock->l_ast_data) { lock->l_ast_data = NULL; - if (lock->l_req_mode != lock->l_granted_mode) - return LDLM_ITER_STOP; + if (lock->l_req_mode != lock->l_granted_mode) + LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); } return LDLM_ITER_CONTINUE; } diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 290eb46..aa6ab902 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -49,6 +49,35 @@ static void ll_release(struct dentry *de) EXIT; } +/* Compare if two dentries are the same. Don't match if the existing dentry + * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same. + * + * This avoids a race where ll_lookup_it() instantiates a dentry, but we get + * an AST before calling d_revalidate_it(). The dentry still exists (marked + * INVALID) so d_lookup() matches it, but we have no lock on it (so + * lock_match() fails) and we spin around real_lookup(). 
*/ +static int ll_dcompare(struct dentry *parent, struct qstr *d_name, + struct qstr *name) +{ + struct dentry *dchild; + ENTRY; + + if (d_name->len != name->len) + RETURN(1); + + if (memcmp(d_name->name, name->name, name->len)) + RETURN(1); + + dchild = container_of(d_name, struct dentry, d_name); /* ugh */ + if (dchild->d_flags & DCACHE_LUSTRE_INVALID) { + CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n", + dchild); + RETURN(1); + } + + RETURN(0); +} + /* should NOT be called with the dcache lock, see fs/dcache.c */ static int ll_ddelete(struct dentry *de) { @@ -153,8 +182,6 @@ restart: EXIT; } -extern struct dentry *ll_find_alias(struct inode *, struct dentry *); - static int revalidate_it_finish(struct ptlrpc_request *request, int offset, struct lookup_intent *it, struct dentry *de) @@ -245,6 +272,7 @@ int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it) if (d_mountpoint(de)) RETURN(1); + OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5); ll_frob_intent(&it, &lookup_it); LASSERT(it); @@ -405,6 +433,7 @@ struct dentry_operations ll_d_ops = { #endif .d_release = ll_release, .d_delete = ll_ddelete, + .d_compare = ll_dcompare, #if 0 .d_pin = ll_pin, .d_unpin = ll_unpin, diff --git a/lustre/llite/file.c b/lustre/llite/file.c index e419317..bdac6d1 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -296,15 +296,25 @@ static int ll_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) ENTRY; if (lsm->lsm_stripe_count == 1) - RETURN(0); + GOTO(check, stripe = 0); /* get our offset in the lov */ rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe); if (rc != 0) { CERROR("obd_get_info: rc = %d\n", rc); - LBUG(); + RETURN(rc); } LASSERT(stripe < lsm->lsm_stripe_count); + +check: + if (lsm->lsm_oinfo[stripe].loi_id != lock->l_resource->lr_name.name[0]|| + lsm->lsm_oinfo[stripe].loi_gr != lock->l_resource->lr_name.name[1]){ + LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64, + 
lsm->lsm_oinfo[stripe].loi_id, + lsm->lsm_oinfo[stripe].loi_gr); + RETURN(-ELDLM_NO_LOCK_DATA); + } + RETURN(stripe); } @@ -470,6 +480,8 @@ static int ll_extent_lock_callback(struct ldlm_lock *lock, lsm = lli->lli_smd; stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + goto iput; ll_pgcache_remove_extent(inode, lsm, lock, stripe); /* grabbing the i_sem will wait for write() to complete. ns @@ -523,6 +535,8 @@ int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data) LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed"); stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + goto iput; if (lock->l_lvb_len) { struct lov_stripe_md *lsm = lli->lli_smd; @@ -542,6 +556,7 @@ int ll_async_completion_ast(struct ldlm_lock *lock, int flags, void *data) l_unlock(&lock->l_resource->lr_namespace->ns_lock); } +iput: iput(inode); wake_up(&lock->l_waitq); @@ -556,8 +571,9 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) struct ptlrpc_request *req = reqp; struct inode *inode = ll_inode_from_lock(lock); struct ll_inode_info *lli; + struct lov_stripe_md *lsm; struct ost_lvb *lvb; - int rc, size = sizeof(*lvb), stripe = 0; + int rc, size = sizeof(*lvb), stripe; ENTRY; if (inode == NULL) @@ -565,12 +581,14 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) lli = ll_i2info(inode); if (lli == NULL) GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); - if (lli->lli_smd == NULL) + lsm = lli->lli_smd; + if (lsm == NULL) GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); /* First, find out which stripe index this lock corresponds to. 
*/ - if (lli->lli_smd->lsm_stripe_count > 1) - stripe = ll_lock_to_stripe_offset(inode, lock); + stripe = ll_lock_to_stripe_offset(inode, lock); + if (stripe < 0) + GOTO(iput, rc = -ELDLM_NO_LOCK_DATA); rc = lustre_pack_reply(req, 1, &size, NULL); if (rc) { diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index c213781..09d80be 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -85,7 +85,7 @@ void ll_try_done_writing(struct inode *inode) if (list_empty(&lli->lli_close_item)) { CDEBUG(D_INODE, "adding inode %lu/%u to close list\n", inode->i_ino, inode->i_generation); - LASSERT(igrab(inode) == inode); + igrab(inode); list_add_tail(&lli->lli_close_item, &lcq->lcq_list); wake_up(&lcq->lcq_waitq); } diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 901985a..fd0c775 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -236,14 +236,12 @@ void lustre_common_put_super(struct super_block *sb) obd_disconnect(sbi->ll_mdc_exp, 0); // We do this to get rid of orphaned dentries. That is not really trw. 
- spin_lock(&dcache_lock); hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); - CWARN("orphan dentry %*s (%p) at unmount\n", - dentry->d_name.len, dentry->d_name.name, dentry); + CWARN("orphan dentry %*s (%p->%p) at unmount\n", + dentry->d_name.len, dentry->d_name.name, dentry, next); shrink_dcache_parent(dentry); } - spin_unlock(&dcache_lock); EXIT; } @@ -324,6 +322,7 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; spin_lock_init(&lli->lli_lock); INIT_LIST_HEAD(&lli->lli_pending_write_llaps); + lli->lli_inode_magic = LLI_INODE_MAGIC; } int ll_fill_super(struct super_block *sb, void *data, int silent) @@ -713,23 +712,31 @@ void lustre_put_super(struct super_block *sb) struct inode *ll_inode_from_lock(struct ldlm_lock *lock) { - struct inode *inode; + struct inode *inode = NULL; l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (lock->l_ast_data) - inode = igrab(lock->l_ast_data); - else - inode = NULL; + if (lock->l_ast_data) { + struct ll_inode_info *lli = ll_i2info(lock->l_ast_data); + if (lli->lli_inode_magic == LLI_INODE_MAGIC) { + inode = igrab(lock->l_ast_data); + } else { + inode = lock->l_ast_data; + CDEBUG(inode->i_state & I_FREEING ? 
D_INFO : D_WARNING, + "l_ast_data %p is bogus: magic %0x8\n", + lock->l_ast_data, lli->lli_inode_magic); + inode = NULL; + } + } l_unlock(&lock->l_resource->lr_namespace->ns_lock); return inode; } static int null_if_equal(struct ldlm_lock *lock, void *data) { - if (data == lock->l_ast_data) + if (data == lock->l_ast_data) { lock->l_ast_data = NULL; - if (lock->l_req_mode != lock->l_granted_mode) - return LDLM_ITER_STOP; + if (lock->l_req_mode != lock->l_granted_mode) + LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); } return LDLM_ITER_CONTINUE; } @@ -762,6 +769,7 @@ void ll_clear_inode(struct inode *inode) strlen(lli->lli_symlink_name) + 1); lli->lli_symlink_name = NULL; } + lli->lli_inode_magic = LLI_INODE_DEAD; EXIT; } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index e0bfe21..8567ae8 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -222,6 +222,32 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data, struct inode *i1, data->mod_time = LTIME_S(CURRENT_TIME); } +static void ll_d_add(struct dentry *de, struct inode *inode) +{ + CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de); + /* d_instantiate */ + if (!list_empty(&de->d_alias)) { + spin_unlock(&dcache_lock); + CERROR("dentry %*s %p alias next %p, prev %p\n", + de->d_name.len, de->d_name.name, de, + de->d_alias.next, de->d_alias.prev); + LBUG(); + } + if (inode) + list_add(&de->d_alias, &inode->i_dentry); + de->d_inode = inode; + + /* d_rehash */ + if (!list_empty(&de->d_hash)) { + spin_unlock(&dcache_lock); + CERROR("dentry %*s %p hash next %p, prev %p\n", + de->d_name.len, de->d_name.name, de, + de->d_hash.next, de->d_hash.prev); + LBUG(); + } + __d_rehash(de, 0); +} + /* Search "inode"'s alias list for a dentry that has the same name and parent as * de. If found, return it. If not found, return de. 
*/ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) @@ -253,16 +279,18 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) hlist_del_init(&dentry->d_hash); __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */ - spin_unlock(&dcache_lock); + dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; atomic_inc(&dentry->d_count); + spin_unlock(&dcache_lock); iput(inode); - dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; CDEBUG(D_DENTRY, "alias dentry %*s (%p) parent %p inode %p " "refc %d\n", de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, atomic_read(&de->d_count)); return dentry; } + ll_d_add(de, inode); + spin_unlock(&dcache_lock); return de; @@ -275,7 +303,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, struct dentry **de = icbd->icbd_childp; struct inode *parent = icbd->icbd_parent; struct ll_sb_info *sbi = ll_i2sbi(parent); - struct dentry *dentry = *de, *saved = *de; + struct dentry *dentry = *de; struct inode *inode = NULL; int rc; @@ -314,13 +342,13 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, dentry = *de = ll_find_alias(inode, dentry); } else { ENTRY; + spin_lock(&dcache_lock); + ll_d_add(dentry, inode); + spin_unlock(&dcache_lock); } - dentry->d_op = &ll_d_ops; ll_set_dd(dentry); - - if (dentry == saved) - d_add(dentry, inode); + dentry->d_op = &ll_d_ops; RETURN(0); } diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index eea7e8f..f1f1307 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -96,6 +96,8 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, case FSFILT_OP_RENAME: /* modify additional directory */ nblocks += EXT3_SINGLEDATA_TRANS_BLOCKS; + nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS + + EXT3_SINGLEDATA_TRANS_BLOCKS) * logs; /* no break */ case FSFILT_OP_SYMLINK: /* additional block + block bitmap + GDT for long symlink */ diff --git a/lustre/lvfs/lvfs_linux.c 
b/lustre/lvfs/lvfs_linux.c index ce5ac40..868f010 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -98,6 +98,7 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); save->ngroups = current_ngroups; + save->ouc.ouc_umask = current->fs->umask; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -110,19 +111,18 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, save->ouc.ouc_cap = current->cap_effective; save->ouc.ouc_suppgid1 = current_groups[0]; save->ouc.ouc_suppgid2 = current_groups[1]; - save->ouc.ouc_umask = current->fs->umask; current->fsuid = uc->ouc_fsuid; current->fsgid = uc->ouc_fsgid; current->cap_effective = uc->ouc_cap; current_ngroups = 0; - current->fs->umask = 0; /* umask already applied on client */ if (uc->ouc_suppgid1 != -1) current_groups[current_ngroups++] = uc->ouc_suppgid1; if (uc->ouc_suppgid2 != -1) current_groups[current_ngroups++] = uc->ouc_suppgid2; } + current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); @@ -166,6 +166,7 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, dput(saved->pwd); mntput(saved->pwdmnt); + current->fs->umask = saved->ouc.ouc_umask; if (uc) { current->fsuid = saved->ouc.ouc_fsuid; current->fsgid = saved->ouc.ouc_fsgid; @@ -173,7 +174,6 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, current_ngroups = saved->ngroups; current_groups[0] = saved->ouc.ouc_suppgid1; current_groups[1] = saved->ouc.ouc_suppgid2; - current->fs->umask = saved->ouc.ouc_umask; } /* @@ -249,8 +249,12 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) if (dchild->d_inode) { int old_mode = dchild->d_inode->i_mode; - if (!S_ISDIR(old_mode)) + if (!S_ISDIR(old_mode)) { + CERROR("found %s (%lu/%u) is mode %o\n", name, + dchild->d_inode->i_ino, + 
dchild->d_inode->i_generation, old_mode); GOTO(out_err, err = -ENOTDIR); + } /* Fixup directory permissions if necessary */ if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index cf590e4..1e2133e 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1274,6 +1274,7 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, ldlm_server_blocking_ast, NULL); + fail = OBD_FAIL_LDLM_REPLY; break; case LDLM_CONVERT: DEBUG_REQ(D_INODE, req, "convert"); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 563790a..a141fd2 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -1137,7 +1137,7 @@ static int mds_orphan_add_link(struct mds_update_record *rec, fidlen = ll_fid2str(fidname, inode->i_ino, inode->i_generation); - CDEBUG(D_ERROR, "pending destroy of %dx open %d linked %s %s = %s\n", + CDEBUG(D_INODE, "pending destroy of %dx open %d linked %s %s = %s\n", mds_orphan_open_count(inode), inode->i_nlink, S_ISDIR(inode->i_mode) ? "dir" : S_ISREG(inode->i_mode) ? "file" : "other",rec->ur_name,fidname); @@ -1717,6 +1717,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, struct mds_obd *mds = mds_req2mds(req); struct lustre_handle dlm_handles[4]; struct mds_body *body = NULL; + struct lov_mds_md *lmm = NULL; int rc = 0, lock_count = 3, cleanup_phase = 0; void *handle = NULL; ENTRY; @@ -1791,12 +1792,17 @@ no_unlink: OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, de_srcdir->d_inode->i_sb); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) /* Check if we are moving old entry into its child. 
2.6 does not check for this in vfs_rename() anymore */ if (is_subdir(de_new, de_old)) GOTO(cleanup, rc = -EINVAL); +#endif + + lmm = lustre_msg_buf(req->rq_repmsg, 1, 0); + handle = fsfilt_start_log(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, + NULL, le32_to_cpu(lmm->lmm_stripe_count)); - handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 8df334d..661ca23 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -75,9 +75,9 @@ atomic_t portal_kmemory = {0}; #endif -struct semaphore obd_conf_sem; /* serialize configuration commands */ struct obd_device obd_dev[MAX_OBD_DEVICES]; struct list_head obd_types; +spinlock_t obd_dev_lock; #ifndef __KERNEL__ atomic_t obd_memory; int obd_memmax; @@ -96,6 +96,12 @@ unsigned int obd_sync_filter; /* = 0, don't sync by default */ DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq); #ifdef __KERNEL__ +unsigned int obd_print_fail_loc(void) +{ + CWARN("obd_fail_loc = %x\n", obd_fail_loc); + return obd_fail_loc; +} + /* opening /dev/obd */ static int obd_class_open(struct inode * inode, struct file * file) { @@ -162,7 +168,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) struct obd_ioctl_data *data; struct portals_debug_ioctl_data *debug_data; struct obd_device *obd = NULL; - int err = 0, len = 0, serialised = 0; + int err = 0, len = 0; ENTRY; if (current->fsuid != 0) @@ -179,26 +185,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) return 0; } - switch (cmd) { - case OBD_IOC_BRW_WRITE: - case OBD_IOC_BRW_READ: - case OBD_IOC_GETATTR: - case ECHO_IOC_ENQUEUE: - case ECHO_IOC_CANCEL: - case OBD_IOC_CLIENT_RECOVER: - case OBD_IOC_CATLOGLIST: - case OBD_IOC_LLOG_INFO: - case OBD_IOC_LLOG_PRINT: - case OBD_IOC_LLOG_CANCEL: - case OBD_IOC_LLOG_CHECK: - case OBD_IOC_LLOG_REMOVE: - break; - default: - down(&obd_conf_sem); - serialised = 1; - break; - 
} - CDEBUG(D_IOCTL, "cmd = %x, obd = %p\n", cmd, obd); if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { CERROR("OBD ioctl: data error\n"); @@ -346,8 +332,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) out: if (buf) obd_ioctl_freedata(buf, len); - if (serialised) - up(&obd_conf_sem); RETURN(err); } /* class_handle_ioctl */ @@ -383,6 +367,7 @@ void *obd_psdev = NULL; EXPORT_SYMBOL(obd_dev); EXPORT_SYMBOL(obdo_cachep); EXPORT_SYMBOL(obd_fail_loc); +EXPORT_SYMBOL(obd_print_fail_loc); EXPORT_SYMBOL(obd_race_waitq); EXPORT_SYMBOL(obd_dump_on_timeout); EXPORT_SYMBOL(obd_timeout); @@ -575,7 +560,7 @@ int init_obdclass(void) if (err) return err; - sema_init(&obd_conf_sem, 1); + spin_lock_init(&obd_dev_lock); INIT_LIST_HEAD(&obd_types); err = misc_register(&obd_psdev); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 451a5e9..380a80a 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -176,23 +176,36 @@ int class_unregister_type(char *name) RETURN(0); } /* class_unregister_type */ -struct obd_device *class_newdev(int *dev) +struct obd_device *class_newdev(struct obd_type *type) { struct obd_device *result = NULL; int i; - for (i = 0 ; i < MAX_OBD_DEVICES ; i++) { + spin_lock(&obd_dev_lock); + for (i = 0 ; i < MAX_OBD_DEVICES && result == NULL; i++) { struct obd_device *obd = &obd_dev[i]; if (!obd->obd_type) { + LASSERT(obd->obd_minor == i); + memset(obd, 0, sizeof(*obd)); + obd->obd_minor = i; + obd->obd_type = type; result = obd; - if (dev) - *dev = i; - break; } } + spin_unlock(&obd_dev_lock); return result; } +void class_release_dev(struct obd_device *obd) +{ + int minor = obd->obd_minor; + + spin_lock(&obd_dev_lock); + memset(obd, 0, sizeof(*obd)); + obd->obd_minor = minor; + spin_unlock(&obd_dev_lock); +} + int class_name2dev(char *name) { int i; @@ -200,11 +213,15 @@ int class_name2dev(char *name) if (!name) return -1; + spin_lock(&obd_dev_lock); for (i = 0; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd 
= &obd_dev[i]; - if (obd->obd_name && strcmp(name, obd->obd_name) == 0) + if (obd->obd_name && strcmp(name, obd->obd_name) == 0) { + spin_unlock(&obd_dev_lock); return i; + } } + spin_unlock(&obd_dev_lock); return -1; } @@ -221,11 +238,15 @@ int class_uuid2dev(struct obd_uuid *uuid) { int i; + spin_lock(&obd_dev_lock); for (i = 0; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd = &obd_dev[i]; - if (obd_uuid_equals(uuid, &obd->obd_uuid)) + if (obd_uuid_equals(uuid, &obd->obd_uuid)) { + spin_unlock(&obd_dev_lock); return i; + } } + spin_unlock(&obd_dev_lock); return -1; } @@ -247,6 +268,7 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, { int i; + spin_lock(&obd_dev_lock); for (i = 0; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd = &obd_dev[i]; if (obd->obd_type == NULL) @@ -258,10 +280,12 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, if (obd_uuid_equals(tgt_uuid, &imp->imp_target_uuid) && ((grp_uuid)? obd_uuid_equals(grp_uuid, &obd->obd_uuid) : 1)) { + spin_unlock(&obd_dev_lock); return obd; } } } + spin_unlock(&obd_dev_lock); return NULL; } @@ -273,6 +297,7 @@ struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) { int i; + if (next == NULL) i = 0; else if (*next >= 0 && *next < MAX_OBD_DEVICES) @@ -280,6 +305,7 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) else return NULL; + spin_lock(&obd_dev_lock); for (; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd = &obd_dev[i]; if (obd->obd_type == NULL) @@ -287,9 +313,11 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) { if (next != NULL) *next = i+1; + spin_unlock(&obd_dev_lock); return obd; } } + spin_unlock(&obd_dev_lock); return NULL; } diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 0ad595f..073f128 100644 --- 
a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -103,17 +103,23 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) (llh->llh_count == 1) && (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { rc = llog_destroy(loghandle); - if (rc) + if (rc) { CERROR("failure destroying log after last cancel: %d\n", rc); - LASSERT(rc == 0); - RETURN(1); + ext2_set_bit(index, llh->llh_bitmap); + llh->llh_count++; + } else { + rc = 1; + } + RETURN(rc); } rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0); - if (rc) + if (rc) { CERROR("failure re-writing header %d\n", rc); - LASSERT(rc == 0); + ext2_set_bit(index, llh->llh_bitmap); + llh->llh_count++; + } RETURN(rc); } EXPORT_SYMBOL(llog_cancel_rec); diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index d4fa370..c4df3f8 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -56,7 +56,7 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) ENTRY; llh = cathandle->lgh_hdr; - bitmap_size = sizeof(llh->llh_bitmap) * 8; + bitmap_size = LLOG_BITMAP_SIZE(llh); index = (cathandle->lgh_last_idx + 1) % bitmap_size; @@ -209,10 +209,12 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, loghandle = cathandle->u.chd.chd_current_log; if (loghandle) { struct llog_log_hdr *llh = loghandle->lgh_hdr; - if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) { - down_write(&loghandle->lgh_lock); + down_write(&loghandle->lgh_lock); + if (loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) { up_read(&cathandle->lgh_lock); RETURN(loghandle); + } else { + up_write(&loghandle->lgh_lock); } } if (!create) { @@ -230,10 +232,12 @@ static struct llog_handle *llog_cat_current_log(struct llog_handle *cathandle, loghandle = cathandle->u.chd.chd_current_log; if (loghandle) { struct llog_log_hdr *llh = loghandle->lgh_hdr; - if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) { - 
down_write(&loghandle->lgh_lock); + down_write(&loghandle->lgh_lock); + if (loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) { up_write(&cathandle->lgh_lock); RETURN(loghandle); + } else { + up_write(&loghandle->lgh_lock); } } @@ -394,7 +398,7 @@ int llog_cat_set_first_idx(struct llog_handle *cathandle, int index) int i, bitmap_size, idx; ENTRY; - bitmap_size = sizeof(llh->llh_bitmap) * 8; + bitmap_size = LLOG_BITMAP_SIZE(llh); if (llh->llh_cat_idx == (index - 1)) { idx = llh->llh_cat_idx + 1; llh->llh_cat_idx = idx; diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 9d99e9a..f81826c 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -264,18 +264,18 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, /* NOTE: padding is a record, but no bit is set */ if (left != 0 && left != reclen && left < (reclen + LLOG_MIN_REC_SIZE)) { - int bitmap_size = sizeof(llh->llh_bitmap) * 8; loghandle->lgh_last_idx++; rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx); if (rc) RETURN(rc); /* if it's the last idx in log file, then return -ENOSPC */ - if (loghandle->lgh_last_idx == bitmap_size - 1) + if (loghandle->lgh_last_idx == LLOG_BITMAP_SIZE(llh) - 1) RETURN(-ENOSPC); } loghandle->lgh_last_idx++; index = loghandle->lgh_last_idx; + LASSERT(index < LLOG_BITMAP_SIZE(llh)); rec->lrh_index = index; if (buf == NULL) { lrt = (struct llog_rec_tail *) diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index d5009ef..ac15529 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -48,7 +48,7 @@ int class_attach(struct lustre_cfg *lcfg) struct obd_type *type; struct obd_device *obd; char *typename, *name, *uuid; - int minor, rc, len, dev, stage = 0; + int rc, len, cleanup_phase = 0; if (!lcfg->lcfg_inllen1 || !lcfg->lcfg_inlbuf1) { CERROR("No type passed!\n"); @@ -90,7 +90,7 @@ int class_attach(struct lustre_cfg *lcfg) CERROR("OBD: unknown type: %s\n", typename); 
RETURN(-EINVAL); } - stage = 1; + cleanup_phase = 1; /* class_put_type */ obd = class_name2obd(name); if (obd != NULL) { @@ -98,23 +98,11 @@ int class_attach(struct lustre_cfg *lcfg) GOTO(out, rc = -EEXIST); } - obd = class_newdev(&dev); + obd = class_newdev(type); if (obd == NULL) GOTO(out, rc = -EINVAL); + cleanup_phase = 2; /* class_release_dev */ - /* have we attached a type to this device */ - if (obd->obd_attached || obd->obd_type) { - CERROR("OBD: Device %d already typed as %s.\n", - obd->obd_minor, MKSTR(obd->obd_type->typ_name)); - GOTO(out, rc = -EBUSY); - } - - LASSERT(obd == (obd_dev + obd->obd_minor)); - - minor = obd->obd_minor; - memset(obd, 0, sizeof(*obd)); - obd->obd_minor = minor; - obd->obd_type = type; INIT_LIST_HEAD(&obd->obd_exports); obd->obd_num_exports = 0; spin_lock_init(&obd->obd_dev_lock); @@ -138,7 +126,7 @@ int class_attach(struct lustre_cfg *lcfg) if (!obd->obd_name) GOTO(out, rc = -ENOMEM); memcpy(obd->obd_name, name, len); - stage = 2; + cleanup_phase = 3; /* free obd_name */ len = strlen(uuid); if (len >= sizeof(obd->obd_uuid)) { @@ -161,11 +149,13 @@ int class_attach(struct lustre_cfg *lcfg) obd->obd_minor, typename); RETURN(0); out: - switch (stage) { - case 2: + switch (cleanup_phase) { + case 3: OBD_FREE(obd->obd_name, strlen(obd->obd_name) + 1); + case 2: + class_release_dev(obd); case 1: - class_put_type(obd->obd_type); + class_put_type(type); obd->obd_type = NULL; } return rc; @@ -219,7 +209,6 @@ err_exp: int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) { - int minor; int err = 0; ENTRY; @@ -244,10 +233,7 @@ int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg) obd->obd_attached = 0; obd->obd_type->typ_refcnt--; class_put_type(obd->obd_type); - obd->obd_type = NULL; - minor = obd->obd_minor; - memset(obd, 0, sizeof(*obd)); - obd->obd_minor = minor; + class_release_dev(obd); RETURN(err); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 918ba44..dab3947 100644 --- 
a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2913,7 +2913,7 @@ static int osc_import_event(struct obd_device *obd, /* Only do this on the MDS OSC's */ if (imp->imp_server_timeout) { struct osc_creator *oscc = &obd->u.cli.cl_oscc; - + spin_lock(&oscc->oscc_lock); oscc->oscc_flags |= OSCC_FLAG_RECOVERING; spin_unlock(&oscc->oscc_lock); @@ -2936,7 +2936,7 @@ static int osc_import_event(struct obd_device *obd, /* all pages go to failing rpcs due to the invalid import */ osc_check_rpcs(cli); spin_unlock(&cli->cl_loi_list_lock); - + ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); break; diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 531a5bd..27704bd 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -97,7 +97,7 @@ if test x$enable_modules != xno ; then BACKINGFS="ldiskfs" ],[ KMODEXT=".o" - linux25="no" + linux25="no" ]) AC_MSG_CHECKING([if you are using Linux 2.6]) AC_MSG_RESULT([$linux25]) @@ -257,13 +257,13 @@ if test x$enable_modules != xno ; then AC_MSG_RESULT([$LINUXRELEASE]) AC_SUBST(LINUXRELEASE) - moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel + moduledir='/lib/modules/'$LINUXRELEASE/kernel modulefsdir='$(moduledir)/fs/$(PACKAGE)' - modulenetdir='$(moduledir)/net/$(PACKAGE)' + modulenetdir='$(moduledir)/net/$(PACKAGE)' AC_SUBST(moduledir) AC_SUBST(modulefsdir) - AC_SUBST(modulenetdir) + AC_SUBST(modulenetdir) # ------------ RELEASE -------------------------------- AC_MSG_CHECKING([for Lustre release]) diff --git a/lustre/portals/build.m4 b/lustre/portals/build.m4 index 114478c..e8a540a 100644 --- a/lustre/portals/build.m4 +++ b/lustre/portals/build.m4 @@ -96,3 +96,21 @@ else LIBEFENCE="" fi AC_SUBST(LIBEFENCE) + +# -------- enable acceptor libwrap (TCP wrappers) support? 
------- +AC_MSG_CHECKING([if libwrap support is requested]) +AC_ARG_ENABLE([libwrap], + AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]), + [case "${enableval}" in + yes) enable_libwrap=yes ;; + no) enable_libwrap=no ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;; + esac],[enable_libwrap=no]) +AC_MSG_RESULT([$enable_libwrap]) +if test x$enable_libwrap = xyes ; then + LIBWRAP="-lwrap" + AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested]) +else + LIBWRAP="" +fi +AC_SUBST(LIBWRAP) diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 69246b2..4a88b88 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -65,14 +65,11 @@ extern void kportal_assertion_failed(char *expr, char *file, const char *func, const int line); #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ __FUNCTION__, __LINE__)) -/* it would be great to dump_stack() here, but some kernels - * export it as show_stack() and I can't be bothered to - * proprely engage in that dance right now */ #define LASSERTF(cond, fmt...) 
\ do { \ if (unlikely(!(cond))) { \ - portals_debug_msg(0, D_EMERG, __FILE__, __FUNCTION__,\ - __LINE__, CDEBUG_STACK, \ + portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\ + __FUNCTION__,__LINE__, CDEBUG_STACK,\ "ASSERTION(" #cond ") failed:" fmt);\ LBUG(); \ } \ @@ -101,6 +98,7 @@ do { \ #define LBUG_WITH_LOC(file, func, line) \ do { \ CEMERG("LBUG\n"); \ + CERROR("STACK: %s\n", portals_debug_dumpstack()); \ portals_debug_dumplog(); \ portals_run_lbug_upcall(file, func, line); \ set_task_state(current, TASK_UNINTERRUPTIBLE); \ diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c index dd65b34..b9d6234 100644 --- a/lustre/portals/router/proc.c +++ b/lustre/portals/router/proc.c @@ -24,55 +24,196 @@ #include "router.h" #define KPR_PROC_ROUTER "sys/portals/router" +#define KPR_PROC_ROUTES "sys/portals/routes" -int -kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data) +/* Used for multi-page route list book keeping */ +struct proc_route_data { + struct list_head *curr; + unsigned int generation; + off_t skip; +} kpr_read_routes_data; + +/* nal2name support re-used from utils/portals.c */ +struct name2num { + char *name; + int num; +} nalnames[] = { + { "any", 0}, + { "elan", QSWNAL}, + { "tcp", SOCKNAL}, + { "gm", GMNAL}, + { "ib", IBNAL}, + { NULL, -1} +}; + +static struct name2num *name2num_lookup_num(struct name2num *table, int num) +{ + while (table->name != NULL) + if (num == table->num) + return (table); + else + table++; + return (NULL); +} + +static char *nal2name(int nal) { - unsigned long long bytes = kpr_fwd_bytes; - unsigned long packets = kpr_fwd_packets; - unsigned long errors = kpr_fwd_errors; + struct name2num *e = name2num_lookup_num(nalnames, nal); + return ((e == NULL) ? "???" 
: e->name); +} + + +static int kpr_proc_router_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long long bytes = kpr_fwd_bytes; + unsigned long packets = kpr_fwd_packets; + unsigned long errors = kpr_fwd_errors; unsigned int qdepth = atomic_read (&kpr_queue_depth); - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); - - *start = page; - return (len); + int len; + + *eof = 1; + if (off != 0) + return (0); + + len = sprintf(page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); + + *start = page; + return (len); } -int -kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data) +static int kpr_proc_router_write(struct file *file, const char *ubuffer, + unsigned long count, void *data) { - /* Ignore what we've been asked to write, and just zero the stats counters */ - kpr_fwd_bytes = 0; - kpr_fwd_packets = 0; - kpr_fwd_errors = 0; + /* Ignore what we've been asked to write, and just zero the stats */ + kpr_fwd_bytes = 0; + kpr_fwd_packets = 0; + kpr_fwd_errors = 0; - return (count); + return (count); } -void -kpr_proc_init(void) +static int kpr_proc_routes_read(char *page, char **start, off_t off, + int count, int *eof, void *data) { - struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL); + struct proc_route_data *prd = data; + kpr_route_entry_t *re; + kpr_gateway_entry_t *ge; + int chunk_len = 0; + int line_len = 0; + int user_len = 0; + + *eof = 1; + *start = page; + + if (prd->curr == NULL) { + if (off != 0) + return 0; + + /* First pass, initialize our private data */ + prd->curr = kpr_routes.next; + prd->generation = kpr_routes_generation; + prd->skip = 0; + } else { + /* Abort route list generation change */ + if (prd->generation != kpr_routes_generation) { + prd->curr = NULL; + return sprintf(page, "\nError: Routes Changed\n"); + } + + /* All the routes have 
been walked */ + if (prd->curr == &kpr_routes) { + prd->curr = NULL; + return 0; + } + } + + read_lock(&kpr_rwlock); + *start = page + prd->skip; + user_len = -prd->skip; + + for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) { + re = list_entry(prd->curr, kpr_route_entry_t, kpre_list); + ge = re->kpre_gateway; + + line_len = sprintf(page + chunk_len, + "%12s "LPX64" : "LPX64" - "LPX64", %s\n", + nal2name(ge->kpge_nalid), ge->kpge_nid, + re->kpre_lo_nid, re->kpre_hi_nid, + ge->kpge_alive ? "up" : "down"); + chunk_len += line_len; + user_len += line_len; - if (entry == NULL) - { + /* The route table will exceed one page */ + if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) { + prd->curr = prd->curr->next; + break; + } + } + + *eof = 0; + + /* Caller received only a portion of the last entry, the + * remaining will be delivered in the next page if asked for. + */ + if (user_len > count) { + prd->curr = prd->curr->prev; + prd->skip = line_len - (user_len - count); + read_unlock(&kpr_rwlock); + return count; + } + + /* Not enough data to entirely satisfy caller's request */ + prd->skip = 0; + read_unlock(&kpr_rwlock); + return user_len; +} + +static int kpr_proc_routes_write(struct file *file, const char *ubuffer, + unsigned long count, void *data) +{ + /* no-op; lctl should be used to adjust the routes */ + return (count); +} + +void kpr_proc_init(void) +{ + struct proc_dir_entry *router_entry; + struct proc_dir_entry *routes_entry; + + /* Initialize KPR_PROC_ROUTER */ + router_entry = create_proc_entry (KPR_PROC_ROUTER, + S_IFREG | S_IRUGO | S_IWUSR, NULL); + + if (router_entry == NULL) { CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER); return; } - entry->data = NULL; - entry->read_proc = kpr_proc_read; - entry->write_proc = kpr_proc_write; + router_entry->data = NULL; + router_entry->read_proc = kpr_proc_router_read; + router_entry->write_proc = kpr_proc_router_write; + + /* Initialize KPR_PROC_ROUTES */ + routes_entry = 
create_proc_entry (KPR_PROC_ROUTES, + S_IFREG | S_IRUGO | S_IWUSR, NULL); + + if (routes_entry == NULL) { + CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES); + return; + } + + kpr_read_routes_data.curr = NULL; + kpr_read_routes_data.generation = 0; + kpr_read_routes_data.skip = 0; + + routes_entry->data = &kpr_read_routes_data; + routes_entry->read_proc = kpr_proc_routes_read; + routes_entry->write_proc = kpr_proc_routes_write; } -void -kpr_proc_fini(void) +void kpr_proc_fini(void) { remove_proc_entry(KPR_PROC_ROUTER, 0); + remove_proc_entry(KPR_PROC_ROUTES, 0); } diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c index d0dbf0a..6fcd83a 100644 --- a/lustre/portals/router/router.c +++ b/lustre/portals/router/router.c @@ -27,6 +27,7 @@ LIST_HEAD(kpr_routes); LIST_HEAD(kpr_gateways); LIST_HEAD(kpr_nals); +unsigned int kpr_routes_generation; unsigned long long kpr_fwd_bytes; unsigned long kpr_fwd_packets; unsigned long kpr_fwd_errors; @@ -599,7 +600,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, list_for_each (e, &kpr_gateways) { kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t, kpge_list); - + if (ge2->kpge_nalid == gateway_nalid && ge2->kpge_nid == gateway_nid) { PORTAL_FREE (ge, sizeof (*ge)); @@ -611,7 +612,6 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, if (!dup) { /* Adding a new gateway... 
*/ - list_add (&ge->kpge_list, &kpr_gateways); /* ...zero all gateway weights so this one doesn't have to @@ -622,12 +622,12 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, kpge_list); atomic_set (&ge2->kpge_weight, 0); } - } re->kpre_gateway = ge; ge->kpge_refcount++; list_add (&re->kpre_list, &kpr_routes); + kpr_routes_generation++; write_unlock_irqrestore (&kpr_rwlock, flags); return (0); @@ -645,12 +645,12 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, ptl_nid_t lo, ptl_nid_t hi) { int specific = (lo != PTL_NID_ANY); - unsigned long flags; + unsigned long flags; int rc = -ENOENT; - struct list_head *e; - struct list_head *n; + struct list_head *e; + struct list_head *n; - CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", + CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n", gw_nalid, gw_nid, lo, hi); LASSERT(!in_interrupt()); @@ -658,20 +658,19 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, /* NB Caller may specify either all routes via the given gateway * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are * actual NIDs) */ - if (specific ? 
(hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY)) return (-EINVAL); - write_lock_irqsave(&kpr_rwlock, flags); + write_lock_irqsave(&kpr_rwlock, flags); list_for_each_safe (e, n, &kpr_routes) { kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, kpre_list); kpr_gateway_entry_t *ge = re->kpre_gateway; - + if (ge->kpge_nalid != gw_nalid || ge->kpge_nid != gw_nid || - (specific && + (specific && (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid))) continue; @@ -689,7 +688,9 @@ kpr_del_route (int gw_nalid, ptl_nid_t gw_nid, break; } + kpr_routes_generation++; write_unlock_irqrestore(&kpr_rwlock, flags); + return (rc); } @@ -751,6 +752,7 @@ kpr_initialise (void) CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); + kpr_routes_generation = 0; kpr_proc_init(); PORTAL_SYMBOL_REGISTER(kpr_router_interface); diff --git a/lustre/portals/router/router.h b/lustre/portals/router/router.h index 309025b3..611d808 100644 --- a/lustre/portals/router/router.h +++ b/lustre/portals/router/router.h @@ -102,9 +102,12 @@ extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when); +extern unsigned int kpr_routes_generation; extern unsigned long long kpr_fwd_bytes; extern unsigned long kpr_fwd_packets; extern unsigned long kpr_fwd_errors; extern atomic_t kpr_queue_depth; +extern struct list_head kpr_routes; +extern rwlock_t kpr_rwlock; #endif /* _KPLROUTER_H */ diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am index 851a8e1..051bcd9 100644 --- a/lustre/portals/utils/Makefile.am +++ b/lustre/portals/utils/Makefile.am @@ -20,6 +20,7 @@ lib_LIBRARIES = libptlctl.a endif acceptor_SOURCES = acceptor.c +acceptor_LDADD = $(LIBWRAP) wirecheck_SOURCES = wirecheck.c diff --git a/lustre/portals/utils/acceptor.c b/lustre/portals/utils/acceptor.c index f6367d4..daed215 100644 --- a/lustre/portals/utils/acceptor.c +++ 
b/lustre/portals/utils/acceptor.c @@ -13,6 +13,11 @@ #include #include #include +#ifdef HAVE_LIBWRAP +#include +#include +#include +#endif #include #include @@ -26,6 +31,12 @@ #define PROGNAME "acceptor" +#ifdef HAVE_LIBWRAP +/* needed because libwrap declares these as externs */ +int allow_severity = LOG_INFO; +int deny_severity = LOG_WARNING; +#endif + void create_pidfile(char *name, int port) { char pidfile[1024]; @@ -276,7 +287,11 @@ int main(int argc, char **argv) int cfd; struct portal_ioctl_data data; struct portals_cfg pcfg; - +#ifdef HAVE_LIBWRAP + struct request_info request; + char addrstr[INET_ADDRSTRLEN]; +#endif + cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); if ( cfd < 0 ) { perror("accept"); @@ -284,6 +299,19 @@ int main(int argc, char **argv) continue; } +#ifdef HAVE_LIBWRAP + /* libwrap access control */ + request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0); + sock_host(&request); + if (!hosts_access(&request)) { + inet_ntop(AF_INET, &clntaddr.sin_addr, + addrstr, INET_ADDRSTRLEN); + syslog(LOG_WARNING, "Unauthorized access from %s:%hu\n", + addrstr, ntohs(clntaddr.sin_port)); + close (cfd); + continue; + } +#endif show_connection (cfd, clntaddr.sin_addr.s_addr); PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD); diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index d460919..e546aaf 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -371,7 +371,9 @@ int jt_dbg_debug_kernel(int argc, char **argv) fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); return 0; } - sprintf(filename, "%s-%ld.tmp", argv[1], random); + sprintf(filename, "%s.%lu.%u", argc > 1 ? 
argv[1] : "/tmp/lustre-log", + time(NULL), getpid()); + if (argc > 2) raw = atoi(argv[2]); unlink(filename); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 66f692c..1988d11 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -985,7 +985,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * EINTR. * I don't really care if we go once more round the loop in * the error cases -eeb. */ - } while (rc != 0); + } while (rc != 0 || set->set_remaining != 0); LASSERT(set->set_remaining == 0); diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index fa53b3c..ed3b813 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -77,7 +77,8 @@ int llog_origin_handle_create(struct ptlrpc_request *req) } ctxt = llog_get_context(obd, body->lgd_ctxt_idx); - LASSERT(ctxt != NULL); + if (ctxt == NULL) + GOTO(out, rc = -EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); @@ -131,7 +132,8 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) GOTO(out, rc = -ENOMEM); ctxt = llog_get_context(obd, body->lgd_ctxt_idx); - LASSERT(ctxt != NULL); + if (ctxt == NULL) + GOTO(out, rc = -EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); @@ -197,7 +199,8 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) } ctxt = llog_get_context(obd, body->lgd_ctxt_idx); - LASSERT(ctxt != NULL); + if (ctxt == NULL) + GOTO(out, rc = -EINVAL); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 1bdfbe8..b806b1e 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -143,39 +143,37 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, LASSERT(ctxt); + down(&ctxt->loc_sem); if (ctxt->loc_imp == NULL) { CWARN("no import for ctxt %p\n", ctxt); - RETURN(0); - } - - if (count == 0 || cookies == NULL) { - 
down(&ctxt->loc_sem); - if (ctxt->loc_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW)) - GOTO(out, rc); - - llcd = ctxt->loc_llcd; - GOTO(send_now, rc); + GOTO(out, rc = 0); } - down(&ctxt->loc_sem); llcd = ctxt->loc_llcd; - if (llcd == NULL) { - llcd = llcd_grab(); + + if (count > 0 && cookies != NULL) { if (llcd == NULL) { - CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n", - cookies->lgc_lgl.lgl_oid, - cookies->lgc_lgl.lgl_ogen, cookies->lgc_index); - GOTO(out, rc = -ENOMEM); + llcd = llcd_grab(); + if (llcd == NULL) { + CERROR("couldn't get an llcd - dropped "LPX64 + ":%x+%u\n", + cookies->lgc_lgl.lgl_oid, + cookies->lgc_lgl.lgl_ogen, + cookies->lgc_index); + GOTO(out, rc = -ENOMEM); + } + llcd->llcd_ctxt = ctxt; + ctxt->loc_llcd = llcd; } - llcd->llcd_ctxt = ctxt; - ctxt->loc_llcd = llcd; - } - memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, - sizeof(*cookies)); - llcd->llcd_cookiebytes += sizeof(*cookies); + memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, + cookies, sizeof(*cookies)); + llcd->llcd_cookiebytes += sizeof(*cookies); + } else { + if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW)) + GOTO(out, rc); + } -send_now: if ((LLCD_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) || flags & OBD_LLOG_FL_SENDNOW)) { CDEBUG(D_HA, "send llcd %p:%p\n", llcd, llcd->llcd_ctxt); @@ -196,12 +194,12 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) { down(&ctxt->loc_sem); if (ctxt->loc_llcd != NULL) { - CWARN("import will be destroyed, put " - "llcd %p:%p\n", ctxt->loc_llcd, ctxt); llcd_put(ctxt->loc_llcd); ctxt->loc_llcd = NULL; - ctxt->loc_imp = NULL; } + CWARN("import will be destroyed, put " + "llcd %p:%p\n", ctxt->loc_llcd, ctxt); + ctxt->loc_imp = NULL; up(&ctxt->loc_sem); } else { rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); @@ -340,11 +338,11 @@ static int log_commit_thread(void *arg) llcd_put(llcd); continue; } + 
up(&llcd->llcd_ctxt->loc_sem); request = ptlrpc_prep_req(import, OBD_LOG_CANCEL, 1, &llcd->llcd_cookiebytes, bufs); - up(&llcd->llcd_ctxt->loc_sem); if (request == NULL) { rc = -ENOMEM; @@ -368,9 +366,9 @@ static int log_commit_thread(void *arg) ptlrpc_req_finished(request); continue; } + up(&llcd->llcd_ctxt->loc_sem); rc = ptlrpc_queue_wait(request); ptlrpc_req_finished(request); - up(&llcd->llcd_ctxt->loc_sem); /* If the RPC failed, we put this and the remaining * messages onto the resend list for another time. */ diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index b6d3ee2..06f93d3 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -145,24 +145,15 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %attr(-, root, root) /usr/bin/mcreate %attr(-, root, root) /usr/bin/munlink %attr(-, root, root) /usr/bin/mkdirmany -%attr(-, root, root) /usr/bin/iopentest1 -%attr(-, root, root) /usr/bin/iopentest2 -%attr(-, root, root) /usr/lib/lustre/python/* -%attr(-, root, root) /usr/lib/lustre/examples/llmount.sh -%attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh -%attr(-, root, root) /usr/lib/lustre/examples/llecho.sh -%attr(-, root, root) /usr/lib/lustre/examples/local.sh -%attr(-, root, root) /usr/lib/lustre/examples/uml.sh -%attr(-, root, root) /usr/lib/lustre/examples/lov.sh -%attr(-, root, root) /usr/lib/lustre/examples/echo.sh -%attr(-, root, root) /usr/lib/lustre/examples/llechocleanup.sh +%attr(-, root, root) /usr/lib/lustre/python +%attr(-, root, root) /usr/lib/lustre/examples %attr(-, root, root) /etc/init.d/lustre %attr(-, root, root) /etc/init.d/lustrefs %attr(-, root, root) /usr/lib/libptlctl.a %attr(-, root, root) /usr/lib/liblustreapi.a -%attr(-, root, root) /usr/include/lustre/*.h -%attr(-, root, root) /usr/include/portals/*.h +%attr(-, root, root) /usr/include/lustre +%attr(-, root, root) /usr/include/portals %attr(-, root, root) /usr/include/linux/lustre_idl.h %attr(-, root, root) 
/usr/share/man/man?/* @@ -186,9 +177,9 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %files -n lustre-modules %attr(-, root, root) %doc COPYING -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/*.o +%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre #portals modules -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/*.o +%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre %files -n lustre-source %attr(-, root, root) /usr/src/lustre-%{version} @@ -196,12 +187,6 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre #%ifarch i386 #%files -n liblustre #%attr(-, root, root) /lib/lustre -#%attr(-, root, root) /lib/lustre/liblov.a -#%attr(-, root, root) /lib/lustre/liblustreclass.a -#%attr(-, root, root) /lib/lustre/libptlrpc.a -#%attr(-, root, root) /lib/lustre/libobdecho.a -#%attr(-, root, root) /lib/lustre/libldlm.a -#%attr(-, root, root) /lib/lustre/libosc.a #%attr(-, root, root) /usr/sbin/lctl #%attr(-, root, root) /usr/sbin/lfind #%attr(-, root, root) /usr/sbin/lstripe @@ -223,16 +208,9 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre #%dir /var/lib/ldap/lustre %attr(700, ldap, ldap) /var/lib/ldap/lustre -%post -n lustre-modules -if [ ! -e /dev/obd ]; then - mknod /dev/obd c 10 241 -fi -if [ ! -e /dev/portals ]; then - mknod /dev/portals c 10 240 -fi +%post /sbin/chkconfig --add lustre /sbin/chkconfig --add lustrefs -depmod -ae || exit 0 %preun if [ $1 = 0 ]; then @@ -240,6 +218,15 @@ if [ $1 = 0 ]; then /sbin/chkconfig --del lustrefs fi +%post -n lustre-modules +if [ ! -e /dev/obd ]; then + mknod /dev/obd c 10 241 +fi +if [ ! 
-e /dev/portals ]; then + mknod /dev/portals c 10 240 +fi +depmod -ae || exit 0 + %postun -n lustre-modules depmod -ae || exit 0 diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 20f5eef..bb3368d 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -22,10 +22,10 @@ noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod -noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify -noinst_PROGRAMS += openfilleddirunlink rename_many memhog +noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany +noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2 # noinst_PROGRAMS += ldaptest copy_attr -bin_PROGRAMS = mcreate munlink mkdirmany iopentest1 iopentest2 +bin_PROGRAMS = mcreate munlink endif # TESTS stat_SOURCES = stat.c stat_fs.h diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index ccf4a94..3e4829e 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -164,3 +164,7 @@ fi if [ "$REPLAY_OST_SINGLE" != "no" ]; then sh replay-ost-single.sh fi + +if [ "$REPLAY_DUAL" != "no" ]; then + sh replay-dual.sh +fi diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 0f8fe13..2d37a47 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -8,14 +8,14 @@ LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} -MDSSIZE=${MDSSIZE:-100000} +MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} NETWORKTYPE=${NETWORKTYPE:-tcp} OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`} -OSTSIZE=${OSTSIZE:-200000} +OSTSIZE=${OSTSIZE:-400000} # specific journal size for the ost, in MB 
JSIZE=${JSIZE:-0} diff --git a/lustre/tests/lockorder.sh b/lustre/tests/lockorder.sh index cf5051b..6593c33 100644 --- a/lustre/tests/lockorder.sh +++ b/lustre/tests/lockorder.sh @@ -7,7 +7,7 @@ STATMANY=${STATMANY:-statmany} UNLINKMANY=${UNLINKMANY:-unlinkmany} LCTL=${LCTL:-lctl} -MOUNT1=${MOUNT1:-/mnt/lustre1} +MOUNT1=${MOUNT1:-/mnt/lustre} MOUNT2=${MOUNT2:-/mnt/lustre2} DIR=${DIR:-$MOUNT1} DIR2=${DIR2:-$MOUNT2} @@ -46,7 +46,7 @@ while [ $MINRES -gt $MAXRES ]; do MINFILE=$FILETMP MINRES=$FILERES else - rm $TMPFILE + rm $FILETMP fi NUM=$(($NUM + 1)) done @@ -60,7 +60,9 @@ $LCTL mark "start dir: $LOCKDIR=$MAXRES file: $LOCKFILE=$MINRES" $CREATEMANY -l$LOCKFILE $LOCKFILE -$COUNT & CR_PID=$! -sleep 1 +while ! test -f ${LOCKFILE}1 ; do + sleep 1 +done # this will lock $DIR and ${LOCKFILE}0 $STATMANY -s $DIR2/lockdir/lockfile 1 -$COUNT & diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index ec09598..75b94c0 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -10,7 +10,7 @@ LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} -MDSSIZE=${MDSSIZE:-100000} +MDSSIZE=${MDSSIZE:-400000} FSTYPE=${FSTYPE:-ext3} MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index d44386d..fdea9d0 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -358,7 +358,33 @@ test_17() { } run_test 17 "fail OST during recovery (3571)" +# cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for it +export NOW=0 + +test_18() { # bug 3822 - evicting client with enqueued lock + set -vx + mkdir -p $MOUNT1/$tdir + touch $MOUNT1/$tdir/f0 +#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b + statmany -s $MOUNT1/$tdir/f 500 & + OPENPID=$! 
+ NOW=`date +%s` + do_facet mds sysctl -w lustre.fail_loc=0x8000030b # hold enqueue + sleep 1 +#define OBD_FAIL_LDLM_BL_CALLBACK 0x305 + do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict + cancel_lru_locks MDC + usleep 500 # wait to ensure first client is one that will be evicted + openfile -f O_RDONLY $MOUNT2/$tdir/f0 + wait $OPENPID + dmesg | grep "entering recovery in server" && \ + error "client not evicted" || true +} +run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)" + if [ "$ONLY" != "setup" ]; then equals_msg test complete, cleaning up + SLEEP=$((`date +%s` - $NOW)) + [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP $CLEANUP fi diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index ef785c8..876cc20 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -962,5 +962,20 @@ test_49() { } run_test 49 "Double OSC recovery, don't LASSERT" +# b3764 timed out lock replay +test_52() { + touch $DIR/$tfile + cancel_lru_locks MDC + + multiop $DIR/$tfile s + replay_barrier mds + do_facet mds "sysctl -w lustre.fail_loc=0x8000030c" + fail mds + do_facet mds "sysctl -w lustre.fail_loc=0x0" + + $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true +} +run_test 52 "time out lock replay (3764)" + equals_msg test complete, cleaning up $CLEANUP diff --git a/lustre/tests/rmdirmany.c b/lustre/tests/rmdirmany.c new file mode 100755 index 0000000..d0c663a --- /dev/null +++ b/lustre/tests/rmdirmany.c @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char ** argv) +{ + int i, rc = 0, count; + char dirname[4096]; + + if (argc < 3) { + printf("Usage %s dirnamebase count\n", argv[0]); + return 1; + } + + if (strlen(argv[1]) > 4080) { + printf("name too long\n"); + return 1; + } + + count = strtoul(argv[2], NULL, 0); + + for (i = 0; i < count; i++) { + sprintf(dirname, "%s-%d", argv[1], i); + rc = rmdir(dirname); + if (rc) { + 
printf("rmdir(%s) error: %s\n", + dirname, strerror(errno)); + break; + } + if ((i % 10000) == 0) + printf(" - deleted %d (time %ld)\n", i, time(0)); + } + return rc; +} diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 2f04e06..e94bf5b 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,8 +7,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 2108 3192 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 68"} +# bug number for skipped test: 2108 3637 3561 +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" @@ -93,7 +93,7 @@ run_one() { if ! mount | grep -q $DIR; then $START fi - log "== test $1: $2" + log "== test $1: $2= `date +%H:%M:%S`" export TESTNAME=test_$1 test_$1 || error "test_$1: exit with rc=$?" unset TESTNAME @@ -193,7 +193,7 @@ EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP} touch $EXT2_DEV mke2fs -j -F $EXT2_DEV 8000 > /dev/null -umask 022 +umask 077 test_0() { touch $DIR/f @@ -724,7 +724,6 @@ test_24t() { } run_test 24t "mkdir .../R16a/b/c; rename .../R16a/b/c .../R16a =" - test_25a() { echo '== symlink sanity =============================================' mkdir $DIR/d25 @@ -1696,6 +1695,26 @@ test_51() { } run_test 51 "special situations: split htree with empty entry ==" +test_51b() { + check_kernel_version 39 || return 0 + NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + [ $NUMFREE -lt 70000 ] && \ + echo "skipping test 51b, not enough free inodes($NUMFREE)" && \ + return + mkdir -p $DIR/d51b + (cd $DIR/d51b; mkdirmany t 70001) +} +run_test 51b "mkdir .../t-0 --- .../t-70000 ====================" + +test_51c() { + check_kernel_version 39 || return 0 + NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + [ $NUMFREE -lt 70000 ] && echo "skipping test 51c" && return + mkdir -p $DIR/d51b + (cd $DIR/d51b; rmdirmany t 70001) +} +run_test 51c "rmdir .../t-0 --- .../t-70000 
====================" + test_52a() { [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo mkdir -p $DIR/d52a @@ -1763,7 +1782,6 @@ test_54b() { run_test 54b "char device works in lustre ======================" find_loop_dev() { - [ "$LOOPNUM" ] && return [ -b /dev/loop/0 ] && LOOPBASE=/dev/loop/ [ -b /dev/loop0 ] && LOOPBASE=/dev/loop [ -z "$LOOPBASE" ] && echo "/dev/loop/0 and /dev/loop0 gone?" && return @@ -2081,14 +2099,13 @@ test_67() { # bug 3285 - supplementary group fails on MDS, passes on client } run_test 67 "supplementary group failure (should return error) =" -LOOPDEV="" cleanup_68() { if [ "$LOOPDEV" ]; then swapoff $LOOPDEV || error "swapoff failed" losetup -d $LOOPDEV || error "losetup -d failed" + unset LOOPDEV LOOPNUM fi rm -f $DIR/f68 - LOOPDEV="" } meminfo() { @@ -2103,6 +2120,8 @@ swap_used() { # and then consuming memory until it is used. test_68() { [ "$UID" != 0 ] && echo "skipping test 68 (must run as root)" && return + [ "`lsmod|grep obdfilter`" ] && echo "skipping test 68 (local OST)" && \ + return find_loop_dev dd if=/dev/zero of=$DIR/f68 bs=64k count=1024 @@ -2121,7 +2140,7 @@ test_68() { cleanup_68 - [ $SWAPUSED -eq 0 ] && error "no swap used???" + [ $SWAPUSED -eq 0 ] && echo "no swap used???" 
|| true } run_test 68 "support swapping to Lustre ========================" diff --git a/lustre/utils/lmc b/lustre/utils/lmc index 567689e..53985a7 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -90,6 +90,7 @@ Object creation command summary: --nspath --journal_size size --inode_size size + --mdsuuid uuid --add lov --lov lov_name @@ -183,7 +184,8 @@ lmc_options = [ ('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"), ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"), ('mkfsoptions', "Optional argument to mkfs.", PARAM, ""), - ('ostuuid', "", PARAM,""), + ('ostuuid', "Optional argument to specify OST UUID", PARAM,""), + ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""), ('nspath', "Local mount point of server namespace.", PARAM,""), ('format', ""), @@ -697,7 +699,13 @@ def add_mds(gen, lustre, options): mds_uuid = name2uuid(lustre, mds_name, 'mds', fatal=0) if not mds_uuid: - mds_uuid = new_uuid(mds_name) + mds_uuid = get_option(options, 'mdsuuid') + if mds_uuid: + if lookup(lustre, mds_uuid): + error("Duplicate MDS UUID:", mds_uuid) + else: + mds_uuid = new_uuid(mds_name) + mds = gen.mds(mds_name, mds_uuid, mdd_uuid, options.group) lustre.appendChild(mds) else: