BACKINGFS="ldiskfs"
],[
KMODEXT=".o"
- linux25="no"
+ linux25="no"
])
AC_MSG_CHECKING([if you are using Linux 2.6])
AC_MSG_RESULT([$linux25])
AC_MSG_RESULT([$LINUXRELEASE])
AC_SUBST(LINUXRELEASE)
- moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
+ moduledir='/lib/modules/'$LINUXRELEASE/kernel
modulefsdir='$(moduledir)/fs/$(PACKAGE)'
- modulenetdir='$(moduledir)/net/$(PACKAGE)'
+ modulenetdir='$(moduledir)/net/$(PACKAGE)'
AC_SUBST(moduledir)
AC_SUBST(modulefsdir)
- AC_SUBST(modulenetdir)
+ AC_SUBST(modulenetdir)
# ------------ RELEASE --------------------------------
AC_MSG_CHECKING([for Lustre release])
LIBEFENCE=""
fi
AC_SUBST(LIBEFENCE)
+
+# -------- enable acceptor libwrap (TCP wrappers) support? -------
+# --enable-libwrap sets LIBWRAP to -lwrap and defines HAVE_LIBWRAP;
+# without the option both stay off.
+AC_MSG_CHECKING([if libwrap support is requested])
+AC_ARG_ENABLE([libwrap],
+        AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
+        [case "${enableval}" in
+                yes) enable_libwrap=yes ;;
+                no)  enable_libwrap=no ;;
+                *)   AC_MSG_ERROR([bad value ${enableval} for --enable-libwrap]) ;;
+        esac],[enable_libwrap=no])
+AC_MSG_RESULT([$enable_libwrap])
+if test "x$enable_libwrap" = xyes ; then
+        LIBWRAP="-lwrap"
+        AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
+else
+        LIBWRAP=""
+fi
+AC_SUBST(LIBWRAP)
const int line);
#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
__FUNCTION__, __LINE__))
-/* it would be great to dump_stack() here, but some kernels
- * export it as show_stack() and I can't be bothered to
- * proprely engage in that dance right now */
#define LASSERTF(cond, fmt...) \
do { \
if (unlikely(!(cond))) { \
- portals_debug_msg(0, D_EMERG, __FILE__, __FUNCTION__,\
- __LINE__, CDEBUG_STACK, \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\
+ __FUNCTION__,__LINE__, CDEBUG_STACK,\
"ASSERTION(" #cond ") failed:" fmt);\
LBUG(); \
} \
#define LBUG_WITH_LOC(file, func, line) \
do { \
CEMERG("LBUG\n"); \
+ CERROR("STACK: %s\n", portals_debug_dumpstack()); \
portals_debug_dumplog(); \
portals_run_lbug_upcall(file, func, line); \
set_task_state(current, TASK_UNINTERRUPTIBLE); \
#include "router.h"
#define KPR_PROC_ROUTER "sys/portals/router"
+#define KPR_PROC_ROUTES "sys/portals/routes"
-int
-kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data)
+/* Used for multi-page route list book keeping: carries the position of a
+ * routes listing from one kpr_proc_routes_read() call to the next.
+ * A single instance (kpr_read_routes_data) is handed to the proc entry. */
+struct proc_route_data {
+ struct list_head *curr; /* next route to print; NULL when no listing is in progress */
+ unsigned int generation; /* kpr_routes_generation sampled when the listing began */
+ off_t skip; /* bytes of a partly delivered line to skip at the start of the next page */
+} kpr_read_routes_data;
+
+/* nal2name support re-used from utils/portals.c: maps NAL numbers to short
+ * printable names.  The table ends with a { NULL, -1 } sentinel, which is
+ * what name2num_lookup_num() stops on. */
+struct name2num {
+ char *name;
+ int num;
+} nalnames[] = {
+ { "any", 0},
+ { "elan", QSWNAL},
+ { "tcp", SOCKNAL},
+ { "gm", GMNAL},
+ { "ib", IBNAL},
+ { NULL, -1}
+};
+
+/* Scan a table terminated by a NULL name for the entry whose number is
+ * 'num'.  Returns that entry, or NULL when the terminator is reached first. */
+static struct name2num *name2num_lookup_num(struct name2num *table, int num)
+{
+        struct name2num *entry;
+
+        for (entry = table; entry->name != NULL; entry++) {
+                if (entry->num == num)
+                        return entry;
+        }
+
+        return NULL;
+}
+
+/* Printable name for NAL number 'nal', or "???" when nalnames has no
+ * entry for it. */
+static char *nal2name(int nal)
{
- unsigned long long bytes = kpr_fwd_bytes;
- unsigned long packets = kpr_fwd_packets;
- unsigned long errors = kpr_fwd_errors;
+        struct name2num *match = name2num_lookup_num(nalnames, nal);
+
+        if (match != NULL)
+                return match->name;
+
+        return "???";
+}
+
+
+/* read_proc handler for KPR_PROC_ROUTER: writes the forwarded byte, packet
+ * and error counters followed by the current queue depth into 'page' as a
+ * single line.  Returns the length written, or 0 for any non-zero offset. */
+static int kpr_proc_router_read(char *page, char **start, off_t off,
+                                int count, int *eof, void *data)
+{
+        unsigned long long bytes = kpr_fwd_bytes;
+        unsigned long packets = kpr_fwd_packets;
+        unsigned long errors = kpr_fwd_errors;
unsigned int qdepth = atomic_read (&kpr_queue_depth);
- int len;
-
- *eof = 1;
- if (off != 0)
- return (0);
-
- len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth);
-
- *start = page;
- return (len);
+        int len;
+
+        /* The whole report is produced by the first read */
+        *eof = 1;
+        if (off != 0)
+                return (0);
+
+        /* All four values are unsigned; signed conversions would show large
+         * counters as negative numbers */
+        len = sprintf(page, "%Lu %lu %lu %u\n", bytes, packets, errors, qdepth);
+
+        *start = page;
+        return (len);
}
-int
-kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data)
+static int kpr_proc_router_write(struct file *file, const char *ubuffer,
+ unsigned long count, void *data)
{
- /* Ignore what we've been asked to write, and just zero the stats counters */
- kpr_fwd_bytes = 0;
- kpr_fwd_packets = 0;
- kpr_fwd_errors = 0;
+ /* Ignore what we've been asked to write, and just zero the stats.
+ * The full count is returned, so the write is reported as consumed. */
+ kpr_fwd_bytes = 0;
+ kpr_fwd_packets = 0;
+ kpr_fwd_errors = 0;
- return (count);
+ return (count);
}
-void
-kpr_proc_init(void)
+/* read_proc handler for KPR_PROC_ROUTES: prints one line per entry on
+ * kpr_routes into 'page'.  A listing that does not fit in one call is
+ * continued on later calls from the cursor kept in the proc_route_data
+ * passed as 'data'; it is abandoned with an error line if
+ * kpr_routes_generation has moved in between.  Returns the number of bytes
+ * the caller may take starting at *start. */
+static int kpr_proc_routes_read(char *page, char **start, off_t off,
+                                int count, int *eof, void *data)
{
- struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL);
+        struct proc_route_data *prd = data;
+        kpr_route_entry_t *re;
+        kpr_gateway_entry_t *ge;
+        int chunk_len = 0;
+        int line_len = 0;
+        int user_len = 0;
+
+        *eof = 1;
+        *start = page;
+
+        /* Take the lock before looking at the cursor: the route list and
+         * its generation are updated under the write lock, so checking the
+         * generation inside the read lock keeps the saved list position
+         * valid for the whole walk below. */
+        read_lock(&kpr_rwlock);
+
+        if (prd->curr == NULL) {
+                if (off != 0) {
+                        read_unlock(&kpr_rwlock);
+                        return 0;
+                }
+
+                /* First pass, initialize our private data */
+                prd->curr = kpr_routes.next;
+                prd->generation = kpr_routes_generation;
+                prd->skip = 0;
+        } else {
+                /* Abort if the route list changed since the first pass */
+                if (prd->generation != kpr_routes_generation) {
+                        prd->curr = NULL;
+                        read_unlock(&kpr_rwlock);
+                        return sprintf(page, "\nError: Routes Changed\n");
+                }
+
+                /* All the routes have been walked */
+                if (prd->curr == &kpr_routes) {
+                        prd->curr = NULL;
+                        read_unlock(&kpr_rwlock);
+                        return 0;
+                }
+        }
+
+        /* Bytes already handed out from a split line are regenerated into
+         * the page but skipped over for the caller */
+        *start = page + prd->skip;
+        user_len = -prd->skip;
+
+        for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) {
+                re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
+                ge = re->kpre_gateway;
+
+                line_len = sprintf(page + chunk_len,
+                        "%12s "LPX64" : "LPX64" - "LPX64", %s\n",
+                        nal2name(ge->kpge_nalid), ge->kpge_nid,
+                        re->kpre_lo_nid, re->kpre_hi_nid,
+                        ge->kpge_alive ? "up" : "down");
+                chunk_len += line_len;
+                user_len += line_len;
- if (entry == NULL)
- {
+                /* The route table will exceed one page */
+                if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) {
+                        prd->curr = prd->curr->next;
+                        break;
+                }
+        }
+
+        *eof = 0;
+
+        /* Caller received only a portion of the last entry, the
+         * remaining will be delivered in the next page if asked for.
+         */
+        if (user_len > count) {
+                prd->curr = prd->curr->prev;
+                prd->skip = line_len - (user_len - count);
+                read_unlock(&kpr_rwlock);
+                return count;
+        }
+
+        /* Not enough data to entirely satisfy callers request */
+        prd->skip = 0;
+        read_unlock(&kpr_rwlock);
+        return user_len;
+}
+
+static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
+ unsigned long count, void *data)
+{
+ /* no-op; lctl should be used to adjust the routes.  The input is
+ * discarded and the full count is returned. */
+ return (count);
+}
+
+void kpr_proc_init(void)
+{
+ struct proc_dir_entry *router_entry;
+ struct proc_dir_entry *routes_entry;
+
+ /* Initialize KPR_PROC_ROUTER: the forwarding statistics entry, whose
+ * write handler zeroes the counters */
+ router_entry = create_proc_entry (KPR_PROC_ROUTER,
+ S_IFREG | S_IRUGO | S_IWUSR, NULL);
+
+ if (router_entry == NULL) {
CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
return;
}
- entry->data = NULL;
- entry->read_proc = kpr_proc_read;
- entry->write_proc = kpr_proc_write;
+ router_entry->data = NULL;
+ router_entry->read_proc = kpr_proc_router_read;
+ router_entry->write_proc = kpr_proc_router_write;
+
+ /* Initialize KPR_PROC_ROUTES: the route table listing.  If creating
+ * it fails, the router entry made above is left in place. */
+ routes_entry = create_proc_entry (KPR_PROC_ROUTES,
+ S_IFREG | S_IRUGO | S_IWUSR, NULL);
+
+ if (routes_entry == NULL) {
+ CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES);
+ return;
+ }
+
+ kpr_read_routes_data.curr = NULL; /* no listing in progress */
+ kpr_read_routes_data.generation = 0;
+ kpr_read_routes_data.skip = 0;
+
+ routes_entry->data = &kpr_read_routes_data; /* arrives as 'data' in the read handler */
+ routes_entry->read_proc = kpr_proc_routes_read;
+ routes_entry->write_proc = kpr_proc_routes_write;
}
-void
-kpr_proc_fini(void)
+void kpr_proc_fini(void)
{
remove_proc_entry(KPR_PROC_ROUTER, 0);
+ remove_proc_entry(KPR_PROC_ROUTES, 0); /* second entry registered by kpr_proc_init() */
}
LIST_HEAD(kpr_gateways);
LIST_HEAD(kpr_nals);
+unsigned int kpr_routes_generation;
unsigned long long kpr_fwd_bytes;
unsigned long kpr_fwd_packets;
unsigned long kpr_fwd_errors;
list_for_each (e, &kpr_gateways) {
kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
kpge_list);
-
+
if (ge2->kpge_nalid == gateway_nalid &&
ge2->kpge_nid == gateway_nid) {
PORTAL_FREE (ge, sizeof (*ge));
if (!dup) {
/* Adding a new gateway... */
-
list_add (&ge->kpge_list, &kpr_gateways);
/* ...zero all gateway weights so this one doesn't have to
kpge_list);
atomic_set (&ge2->kpge_weight, 0);
}
-
}
re->kpre_gateway = ge;
ge->kpge_refcount++;
list_add (&re->kpre_list, &kpr_routes);
+ kpr_routes_generation++;
write_unlock_irqrestore (&kpr_rwlock, flags);
return (0);
ptl_nid_t lo, ptl_nid_t hi)
{
int specific = (lo != PTL_NID_ANY);
- unsigned long flags;
+ unsigned long flags;
int rc = -ENOENT;
- struct list_head *e;
- struct list_head *n;
+ struct list_head *e;
+ struct list_head *n;
- CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n",
+ CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n",
gw_nalid, gw_nid, lo, hi);
LASSERT(!in_interrupt());
/* NB Caller may specify either all routes via the given gateway
* (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
* actual NIDs) */
-
if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
return (-EINVAL);
- write_lock_irqsave(&kpr_rwlock, flags);
+ write_lock_irqsave(&kpr_rwlock, flags);
list_for_each_safe (e, n, &kpr_routes) {
kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
kpre_list);
kpr_gateway_entry_t *ge = re->kpre_gateway;
-
+
if (ge->kpge_nalid != gw_nalid ||
ge->kpge_nid != gw_nid ||
- (specific &&
+ (specific &&
(lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
continue;
break;
}
+ kpr_routes_generation++;
write_unlock_irqrestore(&kpr_rwlock, flags);
+
return (rc);
}
CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
atomic_read(&portal_kmemory));
+ kpr_routes_generation = 0;
kpr_proc_init();
PORTAL_SYMBOL_REGISTER(kpr_router_interface);
extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid,
int alive, time_t when);
+extern unsigned int kpr_routes_generation;
extern unsigned long long kpr_fwd_bytes;
extern unsigned long kpr_fwd_packets;
extern unsigned long kpr_fwd_errors;
extern atomic_t kpr_queue_depth;
+extern struct list_head kpr_routes;
+extern rwlock_t kpr_rwlock;
#endif /* _KPLROUTER_H */
endif
acceptor_SOURCES = acceptor.c
+acceptor_LDADD = $(LIBWRAP)
wirecheck_SOURCES = wirecheck.c
#include <unistd.h>
#include <syslog.h>
#include <errno.h>
+#ifdef HAVE_LIBWRAP
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <tcpd.h>
+#endif
#include <portals/api-support.h>
#include <portals/list.h>
#define PROGNAME "acceptor"
+#ifdef HAVE_LIBWRAP
+/* needed because libwrap declares these as externs */
+int allow_severity = LOG_INFO;
+int deny_severity = LOG_WARNING;
+#endif
+
void create_pidfile(char *name, int port)
{
char pidfile[1024];
int cfd;
struct portal_ioctl_data data;
struct portals_cfg pcfg;
-
+#ifdef HAVE_LIBWRAP
+ struct request_info request;
+ char addrstr[INET_ADDRSTRLEN];
+#endif
+
cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
if ( cfd < 0 ) {
perror("accept");
continue;
}
+#ifdef HAVE_LIBWRAP
+                /* libwrap access control: refuse, log and drop the connection
+                 * when hosts_access() rejects the peer */
+                request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0);
+                sock_host(&request);
+                if (!hosts_access(&request)) {
+                        inet_ntop(AF_INET, &clntaddr.sin_addr,
+                                  addrstr, INET_ADDRSTRLEN);
+                        /* ntohs() yields an unsigned 16-bit port; %hd would
+                         * log ports above 32767 as negative numbers */
+                        syslog(LOG_WARNING, "Unauthorized access from %s:%hu\n",
+                               addrstr, ntohs(clntaddr.sin_port));
+                        close (cfd);
+                        continue;
+                }
+#endif
show_connection (cfd, clntaddr.sin_addr.s_addr);
PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
return 0;
}
- sprintf(filename, "%s-%ld.tmp", argv[1], random);
+        /* Name is <prefix>.<time>.<pid>; time_t and pid_t have no printf
+         * conversion of their own, so cast them to the types the format
+         * string names */
+        sprintf(filename, "%s.%lu.%u", argc > 1 ? argv[1] : "/tmp/lustre-log",
+                (unsigned long)time(NULL), (unsigned int)getpid());
+
if (argc > 2)
raw = atoi(argv[2]);
unlink(filename);
-tbd Cluster File Systems, Inc. <info@clusterfs.com>
+2004-08-24 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.5
* bug fixes
- don't close LustreDB during write_conf until it is done (3860)
- don't allow multiple threads in OSC recovery(3812)
- fix debug_size parameters (3864)
- fix mds_postrecov to initialize import for llog ctxt (3121)
+ - replace config semaphore with spinlock (3306)
+ - be sure to send a reply for a CANCEL rpc with bad export (3863)
+ - don't allow enqueue to complete on a destroyed export (3822)
+ - down write_lock before checking llog header bitmap (3825)
+ - recover from lock replay timeout (3764)
+ - up llog sem before sending rpc (3652)
- reduce ns lock hold times when setting kms (3267)
+ - change a dlm LBUG to LASSERTF, to maybe learn something (4228)
+ - fix NULL deref and obd_dev leak on setup error (3312)
+ - replace some LBUG about llog ops with error handling (3841)
+ - don't match INVALID dentries from d_lookup and spin (3784)
+ - hold dcache_lock while marking dentries INVALID and hashing (4255)
+ * miscellania
+ - add libwrap support for the TCP acceptor (3996)
+ - add /proc/sys/portals/routes for non-root route listing (3994)
+ - allow setting MDS UUID in .xml (2580)
+ - print the stack of a process that LBUGs (4228)
2004-07-14 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.4
- return -ENOENT instead of asserting if ost getattr+unlink race (3558)
- avoid deadlock after precreation failure (3758)
- fix race and lock order deadlock in orphan handling (3450, 3750)
+ - add validity checks when grabbing inodes from l_ast_data (3599)
* miscellania
- add /proc/.../recovery_status to obdfilter (3428)
- lightweight CDEBUG infrastructure, debug daemon (3668)
AC_INIT
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, 1.2.4.1)
+AM_INIT_AUTOMAKE(lustre, 1.2.5)
# AM_MAINTAINER_MODE
# Four main targets: lustre kernel modules, utilities, tests, and liblustre
#define ll_vfs_symlink(dir, dentry, path, mode) vfs_symlink(dir, dentry, path, mode)
#endif
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#endif
+
#ifdef HAVE_I_ALLOC_SEM
#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
struct llog_rec_tail llh_tail;
} __attribute__((packed));
+/* Size in bits of the bitmap in llog header 'llh': the header record length
+ * minus the bitmap offset and the tail, times 8.  The argument is
+ * parenthesized so any pointer expression can be passed. */
+#define LLOG_BITMAP_SIZE(llh)  (((llh)->llh_hdr.lrh_len -         \
+                                 (llh)->llh_bitmap_offset -       \
+                                 sizeof((llh)->llh_tail)) * 8)
+
/* log cookies are used to reference a specific log file and a record therein */
struct llog_cookie {
struct llog_logid lgc_lgl;
extern struct file_operations ll_pgcache_seq_fops;
+#define LLI_INODE_MAGIC 0x111d0de5
+#define LLI_INODE_DEAD 0xdeadd00d
#define LLI_F_HAVE_OST_SIZE_LOCK 0
#define LLI_F_HAVE_MDS_SIZE_LOCK 1
#define LLI_F_PREFER_EXTENDED_SIZE 2
struct ll_inode_info {
+ int lli_inode_magic;
struct lov_stripe_md *lli_smd;
char *lli_symlink_name;
struct semaphore lli_open_sem;
struct llog_operations *loc_logops;
struct llog_handle *loc_handle;
struct llog_canceld_ctxt *loc_llcd;
- struct semaphore loc_sem; /* protects loc_llcd */
+ struct semaphore loc_sem; /* protects loc_llcd and loc_imp */
void *llog_proc_cb;
};
/* OBD Device Declarations */
#define MAX_OBD_DEVICES 256
extern struct obd_device obd_dev[MAX_OBD_DEVICES];
+extern spinlock_t obd_dev_lock;
/* OBD Operations Declarations */
extern struct obd_device *class_conn2obd(struct lustre_handle *);
int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm);
int class_unregister_type(char *nm);
-struct obd_device *class_newdev(int *dev);
+struct obd_device *class_newdev(struct obd_type *type);
+void class_release_dev(struct obd_device *obd);
int class_name2dev(char *name);
struct obd_device *class_name2obd(char *name);
#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a
+#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
+#define OBD_FAIL_LDLM_REPLY 0x30c
#define OBD_FAIL_OSC 0x400
#define OBD_FAIL_OSC_BRW_READ_BULK 0x401
#define OBD_FAIL_TGT_REPLY_NET 0x700
#define OBD_FAIL_TGT_CONN_RACE 0x701
+#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -407,6 +529,7 @@
+@@ -407,6 +529,9 @@
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
++#ifdef EXT3_DELETE_THREAD
+ J_ASSERT(sbi->s_delete_inodes == 0);
++#endif
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
===================================================================
--- linux-2.4.24.orig/fs/ext3/namei.c 2004-01-12 20:36:31.000000000 +0300
+++ linux-2.4.24/fs/ext3/namei.c 2004-01-12 20:36:32.000000000 +0300
-@@ -1936,6 +1936,36 @@
+@@ -1936,6 +1936,40 @@
return retval;
}
++#ifdef EXT3_DELETE_THREAD
+static int ext3_try_to_delay_deletion(struct inode *inode)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
+
+ return 0;
+}
++#else
++#define ext3_try_to_delay_deletion(inode) do {} while (0)
++#endif
+
static int ext3_unlink(struct inode * dir, struct dentry *dentry)
{
--- /dev/null
+--- ./fs/ext3/namei.c.orig 2004-08-19 12:53:21.000000000 +0800
++++ ./fs/ext3/namei.c 2004-08-19 12:44:18.000000000 +0800
+@@ -1541,11 +1541,16 @@
+ static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+ {
+ inode->i_nlink++;
++ if (is_dx(inode) && inode->i_nlink > 1) {
++ if (inode->i_nlink >= 65000) /* limit is 16-bit i_links_count */
++ inode->i_nlink = 1;
++ }
+ }
+
+ static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+ {
+- inode->i_nlink--;
++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
++ inode->i_nlink--;
+ }
+
+ static int ext3_add_nondir(handle_t *handle,
+@@ -1646,7 +1651,7 @@
+ struct ext3_dir_entry_2 * de;
+ int err;
+
+- if (dir->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(dir))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -1668,7 +1673,7 @@
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+- inode->i_nlink--; /* is this nlink == 0? */
++ ext3_dec_count(handle, inode); /* is this nlink == 0? */
+ ext3_mark_inode_dirty(handle, inode);
+ iput (inode);
+ goto out_stop;
+@@ -1700,7 +1705,7 @@
+ iput (inode);
+ goto out_stop;
+ }
+- dir->i_nlink++;
++ ext3_inc_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ d_instantiate(dentry, inode);
+@@ -1761,10 +1766,11 @@
+ }
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+ }
+- if (!ext3_check_dir_entry ("empty_dir", inode, de, bh,
+- offset)) {
+- brelse (bh);
+- return 1;
++ if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) {
++ /* On error skip the de and offset to the next block. */
++ de = (void *)(bh->b_data + sb->s_blocksize);
++ offset = (offset | (sb->s_blocksize - 1)) + 1;
++ continue;
+ }
+ if (le32_to_cpu(de->inode)) {
+ brelse (bh);
+@@ -1957,14 +1963,14 @@
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_rmdir;
+- if (inode->i_nlink != 2)
+- ext3_warning (inode->i_sb, "ext3_rmdir",
+- "empty directory has nlink!=2 (%d)",
+- inode->i_nlink);
++ if (!EXT3_DIR_LINK_EMPTY(inode))
++ ext3_warning(inode->i_sb, __FUNCTION__,
++ "empty directory has too many links (%d)",
++ inode->i_nlink);
+ inode->i_version = ++event;
+ inode->i_nlink = 0;
+ ext3_orphan_add(handle, inode);
+- dir->i_nlink--;
++ ext3_dec_count(handle, dir);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+ ext3_update_dx_flag(dir);
+@@ -2046,7 +2052,7 @@
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+- inode->i_nlink--;
++ ext3_dec_count(handle, inode);
+ if (!inode->i_nlink) {
+ ext3_try_to_delay_deletion(inode);
+ ext3_orphan_add(handle, inode);
+@@ -2140,9 +2146,8 @@
+ if (S_ISDIR(inode->i_mode))
+ return -EPERM;
+
+- if (inode->i_nlink >= EXT3_LINK_MAX) {
++ if (EXT3_DIR_LINK_MAXED(inode))
+ return -EMLINK;
+- }
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+@@ -2226,8 +2231,8 @@
+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+- if (!new_inode && new_dir!=old_dir &&
+- new_dir->i_nlink >= EXT3_LINK_MAX)
++ if (!new_inode && new_dir != old_dir &&
++ EXT3_DIR_LINK_MAXED(new_dir))
+ goto end_rename;
+ }
+ if (!new_bh) {
+@@ -2285,7 +2290,7 @@
+ }
+
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+@@ -2296,11 +2301,11 @@
+ PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+- old_dir->i_nlink--;
++ ext3_dec_count(handle, old_dir);
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ } else {
+- new_dir->i_nlink++;
++ ext3_inc_count(handle, new_dir);
+ ext3_update_dx_flag(new_dir);
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+--- ./include/linux/ext3_fs.h.orig 2004-08-19 12:53:52.000000000 +0800
++++ ./include/linux/ext3_fs.h 2004-08-19 11:06:33.000000000 +0800
+@@ -42,7 +42,7 @@
+ /*
+ * Always enable hashed directories
+ */
+-#define CONFIG_EXT3_INDEX
++#define CONFIG_EXT3_INDEX 1
+
+ /*
+ * Debug code
+@@ -581,14 +581,15 @@
+ */
+
+ #ifdef CONFIG_EXT3_INDEX
+- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+- EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
++ (is_dx(dir) && (dir)->i_nlink == 1))
+ #else
+ #define is_dx(dir) 0
+-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
+ #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
+ #endif
+
procfs-ndynamic-2.4.patch
ext3-truncate-buffer-head.patch
inode-max-readahead-2.4.24.patch
+ext3-nlinks-2.4.24.patch
if (rc) {
DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
- if (req->rq_reply_state == NULL) {
- rc = lustre_pack_reply (req, 0, NULL, NULL);
- if (rc != 0) {
- CERROR ("can't allocate reply\n");
- return (rc);
- }
- }
- req->rq_type = PTL_RPC_MSG_ERR;
+ req->rq_status = rc;
+ return (ptlrpc_error(req));
} else {
DEBUG_REQ(D_NET, req, "sending reply");
}
* usage: pass in a resource on which you have done ldlm_resource_get
* pass in a parent lock on which you have done a ldlm_lock_get
* after return, ldlm_*_put the resource and parent
- * returns: lock with refcount 1
+ * returns: lock with refcount 2 - one for current caller and one for remote
*/
static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
struct ldlm_resource *resource)
policy = ldlm_processing_policy_table[res->lr_type];
policy(lock, flags, 1, &rc);
- EXIT;
+ GOTO(out, rc);
out:
l_unlock(&ns->ns_lock);
return rc;
LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
LASSERT(req->rq_export);
- lock->l_export = class_export_get(req->rq_export);
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_ENQUEUE_BLOCKED, obd_timeout * 2);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ if (req->rq_export->exp_failed) {
+ LDLM_ERROR(lock,"lock on destroyed export %p\n",req->rq_export);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ GOTO(out, rc = -ENOTCONN);
+ }
+ lock->l_export = class_export_get(req->rq_export);
list_add(&lock->l_export_chain,
&lock->l_export->exp_ldlm_data.led_held_locks);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
size[1]);
}
up(&lock->l_resource->lr_lvb_sem);
+ } else {
+ ldlm_lock_destroy(lock);
}
if (!err && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK)
lustre_swab_ldlm_request);
if (dlm_req != NULL)
ldlm_lock_dump_handle(D_ERROR, &dlm_req->lock_handle1);
- RETURN(-ENOTCONN);
+ ldlm_callback_reply(req, -ENOTCONN);
+ RETURN(0);
}
switch (req->rq_reqmsg->opc) {
default:
CERROR("invalid opcode %d\n", req->rq_reqmsg->opc);
- RETURN(-EINVAL);
+ ldlm_callback_reply(req, -EINVAL);
}
RETURN(0);
LDLM_DEBUG(lock, "replayed lock:");
ptlrpc_import_recovery_state_machine(req->rq_import);
out:
+ if (rc != ELDLM_OK)
+ ptlrpc_connect_import(req->rq_import, NULL);
+
+
RETURN(rc);
}
struct ldlm_resource *res;
ENTRY;
- if (type < LDLM_MIN_TYPE || type > LDLM_MAX_TYPE) {
- LBUG();
- RETURN(NULL);
- }
+ LASSERTF(type >= LDLM_MIN_TYPE && type <= LDLM_MAX_TYPE,
+ "type: %d", type);
res = ldlm_resource_new();
if (!res) {
static int null_if_equal(struct ldlm_lock *lock, void *data)
{
- if (data == lock->l_ast_data)
+ if (data == lock->l_ast_data) {
lock->l_ast_data = NULL;
- if (lock->l_req_mode != lock->l_granted_mode)
- return LDLM_ITER_STOP;
+ if (lock->l_req_mode != lock->l_granted_mode)
+ LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); }
return LDLM_ITER_CONTINUE;
}
EXIT;
}
+/* d_compare hook: returns 0 when 'name' matches the existing dentry name
+ * 'd_name', 1 otherwise.  A dentry flagged DCACHE_LUSTRE_INVALID never
+ * matches, even if the names are equal.
+ *
+ * Without this, ll_lookup_it() can instantiate a dentry and an AST can
+ * arrive before d_revalidate_it() runs.  The dentry then still exists
+ * (marked INVALID) and d_lookup() matches it, but there is no lock on it,
+ * so lock_match() fails and we spin around real_lookup(). */
+static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
+                       struct qstr *name)
+{
+        struct dentry *dchild;
+        ENTRY;
+
+        /* Names must agree in both length and content */
+        if (d_name->len != name->len ||
+            memcmp(d_name->name, name->name, name->len) != 0)
+                RETURN(1);
+
+        /* d_name is a member of its dentry, so step back to the dentry */
+        dchild = container_of(d_name, struct dentry, d_name);
+        if (!(dchild->d_flags & DCACHE_LUSTRE_INVALID))
+                RETURN(0);
+
+        CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
+               dchild);
+        RETURN(1);
+}
+
/* should NOT be called with the dcache lock, see fs/dcache.c */
static int ll_ddelete(struct dentry *de)
{
EXIT;
}
-extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
-
static int revalidate_it_finish(struct ptlrpc_request *request, int offset,
struct lookup_intent *it,
struct dentry *de)
if (d_mountpoint(de))
RETURN(1);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
ll_frob_intent(&it, &lookup_it);
LASSERT(it);
#endif
.d_release = ll_release,
.d_delete = ll_ddelete,
+ .d_compare = ll_dcompare,
#if 0
.d_pin = ll_pin,
.d_unpin = ll_unpin,
ENTRY;
if (lsm->lsm_stripe_count == 1)
- RETURN(0);
+ GOTO(check, stripe = 0);
/* get our offset in the lov */
rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe);
if (rc != 0) {
CERROR("obd_get_info: rc = %d\n", rc);
- LBUG();
+ RETURN(rc);
}
LASSERT(stripe < lsm->lsm_stripe_count);
+
+check:
+ if (lsm->lsm_oinfo[stripe].loi_id != lock->l_resource->lr_name.name[0]||
+ lsm->lsm_oinfo[stripe].loi_gr != lock->l_resource->lr_name.name[1]){
+ LDLM_ERROR(lock, "resource doesn't match object "LPU64"/"LPU64,
+ lsm->lsm_oinfo[stripe].loi_id,
+ lsm->lsm_oinfo[stripe].loi_gr);
+ RETURN(-ELDLM_NO_LOCK_DATA);
+ }
+
RETURN(stripe);
}
lsm = lli->lli_smd;
stripe = ll_lock_to_stripe_offset(inode, lock);
+ if (stripe < 0)
+ goto iput;
ll_pgcache_remove_extent(inode, lsm, lock, stripe);
/* grabbing the i_sem will wait for write() to complete. ns
LDLM_DEBUG(lock, "client-side async enqueue: granted/glimpsed");
stripe = ll_lock_to_stripe_offset(inode, lock);
+ if (stripe < 0)
+ goto iput;
if (lock->l_lvb_len) {
struct lov_stripe_md *lsm = lli->lli_smd;
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
+iput:
iput(inode);
wake_up(&lock->l_waitq);
struct ptlrpc_request *req = reqp;
struct inode *inode = ll_inode_from_lock(lock);
struct ll_inode_info *lli;
+ struct lov_stripe_md *lsm;
struct ost_lvb *lvb;
- int rc, size = sizeof(*lvb), stripe = 0;
+ int rc, size = sizeof(*lvb), stripe;
ENTRY;
if (inode == NULL)
lli = ll_i2info(inode);
if (lli == NULL)
GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
- if (lli->lli_smd == NULL)
+ lsm = lli->lli_smd;
+ if (lsm == NULL)
GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
/* First, find out which stripe index this lock corresponds to. */
- if (lli->lli_smd->lsm_stripe_count > 1)
- stripe = ll_lock_to_stripe_offset(inode, lock);
+ stripe = ll_lock_to_stripe_offset(inode, lock);
+ if (stripe < 0)
+ GOTO(iput, rc = -ELDLM_NO_LOCK_DATA);
rc = lustre_pack_reply(req, 1, &size, NULL);
if (rc) {
if (list_empty(&lli->lli_close_item)) {
CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
inode->i_ino, inode->i_generation);
- LASSERT(igrab(inode) == inode);
+ igrab(inode);
list_add_tail(&lli->lli_close_item, &lcq->lcq_list);
wake_up(&lcq->lcq_waitq);
}
obd_disconnect(sbi->ll_mdc_exp, 0);
// We do this to get rid of orphaned dentries. That is not really true.
- spin_lock(&dcache_lock);
hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
- CWARN("orphan dentry %*s (%p) at unmount\n",
- dentry->d_name.len, dentry->d_name.name, dentry);
+ CWARN("orphan dentry %*s (%p->%p) at unmount\n",
+ dentry->d_name.len, dentry->d_name.name, dentry, next);
shrink_dcache_parent(dentry);
}
- spin_unlock(&dcache_lock);
EXIT;
}
lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
spin_lock_init(&lli->lli_lock);
INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
+ lli->lli_inode_magic = LLI_INODE_MAGIC;
}
int ll_fill_super(struct super_block *sb, void *data, int silent)
struct inode *ll_inode_from_lock(struct ldlm_lock *lock)
{
- struct inode *inode;
+ struct inode *inode = NULL;
l_lock(&lock->l_resource->lr_namespace->ns_lock);
- if (lock->l_ast_data)
- inode = igrab(lock->l_ast_data);
- else
- inode = NULL;
+ if (lock->l_ast_data) {
+ struct ll_inode_info *lli = ll_i2info(lock->l_ast_data);
+ if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
+ inode = igrab(lock->l_ast_data);
+ } else {
+ inode = lock->l_ast_data;
+ CDEBUG(inode->i_state & I_FREEING ? D_INFO : D_WARNING,
+ "l_ast_data %p is bogus: magic %0x8\n",
+ lock->l_ast_data, lli->lli_inode_magic);
+ inode = NULL;
+ }
+ }
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
return inode;
}
static int null_if_equal(struct ldlm_lock *lock, void *data)
{
- if (data == lock->l_ast_data)
+ if (data == lock->l_ast_data) {
lock->l_ast_data = NULL;
- if (lock->l_req_mode != lock->l_granted_mode)
- return LDLM_ITER_STOP;
+ if (lock->l_req_mode != lock->l_granted_mode)
+ LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); }
return LDLM_ITER_CONTINUE;
}
strlen(lli->lli_symlink_name) + 1);
lli->lli_symlink_name = NULL;
}
+ lli->lli_inode_magic = LLI_INODE_DEAD;
EXIT;
}
data->mod_time = LTIME_S(CURRENT_TIME);
}
+static void ll_d_add(struct dentry *de, struct inode *inode)
+{
+ CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de);
+ /* d_instantiate equivalent: attach 'inode' (may be NULL) to 'de'.
+ * The dentry must not already be on an alias list.  dcache_lock is
+ * dropped here only on the way to LBUG(), so presumably every caller
+ * enters with it held -- confirm against callers. */
+ if (!list_empty(&de->d_alias)) {
+ spin_unlock(&dcache_lock);
+ CERROR("dentry %*s %p alias next %p, prev %p\n",
+ de->d_name.len, de->d_name.name, de,
+ de->d_alias.next, de->d_alias.prev);
+ LBUG();
+ }
+ if (inode)
+ list_add(&de->d_alias, &inode->i_dentry);
+ de->d_inode = inode;
+
+ /* d_rehash equivalent: the dentry must be unhashed before it is
+ * handed to __d_rehash() */
+ if (!list_empty(&de->d_hash)) {
+ spin_unlock(&dcache_lock);
+ CERROR("dentry %*s %p hash next %p, prev %p\n",
+ de->d_name.len, de->d_name.name, de,
+ de->d_hash.next, de->d_hash.prev);
+ LBUG();
+ }
+ __d_rehash(de, 0);
+}
+
/* Search "inode"'s alias list for a dentry that has the same name and parent as
* de. If found, return it. If not found, return de. */
struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
hlist_del_init(&dentry->d_hash);
__d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
- spin_unlock(&dcache_lock);
+ dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
atomic_inc(&dentry->d_count);
+ spin_unlock(&dcache_lock);
iput(inode);
- dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
CDEBUG(D_DENTRY, "alias dentry %*s (%p) parent %p inode %p "
"refc %d\n", de->d_name.len, de->d_name.name, de,
de->d_parent, de->d_inode, atomic_read(&de->d_count));
return dentry;
}
+ ll_d_add(de, inode);
+
spin_unlock(&dcache_lock);
return de;
struct dentry **de = icbd->icbd_childp;
struct inode *parent = icbd->icbd_parent;
struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct dentry *dentry = *de, *saved = *de;
+ struct dentry *dentry = *de;
struct inode *inode = NULL;
int rc;
dentry = *de = ll_find_alias(inode, dentry);
} else {
ENTRY;
+ spin_lock(&dcache_lock);
+ ll_d_add(dentry, inode);
+ spin_unlock(&dcache_lock);
}
- dentry->d_op = &ll_d_ops;
ll_set_dd(dentry);
-
- if (dentry == saved)
- d_add(dentry, inode);
+ dentry->d_op = &ll_d_ops;
RETURN(0);
}
case FSFILT_OP_RENAME:
/* modify additional directory */
nblocks += EXT3_SINGLEDATA_TRANS_BLOCKS;
+ nblocks += (EXT3_INDEX_EXTRA_TRANS_BLOCKS +
+ EXT3_SINGLEDATA_TRANS_BLOCKS) * logs;
/* no break */
case FSFILT_OP_SYMLINK:
/* additional block + block bitmap + GDT for long symlink */
save->pwd = dget(current->fs->pwd);
save->pwdmnt = mntget(current->fs->pwdmnt);
save->ngroups = current_ngroups;
+ save->ouc.ouc_umask = current->fs->umask;
LASSERT(save->pwd);
LASSERT(save->pwdmnt);
save->ouc.ouc_cap = current->cap_effective;
save->ouc.ouc_suppgid1 = current_groups[0];
save->ouc.ouc_suppgid2 = current_groups[1];
- save->ouc.ouc_umask = current->fs->umask;
current->fsuid = uc->ouc_fsuid;
current->fsgid = uc->ouc_fsgid;
current->cap_effective = uc->ouc_cap;
current_ngroups = 0;
- current->fs->umask = 0; /* umask already applied on client */
if (uc->ouc_suppgid1 != -1)
current_groups[current_ngroups++] = uc->ouc_suppgid1;
if (uc->ouc_suppgid2 != -1)
current_groups[current_ngroups++] = uc->ouc_suppgid2;
}
+ current->fs->umask = 0; /* umask already applied on client */
set_fs(new_ctx->fs);
set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
dput(saved->pwd);
mntput(saved->pwdmnt);
+ current->fs->umask = saved->ouc.ouc_umask;
if (uc) {
current->fsuid = saved->ouc.ouc_fsuid;
current->fsgid = saved->ouc.ouc_fsgid;
current_ngroups = saved->ngroups;
current_groups[0] = saved->ouc.ouc_suppgid1;
current_groups[1] = saved->ouc.ouc_suppgid2;
- current->fs->umask = saved->ouc.ouc_umask;
}
/*
if (dchild->d_inode) {
int old_mode = dchild->d_inode->i_mode;
- if (!S_ISDIR(old_mode))
+ if (!S_ISDIR(old_mode)) {
+ CERROR("found %s (%lu/%u) is mode %o\n", name,
+ dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation, old_mode);
GOTO(out_err, err = -ENOTDIR);
+ }
/* Fixup directory permissions if necessary */
if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
ldlm_server_blocking_ast, NULL);
+ fail = OBD_FAIL_LDLM_REPLY;
break;
case LDLM_CONVERT:
DEBUG_REQ(D_INODE, req, "convert");
fidlen = ll_fid2str(fidname, inode->i_ino, inode->i_generation);
- CDEBUG(D_ERROR, "pending destroy of %dx open %d linked %s %s = %s\n",
+ CDEBUG(D_INODE, "pending destroy of %dx open %d linked %s %s = %s\n",
mds_orphan_open_count(inode), inode->i_nlink,
S_ISDIR(inode->i_mode) ? "dir" :
S_ISREG(inode->i_mode) ? "file" : "other",rec->ur_name,fidname);
struct mds_obd *mds = mds_req2mds(req);
struct lustre_handle dlm_handles[4];
struct mds_body *body = NULL;
+ struct lov_mds_md *lmm = NULL;
int rc = 0, lock_count = 3, cleanup_phase = 0;
void *handle = NULL;
ENTRY;
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
de_srcdir->d_inode->i_sb);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
/* Check if we are moving old entry into its child. 2.6 does not
check for this in vfs_rename() anymore */
if (is_subdir(de_new, de_old))
GOTO(cleanup, rc = -EINVAL);
+#endif
+
+ lmm = lustre_msg_buf(req->rq_repmsg, 1, 0);
+ handle = fsfilt_start_log(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME,
+ NULL, le32_to_cpu(lmm->lmm_stripe_count));
- handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
atomic_t portal_kmemory = {0};
#endif
-struct semaphore obd_conf_sem; /* serialize configuration commands */
struct obd_device obd_dev[MAX_OBD_DEVICES];
struct list_head obd_types;
+spinlock_t obd_dev_lock;
#ifndef __KERNEL__
atomic_t obd_memory;
int obd_memmax;
DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq);
#ifdef __KERNEL__
+/* Log the current value of obd_fail_loc with CWARN and return it. */
+unsigned int obd_print_fail_loc(void)
+{
+ CWARN("obd_fail_loc = %x\n", obd_fail_loc);
+ return obd_fail_loc;
+}
+
/* opening /dev/obd */
static int obd_class_open(struct inode * inode, struct file * file)
{
struct obd_ioctl_data *data;
struct portals_debug_ioctl_data *debug_data;
struct obd_device *obd = NULL;
- int err = 0, len = 0, serialised = 0;
+ int err = 0, len = 0;
ENTRY;
if (current->fsuid != 0)
return 0;
}
- switch (cmd) {
- case OBD_IOC_BRW_WRITE:
- case OBD_IOC_BRW_READ:
- case OBD_IOC_GETATTR:
- case ECHO_IOC_ENQUEUE:
- case ECHO_IOC_CANCEL:
- case OBD_IOC_CLIENT_RECOVER:
- case OBD_IOC_CATLOGLIST:
- case OBD_IOC_LLOG_INFO:
- case OBD_IOC_LLOG_PRINT:
- case OBD_IOC_LLOG_CANCEL:
- case OBD_IOC_LLOG_CHECK:
- case OBD_IOC_LLOG_REMOVE:
- break;
- default:
- down(&obd_conf_sem);
- serialised = 1;
- break;
- }
-
CDEBUG(D_IOCTL, "cmd = %x, obd = %p\n", cmd, obd);
if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
CERROR("OBD ioctl: data error\n");
out:
if (buf)
obd_ioctl_freedata(buf, len);
- if (serialised)
- up(&obd_conf_sem);
RETURN(err);
} /* class_handle_ioctl */
EXPORT_SYMBOL(obd_dev);
EXPORT_SYMBOL(obdo_cachep);
EXPORT_SYMBOL(obd_fail_loc);
+EXPORT_SYMBOL(obd_print_fail_loc);
EXPORT_SYMBOL(obd_race_waitq);
EXPORT_SYMBOL(obd_dump_on_timeout);
EXPORT_SYMBOL(obd_timeout);
if (err)
return err;
- sema_init(&obd_conf_sem, 1);
+ spin_lock_init(&obd_dev_lock);
INIT_LIST_HEAD(&obd_types);
err = misc_register(&obd_psdev);
RETURN(0);
} /* class_unregister_type */
+/* Claim the first obd_dev[] slot that has no type attached. Under
+ * obd_dev_lock the slot is zeroed, keeps its minor number and is tagged with
+ * @type. Returns the claimed device, or NULL when every slot has a type. */
-struct obd_device *class_newdev(int *dev)
+struct obd_device *class_newdev(struct obd_type *type)
{
struct obd_device *result = NULL;
int i;
- for (i = 0 ; i < MAX_OBD_DEVICES ; i++) {
+ spin_lock(&obd_dev_lock);
+ /* the "result == NULL" test ends the scan once a slot is claimed */
+ for (i = 0 ; i < MAX_OBD_DEVICES && result == NULL; i++) {
struct obd_device *obd = &obd_dev[i];
if (!obd->obd_type) {
+ LASSERT(obd->obd_minor == i);
+ memset(obd, 0, sizeof(*obd));
+ obd->obd_minor = i;
+ obd->obd_type = type;
result = obd;
- if (dev)
- *dev = i;
- break;
}
}
+ spin_unlock(&obd_dev_lock);
return result;
}
+/* Return @obd to the unused state: under obd_dev_lock the whole structure is
+ * zeroed (which also clears obd_type) and only its minor number is kept. */
+void class_release_dev(struct obd_device *obd)
+{
+ int minor = obd->obd_minor;
+
+ spin_lock(&obd_dev_lock);
+ memset(obd, 0, sizeof(*obd));
+ obd->obd_minor = minor;
+ spin_unlock(&obd_dev_lock);
+}
+
+/* Return the obd_dev[] index of the device whose obd_name equals @name, or -1
+ * when @name is NULL or no device matches. The table is scanned with
+ * obd_dev_lock held; the lock is released on every return path. */
int class_name2dev(char *name)
{
int i;
if (!name)
return -1;
+ spin_lock(&obd_dev_lock);
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
- if (obd->obd_name && strcmp(name, obd->obd_name) == 0)
+ if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
+ spin_unlock(&obd_dev_lock);
return i;
+ }
}
+ spin_unlock(&obd_dev_lock);
return -1;
}
{
int i;
+ spin_lock(&obd_dev_lock);
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
- if (obd_uuid_equals(uuid, &obd->obd_uuid))
+ if (obd_uuid_equals(uuid, &obd->obd_uuid)) {
+ spin_unlock(&obd_dev_lock);
return i;
+ }
}
+ spin_unlock(&obd_dev_lock);
return -1;
}
{
int i;
+ spin_lock(&obd_dev_lock);
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (obd->obd_type == NULL)
if (obd_uuid_equals(tgt_uuid, &imp->imp_target_uuid) &&
((grp_uuid)? obd_uuid_equals(grp_uuid,
&obd->obd_uuid) : 1)) {
+ spin_unlock(&obd_dev_lock);
return obd;
}
}
}
+ spin_unlock(&obd_dev_lock);
return NULL;
}
struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
{
int i;
+
if (next == NULL)
i = 0;
else if (*next >= 0 && *next < MAX_OBD_DEVICES)
else
return NULL;
+ spin_lock(&obd_dev_lock);
for (; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (obd->obd_type == NULL)
if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
if (next != NULL)
*next = i+1;
+ spin_unlock(&obd_dev_lock);
return obd;
}
}
+ spin_unlock(&obd_dev_lock);
return NULL;
}
(llh->llh_count == 1) &&
(loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) {
rc = llog_destroy(loghandle);
- if (rc)
+ if (rc) {
CERROR("failure destroying log after last cancel: %d\n",
rc);
- LASSERT(rc == 0);
- RETURN(1);
+ ext2_set_bit(index, llh->llh_bitmap);
+ llh->llh_count++;
+ } else {
+ rc = 1;
+ }
+ RETURN(rc);
}
rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0);
- if (rc)
+ if (rc) {
CERROR("failure re-writing header %d\n", rc);
- LASSERT(rc == 0);
+ ext2_set_bit(index, llh->llh_bitmap);
+ llh->llh_count++;
+ }
RETURN(rc);
}
EXPORT_SYMBOL(llog_cancel_rec);
ENTRY;
llh = cathandle->lgh_hdr;
- bitmap_size = sizeof(llh->llh_bitmap) * 8;
+ bitmap_size = LLOG_BITMAP_SIZE(llh);
index = (cathandle->lgh_last_idx + 1) % bitmap_size;
loghandle = cathandle->u.chd.chd_current_log;
if (loghandle) {
struct llog_log_hdr *llh = loghandle->lgh_hdr;
- if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
- down_write(&loghandle->lgh_lock);
+ down_write(&loghandle->lgh_lock);
+ if (loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) {
up_read(&cathandle->lgh_lock);
RETURN(loghandle);
+ } else {
+ up_write(&loghandle->lgh_lock);
}
}
if (!create) {
loghandle = cathandle->u.chd.chd_current_log;
if (loghandle) {
struct llog_log_hdr *llh = loghandle->lgh_hdr;
- if (loghandle->lgh_last_idx < (sizeof(llh->llh_bitmap)*8) - 1) {
- down_write(&loghandle->lgh_lock);
+ down_write(&loghandle->lgh_lock);
+ if (loghandle->lgh_last_idx < LLOG_BITMAP_SIZE(llh) - 1) {
up_write(&cathandle->lgh_lock);
RETURN(loghandle);
+ } else {
+ up_write(&loghandle->lgh_lock);
}
}
int i, bitmap_size, idx;
ENTRY;
- bitmap_size = sizeof(llh->llh_bitmap) * 8;
+ bitmap_size = LLOG_BITMAP_SIZE(llh);
if (llh->llh_cat_idx == (index - 1)) {
idx = llh->llh_cat_idx + 1;
llh->llh_cat_idx = idx;
/* NOTE: padding is a record, but no bit is set */
if (left != 0 && left != reclen &&
left < (reclen + LLOG_MIN_REC_SIZE)) {
- int bitmap_size = sizeof(llh->llh_bitmap) * 8;
loghandle->lgh_last_idx++;
rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx);
if (rc)
RETURN(rc);
/* if it's the last idx in log file, then return -ENOSPC */
- if (loghandle->lgh_last_idx == bitmap_size - 1)
+ if (loghandle->lgh_last_idx == LLOG_BITMAP_SIZE(llh) - 1)
RETURN(-ENOSPC);
}
loghandle->lgh_last_idx++;
index = loghandle->lgh_last_idx;
+ LASSERT(index < LLOG_BITMAP_SIZE(llh));
rec->lrh_index = index;
if (buf == NULL) {
lrt = (struct llog_rec_tail *)
struct obd_type *type;
struct obd_device *obd;
char *typename, *name, *uuid;
- int minor, rc, len, dev, stage = 0;
+ int rc, len, cleanup_phase = 0;
if (!lcfg->lcfg_inllen1 || !lcfg->lcfg_inlbuf1) {
CERROR("No type passed!\n");
CERROR("OBD: unknown type: %s\n", typename);
RETURN(-EINVAL);
}
- stage = 1;
+ cleanup_phase = 1; /* class_put_type */
obd = class_name2obd(name);
if (obd != NULL) {
GOTO(out, rc = -EEXIST);
}
- obd = class_newdev(&dev);
+ obd = class_newdev(type);
if (obd == NULL)
GOTO(out, rc = -EINVAL);
+ cleanup_phase = 2; /* class_release_dev */
- /* have we attached a type to this device */
- if (obd->obd_attached || obd->obd_type) {
- CERROR("OBD: Device %d already typed as %s.\n",
- obd->obd_minor, MKSTR(obd->obd_type->typ_name));
- GOTO(out, rc = -EBUSY);
- }
-
- LASSERT(obd == (obd_dev + obd->obd_minor));
-
- minor = obd->obd_minor;
- memset(obd, 0, sizeof(*obd));
- obd->obd_minor = minor;
- obd->obd_type = type;
INIT_LIST_HEAD(&obd->obd_exports);
obd->obd_num_exports = 0;
spin_lock_init(&obd->obd_dev_lock);
if (!obd->obd_name)
GOTO(out, rc = -ENOMEM);
memcpy(obd->obd_name, name, len);
- stage = 2;
+ cleanup_phase = 3; /* free obd_name */
len = strlen(uuid);
if (len >= sizeof(obd->obd_uuid)) {
obd->obd_minor, typename);
RETURN(0);
out:
+ /* unwind in reverse order of setup; each phase falls through to the
+ * earlier ones */
- switch (stage) {
- case 2:
+ switch (cleanup_phase) {
+ case 3:
OBD_FREE(obd->obd_name, strlen(obd->obd_name) + 1);
+ case 2:
+ class_release_dev(obd);
case 1:
- class_put_type(obd->obd_type);
+ class_put_type(type);
- obd->obd_type = NULL;
+ /* obd must not be touched here: in phase 1 it is either NULL (no
+ * free slot) or the already attached device that owns the name
+ * (-EEXIST); in later phases class_release_dev() already zeroed it */
}
return rc;
int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
{
- int minor;
int err = 0;
ENTRY;
obd->obd_attached = 0;
obd->obd_type->typ_refcnt--;
class_put_type(obd->obd_type);
- obd->obd_type = NULL;
- minor = obd->obd_minor;
- memset(obd, 0, sizeof(*obd));
- obd->obd_minor = minor;
+ class_release_dev(obd);
RETURN(err);
}
/* Only do this on the MDS OSC's */
if (imp->imp_server_timeout) {
struct osc_creator *oscc = &obd->u.cli.cl_oscc;
-
+
spin_lock(&oscc->oscc_lock);
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
spin_unlock(&oscc->oscc_lock);
/* all pages go to failing rpcs due to the invalid import */
osc_check_rpcs(cli);
spin_unlock(&cli->cl_loi_list_lock);
-
+
ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
break;
BACKINGFS="ldiskfs"
],[
KMODEXT=".o"
- linux25="no"
+ linux25="no"
])
AC_MSG_CHECKING([if you are using Linux 2.6])
AC_MSG_RESULT([$linux25])
AC_MSG_RESULT([$LINUXRELEASE])
AC_SUBST(LINUXRELEASE)
- moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
+ moduledir='/lib/modules/'$LINUXRELEASE/kernel
modulefsdir='$(moduledir)/fs/$(PACKAGE)'
- modulenetdir='$(moduledir)/net/$(PACKAGE)'
+ modulenetdir='$(moduledir)/net/$(PACKAGE)'
AC_SUBST(moduledir)
AC_SUBST(modulefsdir)
- AC_SUBST(modulenetdir)
+ AC_SUBST(modulenetdir)
# ------------ RELEASE --------------------------------
AC_MSG_CHECKING([for Lustre release])
LIBEFENCE=""
fi
AC_SUBST(LIBEFENCE)
+
+# -------- enable acceptor libwrap (TCP wrappers) support? -------
+AC_MSG_CHECKING([if libwrap support is requested])
+AC_ARG_ENABLE([libwrap],
+ AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
+ [case "${enableval}" in
+ yes) enable_libwrap=yes ;;
+ no) enable_libwrap=no ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;;
+ esac],[enable_libwrap=no])
+AC_MSG_RESULT([$enable_libwrap])
+if test x$enable_libwrap = xyes ; then
+ LIBWRAP="-lwrap"
+ AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
+else
+ LIBWRAP=""
+fi
+AC_SUBST(LIBWRAP)
const int line);
#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
__FUNCTION__, __LINE__))
-/* it would be great to dump_stack() here, but some kernels
- * export it as show_stack() and I can't be bothered to
- * proprely engage in that dance right now */
#define LASSERTF(cond, fmt...) \
do { \
if (unlikely(!(cond))) { \
- portals_debug_msg(0, D_EMERG, __FILE__, __FUNCTION__,\
- __LINE__, CDEBUG_STACK, \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\
+ __FUNCTION__,__LINE__, CDEBUG_STACK,\
"ASSERTION(" #cond ") failed:" fmt);\
LBUG(); \
} \
#define LBUG_WITH_LOC(file, func, line) \
do { \
CEMERG("LBUG\n"); \
+ CERROR("STACK: %s\n", portals_debug_dumpstack()); \
portals_debug_dumplog(); \
portals_run_lbug_upcall(file, func, line); \
set_task_state(current, TASK_UNINTERRUPTIBLE); \
#include "router.h"
#define KPR_PROC_ROUTER "sys/portals/router"
+#define KPR_PROC_ROUTES "sys/portals/routes"
-int
-kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data)
+/* Used for multi-page route list book keeping */
+struct proc_route_data {
+ struct list_head *curr;
+ unsigned int generation;
+ off_t skip;
+} kpr_read_routes_data;
+
+/* nal2name support re-used from utils/portals.c */
+struct name2num {
+ char *name;
+ int num;
+} nalnames[] = {
+ { "any", 0},
+ { "elan", QSWNAL},
+ { "tcp", SOCKNAL},
+ { "gm", GMNAL},
+ { "ib", IBNAL},
+ { NULL, -1}
+};
+
+/* Walk @table, which ends at the entry whose name is NULL, and return the
+ * first entry whose num equals @num; NULL when the terminator is reached. */
+static struct name2num *name2num_lookup_num(struct name2num *table, int num)
+{
+ struct name2num *entry;
+
+ for (entry = table; entry->name != NULL; entry++) {
+ if (entry->num == num)
+ return entry;
+ }
+ return NULL;
+}
+
+/* Translate a NAL id into its name from nalnames[]; "???" when the id is not
+ * in the table. */
+static char *nal2name(int nal)
{
- unsigned long long bytes = kpr_fwd_bytes;
- unsigned long packets = kpr_fwd_packets;
- unsigned long errors = kpr_fwd_errors;
+ struct name2num *e = name2num_lookup_num(nalnames, nal);
+ return ((e == NULL) ? "???" : e->name);
+}
+
+
+/* read_proc handler for KPR_PROC_ROUTER: emits one line holding the forwarded
+ * byte, packet and error counters followed by the current queue depth.
+ * Everything is produced by the read at offset 0; any later offset returns 0.
+ * Returns the number of bytes written to @page. */
+static int kpr_proc_router_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ unsigned long long bytes = kpr_fwd_bytes;
+ unsigned long packets = kpr_fwd_packets;
+ unsigned long errors = kpr_fwd_errors;
unsigned int qdepth = atomic_read (&kpr_queue_depth);
- int len;
-
- *eof = 1;
- if (off != 0)
- return (0);
-
- len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth);
-
- *start = page;
- return (len);
+ int len;
+
+ *eof = 1;
+ if (off != 0)
+ return (0);
+
+ /* all four values are unsigned; signed conversions would print large
+ * counters as negative numbers */
+ len = sprintf(page, "%Lu %lu %lu %u\n", bytes, packets, errors, qdepth);
+
+ *start = page;
+ return (len);
}
+/* write_proc handler for KPR_PROC_ROUTER: any write resets the three
+ * forwarding counters. The written data is not examined; @count is returned
+ * so the whole write is reported as consumed. */
-int
-kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data)
+static int kpr_proc_router_write(struct file *file, const char *ubuffer,
+ unsigned long count, void *data)
{
- /* Ignore what we've been asked to write, and just zero the stats counters */
- kpr_fwd_bytes = 0;
- kpr_fwd_packets = 0;
- kpr_fwd_errors = 0;
+ /* Ignore what we've been asked to write, and just zero the stats */
+ kpr_fwd_bytes = 0;
+ kpr_fwd_packets = 0;
+ kpr_fwd_errors = 0;
- return (count);
+ return (count);
}
-void
-kpr_proc_init(void)
+/* read_proc handler for KPR_PROC_ROUTES: formats one line per entry of
+ * kpr_routes into @page. The walk position, the route generation seen at the
+ * start and the number of bytes of a partially delivered line are kept in the
+ * struct proc_route_data passed as @data, so a table larger than one read can
+ * be continued by subsequent calls. Returns the number of bytes made
+ * available at *start.
+ * NOTE(review): the generation check and the initial kpr_routes.next read
+ * happen before kpr_rwlock is taken -- confirm this is intended. */
+static int kpr_proc_routes_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
{
- struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL);
+ struct proc_route_data *prd = data;
+ kpr_route_entry_t *re;
+ kpr_gateway_entry_t *ge;
+ int chunk_len = 0;
+ int line_len = 0;
+ int user_len = 0;
+
+ *eof = 1;
+ *start = page;
+
+ if (prd->curr == NULL) {
+ if (off != 0)
+ return 0;
+
+ /* First pass, initialize our private data */
+ prd->curr = kpr_routes.next;
+ prd->generation = kpr_routes_generation;
+ prd->skip = 0;
+ } else {
+ /* Abort: the route list generation changed since the walk began */
+ if (prd->generation != kpr_routes_generation) {
+ prd->curr = NULL;
+ return sprintf(page, "\nError: Routes Changed\n");
+ }
+
+ /* All the routes have been walked */
+ if (prd->curr == &kpr_routes) {
+ prd->curr = NULL;
+ return 0;
+ }
+ }
+
+ read_lock(&kpr_rwlock);
+ /* skip the part of the first line that the previous call delivered */
+ *start = page + prd->skip;
+ user_len = -prd->skip;
+
+ for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) {
+ re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
+ ge = re->kpre_gateway;
+
+ line_len = sprintf(page + chunk_len,
+ "%12s "LPX64" : "LPX64" - "LPX64", %s\n",
+ nal2name(ge->kpge_nalid), ge->kpge_nid,
+ re->kpre_lo_nid, re->kpre_hi_nid,
+ ge->kpge_alive ? "up" : "down");
+ chunk_len += line_len;
+ user_len += line_len;
- if (entry == NULL)
- {
+ /* The route table will exceed one page */
+ if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) {
+ prd->curr = prd->curr->next;
+ break;
+ }
+ }
+
+ *eof = 0;
+
+ /* The caller received only a portion of the last entry; the
+ * remainder will be delivered in the next page if asked for.
+ */
+ if (user_len > count) {
+ prd->curr = prd->curr->prev;
+ prd->skip = line_len - (user_len - count);
+ read_unlock(&kpr_rwlock);
+ return count;
+ }
+
+ /* Not enough data to entirely satisfy the caller's request */
+ prd->skip = 0;
+ read_unlock(&kpr_rwlock);
+ return user_len;
+}
+
+/* write_proc handler for KPR_PROC_ROUTES: discards the data and returns
+ * @count so the write is reported as fully consumed. */
+static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
+ unsigned long count, void *data)
+{
+ /* no-op; lctl should be used to adjust the routes */
+ return (count);
+}
+
+/* Create the KPR_PROC_ROUTER and KPR_PROC_ROUTES proc entries and hook up
+ * their read/write handlers. A failure to create either entry is logged and
+ * ends the function; if the second creation fails the first entry stays
+ * registered. */
+void kpr_proc_init(void)
+{
+ struct proc_dir_entry *router_entry;
+ struct proc_dir_entry *routes_entry;
+
+ /* Initialize KPR_PROC_ROUTER */
+ router_entry = create_proc_entry (KPR_PROC_ROUTER,
+ S_IFREG | S_IRUGO | S_IWUSR, NULL);
+
+ if (router_entry == NULL) {
CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
return;
}
- entry->data = NULL;
- entry->read_proc = kpr_proc_read;
- entry->write_proc = kpr_proc_write;
+ router_entry->data = NULL;
+ router_entry->read_proc = kpr_proc_router_read;
+ router_entry->write_proc = kpr_proc_router_write;
+
+ /* Initialize KPR_PROC_ROUTES */
+ routes_entry = create_proc_entry (KPR_PROC_ROUTES,
+ S_IFREG | S_IRUGO | S_IWUSR, NULL);
+
+ if (routes_entry == NULL) {
+ CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES);
+ return;
+ }
+
+ /* reset the paging state that kpr_proc_routes_read receives as data */
+ kpr_read_routes_data.curr = NULL;
+ kpr_read_routes_data.generation = 0;
+ kpr_read_routes_data.skip = 0;
+
+ routes_entry->data = &kpr_read_routes_data;
+ routes_entry->read_proc = kpr_proc_routes_read;
+ routes_entry->write_proc = kpr_proc_routes_write;
}
+/* Remove both proc entries created by kpr_proc_init(). */
-void
-kpr_proc_fini(void)
+void kpr_proc_fini(void)
{
remove_proc_entry(KPR_PROC_ROUTER, 0);
+ remove_proc_entry(KPR_PROC_ROUTES, 0);
}
LIST_HEAD(kpr_gateways);
LIST_HEAD(kpr_nals);
+unsigned int kpr_routes_generation;
unsigned long long kpr_fwd_bytes;
unsigned long kpr_fwd_packets;
unsigned long kpr_fwd_errors;
list_for_each (e, &kpr_gateways) {
kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
kpge_list);
-
+
if (ge2->kpge_nalid == gateway_nalid &&
ge2->kpge_nid == gateway_nid) {
PORTAL_FREE (ge, sizeof (*ge));
if (!dup) {
/* Adding a new gateway... */
-
list_add (&ge->kpge_list, &kpr_gateways);
/* ...zero all gateway weights so this one doesn't have to
kpge_list);
atomic_set (&ge2->kpge_weight, 0);
}
-
}
re->kpre_gateway = ge;
ge->kpge_refcount++;
list_add (&re->kpre_list, &kpr_routes);
+ kpr_routes_generation++;
write_unlock_irqrestore (&kpr_rwlock, flags);
return (0);
ptl_nid_t lo, ptl_nid_t hi)
{
int specific = (lo != PTL_NID_ANY);
- unsigned long flags;
+ unsigned long flags;
int rc = -ENOENT;
- struct list_head *e;
- struct list_head *n;
+ struct list_head *e;
+ struct list_head *n;
- CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n",
+ CDEBUG(D_NET, "Del route [%d] "LPX64" : "LPX64" - "LPX64"\n",
gw_nalid, gw_nid, lo, hi);
LASSERT(!in_interrupt());
/* NB Caller may specify either all routes via the given gateway
* (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
* actual NIDs) */
-
if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
return (-EINVAL);
- write_lock_irqsave(&kpr_rwlock, flags);
+ write_lock_irqsave(&kpr_rwlock, flags);
list_for_each_safe (e, n, &kpr_routes) {
kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t,
kpre_list);
kpr_gateway_entry_t *ge = re->kpre_gateway;
-
+
if (ge->kpge_nalid != gw_nalid ||
ge->kpge_nid != gw_nid ||
- (specific &&
+ (specific &&
(lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
continue;
break;
}
+ kpr_routes_generation++;
write_unlock_irqrestore(&kpr_rwlock, flags);
+
return (rc);
}
CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
atomic_read(&portal_kmemory));
+ kpr_routes_generation = 0;
kpr_proc_init();
PORTAL_SYMBOL_REGISTER(kpr_router_interface);
extern int kpr_sys_notify (int gw_nalid, ptl_nid_t gw_nid,
int alive, time_t when);
+extern unsigned int kpr_routes_generation;
extern unsigned long long kpr_fwd_bytes;
extern unsigned long kpr_fwd_packets;
extern unsigned long kpr_fwd_errors;
extern atomic_t kpr_queue_depth;
+extern struct list_head kpr_routes;
+extern rwlock_t kpr_rwlock;
#endif /* _KPLROUTER_H */
endif
acceptor_SOURCES = acceptor.c
+acceptor_LDADD = $(LIBWRAP)
wirecheck_SOURCES = wirecheck.c
#include <unistd.h>
#include <syslog.h>
#include <errno.h>
+#ifdef HAVE_LIBWRAP
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <tcpd.h>
+#endif
#include <portals/api-support.h>
#include <portals/list.h>
#define PROGNAME "acceptor"
+#ifdef HAVE_LIBWRAP
+/* needed because libwrap declares these as externs */
+int allow_severity = LOG_INFO;
+int deny_severity = LOG_WARNING;
+#endif
+
void create_pidfile(char *name, int port)
{
char pidfile[1024];
int cfd;
struct portal_ioctl_data data;
struct portals_cfg pcfg;
-
+#ifdef HAVE_LIBWRAP
+ struct request_info request;
+ char addrstr[INET_ADDRSTRLEN];
+#endif
+
cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
if ( cfd < 0 ) {
perror("accept");
continue;
}
+#ifdef HAVE_LIBWRAP
+ /* libwrap access control: ask hosts_access() about the peer of the
+ * freshly accepted socket, checked under the daemon name "lustre" */
+ request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0);
+ sock_host(&request);
+ if (!hosts_access(&request)) {
+ inet_ntop(AF_INET, &clntaddr.sin_addr,
+ addrstr, INET_ADDRSTRLEN);
+ /* the port is an unsigned 16-bit value: %hu, not %hd, so
+ * ports above 32767 are not logged as negative numbers */
+ syslog(LOG_WARNING, "Unauthorized access from %s:%hu\n",
+ addrstr, ntohs(clntaddr.sin_port));
+ close (cfd);
+ continue;
+ }
+#endif
show_connection (cfd, clntaddr.sin_addr.s_addr);
PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
return 0;
}
- sprintf(filename, "%s-%ld.tmp", argv[1], random);
+ sprintf(filename, "%s.%lu.%u", argc > 1 ? argv[1] : "/tmp/lustre-log",
+ time(NULL), getpid());
+
if (argc > 2)
raw = atoi(argv[2]);
unlink(filename);
* EINTR.
* I don't really care if we go once more round the loop in
* the error cases -eeb. */
- } while (rc != 0);
+ } while (rc != 0 || set->set_remaining != 0);
LASSERT(set->set_remaining == 0);
}
ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
- LASSERT(ctxt != NULL);
+ if (ctxt == NULL)
+ GOTO(out, rc = -EINVAL);
disk_obd = ctxt->loc_exp->exp_obd;
push_ctxt(&saved, &disk_obd->obd_ctxt, NULL);
GOTO(out, rc = -ENOMEM);
ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
- LASSERT(ctxt != NULL);
+ if (ctxt == NULL)
+ GOTO(out, rc = -EINVAL);
disk_obd = ctxt->loc_exp->exp_obd;
push_ctxt(&saved, &disk_obd->obd_ctxt, NULL);
}
ctxt = llog_get_context(obd, body->lgd_ctxt_idx);
- LASSERT(ctxt != NULL);
+ if (ctxt == NULL)
+ GOTO(out, rc = -EINVAL);
disk_obd = ctxt->loc_exp->exp_obd;
push_ctxt(&saved, &disk_obd->obd_ctxt, NULL);
LASSERT(ctxt);
+ down(&ctxt->loc_sem);
if (ctxt->loc_imp == NULL) {
CWARN("no import for ctxt %p\n", ctxt);
- RETURN(0);
- }
-
- if (count == 0 || cookies == NULL) {
- down(&ctxt->loc_sem);
- if (ctxt->loc_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
- GOTO(out, rc);
-
- llcd = ctxt->loc_llcd;
- GOTO(send_now, rc);
+ GOTO(out, rc = 0);
}
- down(&ctxt->loc_sem);
llcd = ctxt->loc_llcd;
- if (llcd == NULL) {
- llcd = llcd_grab();
+
+ if (count > 0 && cookies != NULL) {
if (llcd == NULL) {
- CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
- cookies->lgc_lgl.lgl_oid,
- cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
- GOTO(out, rc = -ENOMEM);
+ llcd = llcd_grab();
+ if (llcd == NULL) {
+ CERROR("couldn't get an llcd - dropped "LPX64
+ ":%x+%u\n",
+ cookies->lgc_lgl.lgl_oid,
+ cookies->lgc_lgl.lgl_ogen,
+ cookies->lgc_index);
+ GOTO(out, rc = -ENOMEM);
+ }
+ llcd->llcd_ctxt = ctxt;
+ ctxt->loc_llcd = llcd;
}
- llcd->llcd_ctxt = ctxt;
- ctxt->loc_llcd = llcd;
- }
- memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
- sizeof(*cookies));
- llcd->llcd_cookiebytes += sizeof(*cookies);
+ memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes,
+ cookies, sizeof(*cookies));
+ llcd->llcd_cookiebytes += sizeof(*cookies);
+ } else {
+ if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+ GOTO(out, rc);
+ }
-send_now:
if ((LLCD_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
flags & OBD_LLOG_FL_SENDNOW)) {
CDEBUG(D_HA, "send llcd %p:%p\n", llcd, llcd->llcd_ctxt);
if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
down(&ctxt->loc_sem);
+ /* warn before the llcd is put so the message shows the real pointer
+ * rather than the NULL stored below */
+ CWARN("import will be destroyed, put "
+ "llcd %p:%p\n", ctxt->loc_llcd, ctxt);
if (ctxt->loc_llcd != NULL) {
- CWARN("import will be destroyed, put "
- "llcd %p:%p\n", ctxt->loc_llcd, ctxt);
llcd_put(ctxt->loc_llcd);
ctxt->loc_llcd = NULL;
- ctxt->loc_imp = NULL;
}
+ /* the import is forgotten whether or not an llcd was pending */
+ ctxt->loc_imp = NULL;
up(&ctxt->loc_sem);
} else {
rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
llcd_put(llcd);
continue;
}
+ up(&llcd->llcd_ctxt->loc_sem);
request = ptlrpc_prep_req(import, OBD_LOG_CANCEL, 1,
&llcd->llcd_cookiebytes,
bufs);
- up(&llcd->llcd_ctxt->loc_sem);
if (request == NULL) {
rc = -ENOMEM;
ptlrpc_req_finished(request);
continue;
}
+ up(&llcd->llcd_ctxt->loc_sem);
rc = ptlrpc_queue_wait(request);
ptlrpc_req_finished(request);
- up(&llcd->llcd_ctxt->loc_sem);
/* If the RPC failed, we put this and the remaining
* messages onto the resend list for another time. */
%attr(-, root, root) /usr/bin/mcreate
%attr(-, root, root) /usr/bin/munlink
%attr(-, root, root) /usr/bin/mkdirmany
-%attr(-, root, root) /usr/bin/iopentest1
-%attr(-, root, root) /usr/bin/iopentest2
-%attr(-, root, root) /usr/lib/lustre/python/*
-%attr(-, root, root) /usr/lib/lustre/examples/llmount.sh
-%attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh
-%attr(-, root, root) /usr/lib/lustre/examples/llecho.sh
-%attr(-, root, root) /usr/lib/lustre/examples/local.sh
-%attr(-, root, root) /usr/lib/lustre/examples/uml.sh
-%attr(-, root, root) /usr/lib/lustre/examples/lov.sh
-%attr(-, root, root) /usr/lib/lustre/examples/echo.sh
-%attr(-, root, root) /usr/lib/lustre/examples/llechocleanup.sh
+%attr(-, root, root) /usr/lib/lustre/python
+%attr(-, root, root) /usr/lib/lustre/examples
%attr(-, root, root) /etc/init.d/lustre
%attr(-, root, root) /etc/init.d/lustrefs
%attr(-, root, root) /usr/lib/libptlctl.a
%attr(-, root, root) /usr/lib/liblustreapi.a
-%attr(-, root, root) /usr/include/lustre/*.h
-%attr(-, root, root) /usr/include/portals/*.h
+%attr(-, root, root) /usr/include/lustre
+%attr(-, root, root) /usr/include/portals
%attr(-, root, root) /usr/include/linux/lustre_idl.h
%attr(-, root, root) /usr/share/man/man?/*
%files -n lustre-modules
%attr(-, root, root) %doc COPYING
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/*.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre
#portals modules
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/*.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre
%files -n lustre-source
%attr(-, root, root) /usr/src/lustre-%{version}
#%ifarch i386
#%files -n liblustre
#%attr(-, root, root) /lib/lustre
-#%attr(-, root, root) /lib/lustre/liblov.a
-#%attr(-, root, root) /lib/lustre/liblustreclass.a
-#%attr(-, root, root) /lib/lustre/libptlrpc.a
-#%attr(-, root, root) /lib/lustre/libobdecho.a
-#%attr(-, root, root) /lib/lustre/libldlm.a
-#%attr(-, root, root) /lib/lustre/libosc.a
#%attr(-, root, root) /usr/sbin/lctl
#%attr(-, root, root) /usr/sbin/lfind
#%attr(-, root, root) /usr/sbin/lstripe
#%dir /var/lib/ldap/lustre
%attr(700, ldap, ldap) /var/lib/ldap/lustre
-%post -n lustre-modules
-if [ ! -e /dev/obd ]; then
- mknod /dev/obd c 10 241
-fi
-if [ ! -e /dev/portals ]; then
- mknod /dev/portals c 10 240
-fi
+%post
/sbin/chkconfig --add lustre
/sbin/chkconfig --add lustrefs
-depmod -ae || exit 0
%preun
if [ $1 = 0 ]; then
/sbin/chkconfig --del lustrefs
fi
+%post -n lustre-modules
+if [ ! -e /dev/obd ]; then
+ mknod /dev/obd c 10 241
+fi
+if [ ! -e /dev/portals ]; then
+ mknod /dev/portals c 10 240
+fi
+depmod -ae || exit 0
+
%postun -n lustre-modules
depmod -ae || exit 0
noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod
-noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify
-noinst_PROGRAMS += openfilleddirunlink rename_many memhog
+noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany
+noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2
# noinst_PROGRAMS += ldaptest copy_attr
-bin_PROGRAMS = mcreate munlink mkdirmany iopentest1 iopentest2
+bin_PROGRAMS = mcreate munlink
endif # TESTS
stat_SOURCES = stat.c stat_fs.h
if [ "$REPLAY_OST_SINGLE" != "no" ]; then
sh replay-ost-single.sh
fi
+
+if [ "$REPLAY_DUAL" != "no" ]; then
+ sh replay-dual.sh
+fi
TMP=${TMP:-/tmp}
MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
-MDSSIZE=${MDSSIZE:-100000}
+MDSSIZE=${MDSSIZE:-400000}
FSTYPE=${FSTYPE:-ext3}
MOUNT=${MOUNT:-/mnt/lustre}
MOUNT2=${MOUNT2:-${MOUNT}2}
NETWORKTYPE=${NETWORKTYPE:-tcp}
OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
-OSTSIZE=${OSTSIZE:-200000}
+OSTSIZE=${OSTSIZE:-400000}
# specific journal size for the ost, in MB
JSIZE=${JSIZE:-0}
UNLINKMANY=${UNLINKMANY:-unlinkmany}
LCTL=${LCTL:-lctl}
-MOUNT1=${MOUNT1:-/mnt/lustre1}
+MOUNT1=${MOUNT1:-/mnt/lustre}
MOUNT2=${MOUNT2:-/mnt/lustre2}
DIR=${DIR:-$MOUNT1}
DIR2=${DIR2:-$MOUNT2}
MINFILE=$FILETMP
MINRES=$FILERES
else
- rm $TMPFILE
+ rm $FILETMP
fi
NUM=$(($NUM + 1))
done
$CREATEMANY -l$LOCKFILE $LOCKFILE -$COUNT &
CR_PID=$!
-sleep 1
+while ! test -f ${LOCKFILE}1 ; do
+ sleep 1
+done
# this will lock $DIR and ${LOCKFILE}0
$STATMANY -s $DIR2/lockdir/lockfile 1 -$COUNT &
TMP=${TMP:-/tmp}
MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
-MDSSIZE=${MDSSIZE:-100000}
+MDSSIZE=${MDSSIZE:-400000}
FSTYPE=${FSTYPE:-ext3}
MOUNT=${MOUNT:-/mnt/lustre}
MOUNT2=${MOUNT2:-${MOUNT}2}
}
run_test 17 "fail OST during recovery (3571)"
+# cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for it
+export NOW=0
+
+test_18() { # bug 3822 - evicting client with enqueued lock
+ set -vx # trace every command from here on; NOTE(review): tracing is never switched back off
+ mkdir -p $MOUNT1/$tdir
+ touch $MOUNT1/$tdir/f0
+#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b
+ statmany -s $MOUNT1/$tdir/f 500 & # background stat load issued through the first mount
+ OPENPID=$! # pid of the background statmany, waited for below
+ NOW=`date +%s` # timestamp consumed by the cleanup code at the end of this script
+ do_facet mds sysctl -w lustre.fail_loc=0x8000030b # hold enqueue
+ sleep 1
+#define OBD_FAIL_LDLM_BL_CALLBACK 0x305
+ do_facet client sysctl -w lustre.fail_loc=0x80000305 # drop cb, evict
+ cancel_lru_locks MDC
+ usleep 500 # wait to ensure first client is one that will be evicted
+ openfile -f O_RDONLY $MOUNT2/$tdir/f0 # access the same directory through the second mount
+ wait $OPENPID # reap the background statmany before inspecting the kernel log
+ dmesg | grep "entering recovery in server" && \
+ error "client not evicted" || true
+}
+run_test 18 "ldlm_handle_enqueue succeeds on evicted export (3822)"
+
if [ "$ONLY" != "setup" ]; then
equals_msg test complete, cleaning up
+ # SLEEP is the number of seconds elapsed since test 18 stamped $NOW.
+ # Wait only for what is still left of $TIMEOUT: sleeping for the elapsed
+ # time instead would let cleanup start before the timer has run out.
+ SLEEP=$((`date +%s` - $NOW))
+ [ $SLEEP -lt $TIMEOUT ] && sleep $((TIMEOUT - SLEEP))
$CLEANUP
fi
}
run_test 49 "Double OSC recovery, don't LASSERT"
+# b3764 timed out lock replay
+test_52() {
+ touch $DIR/$tfile
+ cancel_lru_locks MDC
+
+ multiop $DIR/$tfile s # NOTE(review): presumably "s" stats the file so a lock is held again - confirm against multiop
+ replay_barrier mds
+ do_facet mds "sysctl -w lustre.fail_loc=0x8000030c" # arm fault injection on the MDS before failing it
+ fail mds
+ do_facet mds "sysctl -w lustre.fail_loc=0x0" # clear the fault injection again
+
+ $CHECKSTAT -t file $DIR/$tfile-* && return 3 || true # rc 3 if any $tfile-* regular file is found
+}
+run_test 52 "time out lock replay (3764)"
+
equals_msg test complete, cleaning up
$CLEANUP
--- /dev/null
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+/*
+ * rmdirmany: remove the directories "<dirnamebase>-0" through
+ * "<dirnamebase>-<count - 1>".
+ *
+ * argv[1] is the directory name prefix and argv[2] the number of
+ * directories to remove (any base understood by strtol()).  Removal stops
+ * at the first rmdir() failure.
+ *
+ * Returns 0 on success, 1 for a usage error or an over-long name, and the
+ * return value of the failing rmdir() otherwise.
+ */
+int main(int argc, char **argv)
+{
+        long i, count;
+        int rc = 0;
+        char dirname[4096];
+        char *end;
+
+        if (argc < 3) {
+                printf("Usage %s dirnamebase count\n", argv[0]);
+                return 1;
+        }
+
+        if (strlen(argv[1]) > 4080) {
+                printf("name too long\n");
+                return 1;
+        }
+
+        /* Reject garbage, negative and out-of-range counts instead of
+         * silently truncating an unsigned long into an int. */
+        errno = 0;
+        count = strtol(argv[2], &end, 0);
+        if (end == argv[2] || *end != '\0' || errno == ERANGE || count < 0) {
+                printf("Usage %s dirnamebase count\n", argv[0]);
+                return 1;
+        }
+
+        for (i = 0; i < count; i++) {
+                int len;
+
+                /* Bounded formatting: the prefix check above leaves room
+                 * for realistic indices, but never write past dirname. */
+                len = snprintf(dirname, sizeof(dirname), "%s-%ld",
+                               argv[1], i);
+                if (len < 0 || (size_t)len >= sizeof(dirname)) {
+                        printf("name too long\n");
+                        return 1;
+                }
+
+                rc = rmdir(dirname);
+                if (rc) {
+                        printf("rmdir(%s) error: %s\n",
+                               dirname, strerror(errno));
+                        break;
+                }
+                /* time_t is not guaranteed to be long; cast to match %ld */
+                if ((i % 10000) == 0)
+                        printf(" - deleted %ld (time %ld)\n",
+                               i, (long)time(NULL));
+        }
+        return rc;
+}
set -e
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 2108 3192
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 68"}
+# bug number for skipped test: 2108 3637 3561
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42c 45"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
if ! mount | grep -q $DIR; then
$START
fi
- log "== test $1: $2"
+ log "== test $1: $2= `date +%H:%M:%S`"
export TESTNAME=test_$1
test_$1 || error "test_$1: exit with rc=$?"
unset TESTNAME
touch $EXT2_DEV
mke2fs -j -F $EXT2_DEV 8000 > /dev/null
-umask 022
+umask 077
test_0() {
touch $DIR/f
}
run_test 24t "mkdir .../R16a/b/c; rename .../R16a/b/c .../R16a ="
-
test_25a() {
echo '== symlink sanity ============================================='
mkdir $DIR/d25
}
run_test 51 "special situations: split htree with empty entry =="
+test_51b() { # create the 70001 directories t-0 .. t-70000 under $DIR/d51b
+ check_kernel_version 39 || return 0
+ NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` # 4th column of the last "df -i -P" line
+ [ $NUMFREE -lt 70000 ] && \
+ echo "skipping test 51b, not enough free inodes($NUMFREE)" && \
+ return
+ mkdir -p $DIR/d51b
+ (cd $DIR/d51b; mkdirmany t 70001) # subshell keeps the caller's working directory unchanged
+}
+run_test 51b "mkdir .../t-0 --- .../t-70000 ===================="
+
+test_51c() { # remove the directories t-0 .. t-70000 created by test 51b
+ check_kernel_version 39 || return 0
+ # Gate on what test 51b left behind rather than on the free inode count:
+ # once 51b has created its 70001 directories the count has dropped by
+ # about that much, so a "free inodes < 70000" check could skip the very
+ # cleanup that is needed.
+ [ -d $DIR/d51b/t-0 ] || { echo "skipping test 51c"; return 0; }
+ (cd $DIR/d51b; rmdirmany t 70001)
+}
+run_test 51c "rmdir .../t-0 --- .../t-70000 ===================="
+
test_52a() {
[ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo
mkdir -p $DIR/d52a
run_test 54b "char device works in lustre ======================"
find_loop_dev() {
- [ "$LOOPNUM" ] && return
[ -b /dev/loop/0 ] && LOOPBASE=/dev/loop/
[ -b /dev/loop0 ] && LOOPBASE=/dev/loop
[ -z "$LOOPBASE" ] && echo "/dev/loop/0 and /dev/loop0 gone?" && return
}
run_test 67 "supplementary group failure (should return error) ="
-LOOPDEV=""
cleanup_68() {
if [ "$LOOPDEV" ]; then
swapoff $LOOPDEV || error "swapoff failed"
losetup -d $LOOPDEV || error "losetup -d failed"
+ unset LOOPDEV LOOPNUM
fi
rm -f $DIR/f68
- LOOPDEV=""
}
meminfo() {
# and then consuming memory until it is used.
test_68() {
[ "$UID" != 0 ] && echo "skipping test 68 (must run as root)" && return
+ [ "`lsmod|grep obdfilter`" ] && echo "skipping test 68 (local OST)" && \
+ return
find_loop_dev
dd if=/dev/zero of=$DIR/f68 bs=64k count=1024
cleanup_68
- [ $SWAPUSED -eq 0 ] && error "no swap used???"
+ [ $SWAPUSED -eq 0 ] && echo "no swap used???" || true
}
run_test 68 "support swapping to Lustre ========================"
--nspath
--journal_size size
--inode_size size
+ --mdsuuid uuid
--add lov
--lov lov_name
('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"),
('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"),
('mkfsoptions', "Optional argument to mkfs.", PARAM, ""),
- ('ostuuid', "", PARAM,""),
+ ('ostuuid', "Optional argument to specify OST UUID", PARAM,""),
+ ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""),
('nspath', "Local mount point of server namespace.", PARAM,""),
('format', ""),
mds_uuid = name2uuid(lustre, mds_name, 'mds', fatal=0)
if not mds_uuid:
- mds_uuid = new_uuid(mds_name)
+ mds_uuid = get_option(options, 'mdsuuid')
+ if mds_uuid:
+ if lookup(lustre, mds_uuid):
+ error("Duplicate MDS UUID:", mds_uuid)
+ else:
+ mds_uuid = new_uuid(mds_name)
+
mds = gen.mds(mds_name, mds_uuid, mdd_uuid, options.group)
lustre.appendChild(mds)
else: