config.log
config.status
configure
+.*.o.cmd
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace. sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
-obj-y += portals/
+# The ordering of these determines the order in which each subsystem's
+# module_init() functions are called. If these are changed, make sure
+# they reflect the dependencies between each subsystem's _init functions.
obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
AC_SUBST(LINUX)
+# When enable_inkernel is yes, point $LINUX/fs/lustre at this source tree.
+# rm -f keeps the first run quiet when no previous link exists, and the
+# quoting keeps the test and paths intact for empty values or paths that
+# contain whitespace.
+if test "x$enable_inkernel" = xyes ; then
+        echo ln -s `pwd` $LINUX/fs/lustre
+        rm -f "$LINUX/fs/lustre"
+        ln -s "`pwd`" "$LINUX/fs/lustre"
+fi
-# --------- UML? --------------------
+# --------------------
AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
if test $host_cpu = "lib" ; then
host_cpu="lib"
MOD_LINK=elf64_ia64
;;
+ x86_64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
+ MOD_LINK=elf_x86_64
+;;
+
sparc64 )
AC_MSG_RESULT($host_cpu)
KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
fi
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
AC_MSG_CHECKING(for Linux release)
dnl We need to rid ourselves of the nasty [ ] quotes.
changequote(, )
dnl Get release from version.h
- RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+ LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
changequote([, ])
- moduledir='$(libdir)/modules/'$RELEASE/kernel
+ moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
AC_SUBST(moduledir)
modulefsdir='$(moduledir)/fs/$(PACKAGE)'
AC_SUBST(modulefsdir)
+ AC_MSG_RESULT($LINUXRELEASE)
+ AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+ AC_MSG_CHECKING(lustre release)
+
+ dnl We need to rid ourselves of the nasty [ ] quotes.
+ changequote(, )
+ dnl Get release from version.h
+ RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+ changequote([, ])
+
AC_MSG_RESULT($RELEASE)
AC_SUBST(RELEASE)
# This needs to run after we've defined the KCPPFLAGS
AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
#include <linux/sched.h>],
[struct task_struct p;
p.sighand = NULL;],
AC_MSG_RESULT(redhat-2.4.20)
CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
else
- AC_MSG_RESULT($RELEASE)
+ AC_MSG_RESULT($LINUXRELEASE)
fi
/* portals/include/config.h.in. Generated from configure.in by autoheader. */
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
#ifndef _KP30_INCLUDED
#define _KP30_INCLUDED
-
#define PORTAL_DEBUG
#ifndef offsetof
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
/*
* Debugging
*/
extern unsigned int portal_stack;
extern unsigned int portal_debug;
extern unsigned int portal_printk;
-/* Debugging subsystems (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED (0 << 24)
-#define S_MDC (1 << 24)
-#define S_MDS (2 << 24)
-#define S_OSC (3 << 24)
-#define S_OST (4 << 24)
-#define S_CLASS (5 << 24)
-#define S_OBDFS (6 << 24) /* obsolete */
-#define S_LLITE (7 << 24)
-#define S_RPC (8 << 24)
-#define S_EXT2OBD (9 << 24) /* obsolete */
-#define S_PORTALS (10 << 24)
-#define S_SOCKNAL (11 << 24)
-#define S_QSWNAL (12 << 24)
-#define S_PINGER (13 << 24)
-#define S_FILTER (14 << 24)
-#define S_TRACE (15 << 24) /* obsolete */
-#define S_ECHO (16 << 24)
-#define S_LDLM (17 << 24)
-#define S_LOV (18 << 24)
-#define S_GMNAL (19 << 24)
-#define S_PTLROUTER (20 << 24)
-#define S_COBD (21 << 24)
-#define S_PTLBD (22 << 24)
-#define S_LOG (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED (1 << 0)
+#define S_MDC (1 << 1)
+#define S_MDS (1 << 2)
+#define S_OSC (1 << 3)
+#define S_OST (1 << 4)
+#define S_CLASS (1 << 5)
+#define S_LOG (1 << 6)
+#define S_LLITE (1 << 7)
+#define S_RPC (1 << 8)
+#define S_MGMT (1 << 9)
+#define S_PORTALS (1 << 10)
+#define S_SOCKNAL (1 << 11)
+#define S_QSWNAL (1 << 12)
+#define S_PINGER (1 << 13)
+#define S_FILTER (1 << 14)
+#define S_PTLBD (1 << 15)
+#define S_ECHO (1 << 16)
+#define S_LDLM (1 << 17)
+#define S_LOV (1 << 18)
+#define S_GMNAL (1 << 19)
+#define S_PTLROUTER (1 << 20)
+#define S_COBD (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
* up to date! */
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */
#define D_INODE (1 << 1)
#define D_SUPER (1 << 2)
#define D_RPCTRACE (1 << 20) /* for distributed debugging */
#define D_VFSTRACE (1 << 21)
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
#endif
-#ifdef __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE - \
+
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_dwarf_cfa() & \
(THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE - \
+# else
+# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_frame_address(0) & \
(THREAD_SIZE - 1)))
-#endif
+# endif
-#ifdef __KERNEL__
#define CHECK_STACK(stack) \
do { \
if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
/*panic("LBUG");*/ \
} \
} while (0)
-#else
+#else /* __KERNEL__ */
#define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
#if 1
#define CDEBUG(mask, format, a...) \
do { \
- CHECK_STACK(CDEBUG_STACK()); \
+ CHECK_STACK(CDEBUG_STACK); \
if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \
(portal_debug & (mask) && \
- portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \
+ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
__FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK(), format , ## a); \
+ CDEBUG_STACK, format, ## a); \
} while (0)
#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
#define EXIT do { } while (0)
#endif
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
#ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+ const int line);
#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
__FUNCTION__, __LINE__))
#else
#endif /* PORTALS_PROFILING */
/* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
void portals_debug_dumplog(void);
int portals_debug_init(unsigned long bufsize);
int portals_debug_cleanup(void);
int portals_debug_clear_buffer(void);
int portals_debug_mark_buffer(char *text);
int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
- char *file, unsigned int size);
+ char *file, unsigned int size);
__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
#if (__GNUC__)
/* Use the special GNU C __attribute__ hack to have the compiler check the
# warning printf has been defined as a macro...
# undef printf
#endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack,
+ const char *format, ...)
__attribute__ ((format (printf, 7, 8)));
#else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
- int line, unsigned long stack,
- const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack,
+ const char *format, ...);
#endif /* __GNUC__ */
void portals_debug_set_level(unsigned int debug_level);
# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
# define PORTAL_FREE(a, b) do { free(a); } while (0);
# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
- printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
- (subsys) >> 24, (mask), (long)time(0), file, fn, line, \
- getpid() , stack, ## a);
+ printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
+ (subsys), (mask), (long)time(0), file, fn, line, \
+ getpid() , stack, ## a);
#endif
#ifndef CURRENT_TIME
void kportal_put_ni (int nal);
#ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+# if (~0UL) == 0xffffffffUL
+# define BITS_PER_LONG 32
+# else
+# define BITS_PER_LONG 64
+# endif
+# endif
#endif
#if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+# define SIGNAL_MASK_ASSERT() \
+ LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+# define SIGNAL_MASK_ASSERT() \
+ LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags) \
+
+# define SIGNAL_MASK_LOCK(task, flags) \
spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
+# define SIGNAL_MASK_UNLOCK(task, flags) \
spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp, 1)
# define RECALC_SIGPENDING recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags) \
+# define CURRENT_SECONDS get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags) \
spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
+# define SIGNAL_MASK_UNLOCK(task, flags) \
spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp)
# define RECALC_SIGPENDING recalc_sigpending(current)
+# define CURRENT_SECONDS CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt, ## a)
#endif
+
+#endif /* _PORTALS_COMPAT_H */
-/*
-*/
#ifndef _P30_INTERNAL_H_
#define _P30_INTERNAL_H_
#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
/*
* Simple doubly linked list implementation.
__list_del(entry->prev, entry->next);
INIT_LIST_HEAD(entry);
}
+#endif
+#ifndef list_for_each_entry
/**
* list_move - delete from one list and add as another's head
* @list: the entry to move
__list_del(list->prev, list->next);
list_add_tail(list, head);
}
+#endif
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
/**
* list_empty - tests whether a list is empty
* @head: the list to test.
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
*/
#ifndef __LTRACE_H_
#define __LTRACE_H_
argv[0] = "debug_kernel";
argv[1] = fname;
argv[2] = "1";
-
+
fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-
+
return jt_dbg_debug_kernel(3, argv);
}
static inline int ltrace_clear()
{
char* argv[1];
-
+
argv[0] = "clear";
-
+
fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-
+
return jt_dbg_clear_debug_buf(1, argv);
}
{
char* argv[2];
char mark_buf[PATH_MAX];
-
+
snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-
+
argv[0] = "mark";
argv[1] = mark_buf;
return jt_dbg_mark_debug_buf(2, argv);
char* argv[2];
argv[0] = "list";
argv[1] = "applymasks";
-
+
fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-
+
return jt_dbg_list(2, argv);
}
#ifdef PORTALS_DEV_ID
rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
#endif
- ltrace_filter("class");
+ ltrace_filter("class");
ltrace_filter("socknal");
- ltrace_filter("qswnal");
- ltrace_filter("gmnal");
- ltrace_filter("portals");
-
- ltrace_show("all_types");
- ltrace_filter("trace");
- ltrace_filter("malloc");
- ltrace_filter("net");
- ltrace_filter("page");
- ltrace_filter("other");
- ltrace_filter("info");
+ ltrace_filter("qswnal");
+ ltrace_filter("gmnal");
+ ltrace_filter("portals");
+
+ ltrace_show("all_types");
+ ltrace_filter("trace");
+ ltrace_filter("malloc");
+ ltrace_filter("net");
+ ltrace_filter("page");
+ ltrace_filter("other");
+ ltrace_filter("info");
ltrace_applymasks();
return rc;
struct timezone tz;
int nob;
int underuml = !not_uml();
-
+
gettimeofday(&tv, &tz);
nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
"(%s:%d:%s() %d+%lu): ",
"lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
}
-
+
nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
system(cmdbuf);
}
-/*
-*/
-
#ifndef MYRNAL_H
#define MYRNAL_H
-/*
-*/
#ifndef _NAL_H_
#define _NAL_H_
-/*
- */
-
#ifndef _INCppidh_
#define _INCppidh_
/*
-*/
-/*
* stringtab.h
*/
#define _P30_TYPES_H_
#ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
#else
-#include <sys/types.h>
+# include <sys/types.h>
typedef u_int32_t __u32;
typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
#endif
typedef __u64 ptl_nid_t;
typedef ptl_handle_any_t ptl_handle_me_t;
#define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+ ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
#define PTL_EQ_NONE PTL_HANDLE_NONE
static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
ptl_handle_me_t unlinked_me;
ptl_md_t mem_desc;
ptl_hdr_data_t hdr_data;
- cycles_t arrival_time;
+ struct timeval arrival_time;
volatile ptl_seq_t sequence;
} ptl_event_t;
-
typedef enum {
PTL_ACK_REQ,
PTL_NOACK_REQ
} ptl_ack_req_t;
-
typedef struct {
volatile ptl_seq_t sequence;
ptl_size_t size;
ptl_eq_t *eq;
} ptl_ni_t;
-
typedef struct {
int max_match_entries; /* max number of match entries */
int max_mem_descriptors; /* max number of memory descriptors */
Makefile
Makefile.in
+.*.o.cmd
-include ../Kernelenv
+include $(obj)/../Kernelenv
obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
return &kgmnal_api;
}
-static void __exit
+static void /*__exit*/
kgmnal_finalize(void)
{
struct list_head *tmp;
/* Called by kernel at module unload time */
-static void __exit
+static void /*__exit*/
kscimacnal_finalize(void)
{
/* FIXME: How should the shutdown procedure really look? */
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../../Kernelenv
+include $(src)/../../Kernelenv
obj-y += ksocknal.o
ksocknal-objs := socknal.o socknal_cb.o
}
-void __exit
+void /*__exit*/
ktoenal_module_fini (void)
{
CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
goto get_fmb; /* => go get a fwd msg buffer */
default:
+ break;
}
/* Not Reached */
LBUG ();
goto out; /* (later) */
default:
+ break;
}
/* Not Reached */
Makefile
Makefile.in
link-stamp
+.*.o.cmd
include fs/lustre/portals/Kernelenv
obj-y += libcfs.o
-licfs-objs := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs := module.o proc.o debug.o
memset(debug_buf, 0, debug_size);
debug_wrapped = 0;
- printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
- bufsize, debug_buf);
+ //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+ //bufsize, debug_buf);
atomic_set(&debug_off_a, debug_off);
notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
debug_size = bufsize;
if (debug_buf == NULL)
return -EINVAL;
- CDEBUG(0, "*******************************************************************************\n");
+ CDEBUG(0, "********************************************************\n");
CDEBUG(0, "DEBUG MARKER: %s\n", text);
- CDEBUG(0, "*******************************************************************************\n");
+ CDEBUG(0, "********************************************************\n");
return 0;
}
/* FIXME: I'm not very smart; someone smarter should make this better. */
void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack, const char *format, ...)
{
va_list ap;
unsigned long flags;
do_gettimeofday(&tv);
prefix_nob = snprintf(debug_buf + debug_off, max_nob,
- "%02x:%06x:%d:%lu.%06lu ",
- subsys >> 24, mask, smp_processor_id(),
+ "%06x:%06x:%d:%lu.%06lu ",
+ subsys, mask, smp_processor_id(),
tv.tv_sec, tv.tv_usec);
max_nob -= prefix_nob;
va_start(ap, format);
msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
- max_nob, format, ap);
+ max_nob, format, ap);
max_nob -= msg_nob;
va_end(ap);
portal_debug = debug_level;
}
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
{
char *argv[6];
char *envp[3];
argv[0] = portals_upcall;
argv[1] = "LBUG";
argv[2] = file;
- argv[3] = fn;
+ argv[3] = (char *)fn;
argv[4] = buf;
argv[5] = NULL;
struct semaphore nal_cmd_sem;
#ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+ const int line)
{
- portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+ portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
"ASSERTION(%s) failed\n", expr);
LBUG_WITH_LOC(file, func, line);
}
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../Kernelenv
+include $(src)/../Kernelenv
obj-y += portals.o
-portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+ lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+ api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+ api-wrap.o
#include <portals/api-support.h>
int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
unsigned int portal_debug = ~0;
unsigned int portal_printk;
unsigned int portal_stack;
msg->send_ack = 0;
msg->md = md;
- msg->ev.arrival_time = get_cycles();
+ do_gettimeofday(&msg->ev.arrival_time);
md->pending++;
if (md->threshold != PTL_MD_THRESH_INF) {
LASSERT (md->threshold > 0);
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../Kernelenv
+include $(src)/../Kernelenv
obj-y += kptlrouter.o
kptlrouter-objs := router.o proc.o
#include "router.h"
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
unsigned long long kpr_fwd_bytes;
unsigned long kpr_fwd_packets;
*
* Once in a blue moon we register/deregister NALs and add/remove routing
* entries (thread context only)... */
-rwlock_t kpr_rwlock;
+rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED;
kpr_router_interface_t kpr_router_interface = {
kprri_register: kpr_register_nal,
int
kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
kpr_nal_entry_t *ne;
void
kpr_shutdown_nal (void *arg)
{
- long flags;
+ unsigned long flags;
kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
void
kpr_deregister_nal (void *arg)
{
- long flags;
+ unsigned long flags;
kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
ptl_nid_t hi_nid)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
kpr_route_entry_t *re;
int
kpr_del_route (ptl_nid_t nid)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
return (-ENOENT);
}
-static void __exit
+static void /*__exit*/
kpr_finalise (void)
{
LASSERT (list_empty (&kpr_nals));
CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
atomic_read(&portal_kmemory));
- rwlock_init(&kpr_rwlock);
- INIT_LIST_HEAD(&kpr_routes);
- INIT_LIST_HEAD(&kpr_nals);
-
kpr_proc_init();
PORTAL_SYMBOL_REGISTER(kpr_router_interface);
Makefile
Makefile.in
.deps
+.*.o.cmd
/* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
{
PORTAL_ALLOC (client, sizeof(struct pingcli_data));
if (client == NULL)
} /* pingcli_init() */
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
{
PORTAL_SYMBOL_UNREGISTER (kping_client);
} /* pingcli_cleanup() */
#include <asm/semaphore.h>
#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
static unsigned ping_head_magic;
static unsigned ping_bulk_magic;
-static int nal = 0; // Your NAL,
+static int nal = SOCKNAL; // Your NAL,
static unsigned long packets_valid = 0; // Valid packets
static int running = 1;
atomic_t pkt;
} /* pingsrv_init() */
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
{
remove_proc_entry ("net/pingsrv", NULL);
/* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
{
PORTAL_ALLOC (client, sizeof(struct pingcli_data));
} /* pingcli_init() */
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
{
PORTAL_SYMBOL_UNREGISTER (kping_client);
} /* pingcli_cleanup() */
} /* pingsrv_init() */
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
{
remove_proc_entry ("net/pingsrv", NULL);
/* FIXME: I'm not very smart; someone smarter should make this better. */
void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn,
+ const int line, const char *format, ...)
{
va_list ap;
unsigned long flags;
/* FIXME: I'm not very smart; someone smarter should make this better. */
void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn,
+ const int line, const char *format, ...)
{
va_list ap;
unsigned long flags;
ptlctl
.deps
routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
static char *buf = rawbuf;
static int max = 8192;
//static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
static int debug_mask = ~0;
static const char *portal_debug_subsystems[] =
- {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
- "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
- "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+ {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+ "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+ "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
static const char *portal_debug_masks[] =
{"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
"blocks", "net", "warning", "buffs", "other", "dentry", "portals",
- "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+ "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+ NULL};
struct debug_daemon_cmd {
char *cmd;
printf("%s output from subsystem \"%s\"\n",
enable ? "Enabling" : "Disabling",
portal_debug_subsystems[i]);
- subsystem_array[i] = enable;
+ if (enable)
+ subsystem_mask |= (1 << i);
+ else
+ subsystem_mask &= ~(1 << i);
found = 1;
}
}
int dbg_initialize(int argc, char **argv)
{
- memset(subsystem_array, 1, sizeof(subsystem_array));
return 0;
}
for (i = 0; portal_debug_masks[i] != NULL; i++)
printf(", %s", portal_debug_masks[i]);
printf("\n");
- }
- else if (strcasecmp(argv[1], "applymasks") == 0) {
- unsigned int subsystem_mask = 0;
- for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
- if (subsystem_array[i]) subsystem_mask |= (1 << i);
- }
+ } else if (strcasecmp(argv[1], "applymasks") == 0) {
applymask_all(subsystem_mask, debug_mask);
}
return 0;
{
char *p, *z;
unsigned long subsystem, debug, dropped = 0, kept = 0;
- int max_sub, max_type;
-
- for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
- ;
- for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
- ;
while (size) {
p = memchr(buf, '\n', size);
z++;
/* for some reason %*s isn't working. */
*p = '\0';
- if (subsystem < max_sub &&
- subsystem_array[subsystem] &&
+ if ((subsystem_mask & subsystem) &&
(!debug || (debug_mask & debug))) {
if (raw)
fprintf(fd, "%s\n", buf);
{"mds_ext3", "lustre/mds"},
{"mds_extN", "lustre/mds"},
{"ptlbd", "lustre/ptlbd"},
+ {"mgmt_svc", "lustre/mgmt"},
+ {"mgmt_cli", "lustre/mgmt"},
{NULL, NULL}
};
char *path = "..";
#include <stdio.h>
#include <sys/types.h>
+#include <netdb.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <netdb.h>
return ((e == NULL) ? "???" : e->name);
}
+/* Resolve hname with gethostbyname() and report any failure on stderr.
+ *
+ * Returns the hostent handed back by gethostbyname() on success, or NULL
+ * after printing a diagnostic.  gethostbyname() reports its failures
+ * through h_errno, not errno, so the fallback message is built with
+ * hstrerror(h_errno) rather than strerror(errno). */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+
+        he = gethostbyname(hname);
+        if (he != NULL)
+                return he;
+
+        switch (h_errno) {
+        case HOST_NOT_FOUND:
+        case NO_ADDRESS:
+                /* the name is unknown or has no address record */
+                fprintf(stderr, "Unable to resolve hostname: %s\n",
+                        hname);
+                break;
+        default:
+                fprintf(stderr, "gethostbyname error: %s\n",
+                        hstrerror(h_errno));
+                break;
+        }
+        return NULL;
+}
+
int
ptl_parse_nid (ptl_nid_t *nidp, char *str)
{
if ((('a' <= str[0] && str[0] <= 'z') ||
('A' <= str[0] && str[0] <= 'Z')) &&
- (he = gethostbyname (str)) != NULL)
+ (he = ptl_gethostbyname (str)) != NULL)
{
__u32 addr = *(__u32 *)he->h_addr;
goto usage;
}
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
g_port = atol(argv[2]);
PORTAL_IOC_INIT(data);
if (argc == 2) {
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
PORTAL_IOC_INIT(data);
if (argc == 2) {
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
cscope.out
autom4te-2.53.cache
autom4te.cache
-
+.*.o.cmd
tbd
+ * version v0_8
+ * bug fixes
+ - orphans are moved into the PENDING directory for possible recovery
+ - replayed opens now open by fid for orphan/rename safety (1042)
+ - last close of an orphan inode generates a transno (683)
+ - chdir() and mount() now pin the directory entry (1020)
+ - avoid CERROR in normal ll_setattr_raw() error case (1500)
+ - discard very old requests without processing them (1502)
+
+2003-06-15 Phil Schwan <phil@clusterfs.com>
* version v0_7
* bug fixes
- imports and exports cleanup too early, need refcounts (349, 879, 1045)
-include fs/lustre/portals/Kernelenv
+include $(src)/portals/Kernelenv
+
+# for scripts/version_tag.pl
+LINUX = @LINUX@
obj-y += portals/
+# obdclass has to come before anything that does class_register..
+obj-y += obdclass/
+obj-y += ptlrpc/
+obj-y += ldlm/
+obj-y += obdfilter/
+obj-y += mdc/
obj-y += mds/
+obj-y += obdecho/
+obj-y += osc/
+obj-y += ost/
+obj-y += lov/
+obj-y += llite/
+
+# portals needs to be before utils/, which pulls in ptlctl objects
+obj-m += utils/
+obj-m += tests/
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
- return lprocfs_obd_attach(dev, lvars.obd_vars);
+ lprocfs_init_vars(cobd, &lvars);
+ return lprocfs_obd_attach(dev, lvars.obd_vars);
}
static int cobd_detach(struct obd_device *dev)
{
- return lprocfs_obd_detach(dev);
+ return lprocfs_obd_detach(dev);
}
static int
return (0);
fail_0:
- obd_disconnect (&cobd->cobd_target, 0 );
+ obd_disconnect(&cobd->cobd_target, 0);
return (rc);
}
-static int
-cobd_cleanup (struct obd_device *dev, int force, int failover)
+static int cobd_cleanup(struct obd_device *dev, int flags)
{
struct cache_obd *cobd = &dev->u.cobd;
int rc;
- if (!list_empty (&dev->obd_exports))
+ if (!list_empty(&dev->obd_exports))
return (-EBUSY);
- rc = obd_disconnect (&cobd->cobd_cache, failover);
+ rc = obd_disconnect(&cobd->cobd_cache, flags);
if (rc != 0)
CERROR ("error %d disconnecting cache\n", rc);
- rc = obd_disconnect (&cobd->cobd_target, failover);
+ rc = obd_disconnect(&cobd->cobd_target, flags);
if (rc != 0)
CERROR ("error %d disconnecting target\n", rc);
return (rc);
}
-static int
-cobd_disconnect (struct lustre_handle *conn, int failover)
+static int cobd_disconnect(struct lustre_handle *conn, int flags)
{
- int rc = class_disconnect (conn, failover);
+ int rc = class_disconnect(conn, flags);
CERROR ("rc %d\n", rc);
- return (rc);
+ return (rc);
}
static int
return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
}
-static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
- struct obd_export *cobd_exp;
- int rc;
-
- if (exp->exp_obd == NULL)
- return -EINVAL;
-
- cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
- rc = obd_statfs(cobd_exp, osfs);
- class_export_put(cobd_exp);
- return rc;
+ return obd_statfs(class_conn2obd(&obd->u.cobd.cobd_target), osfs,
+ max_age);
}
-static int
-cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *lsm)
+static int cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
+ struct lov_stripe_md *lsm)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
return (obd_close (&cobd->cobd_target, oa, lsm, oti));
}
-static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private,
- struct obd_trans_info *oti)
+ struct niobuf_local *res, struct obd_trans_info *oti)
{
struct obd_export *cobd_exp;
int rc;
return -EOPNOTSUPP;
cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
- rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res,
- desc_private, oti);
+ rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res,
+ oti);
class_export_put(cobd_exp);
+
return rc;
}
-static int cobd_commitrw(int cmd, struct obd_export *exp,
+static int cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private, struct obd_trans_info *oti)
+ struct obd_trans_info *oti)
{
struct obd_export *cobd_exp;
int rc;
return -EOPNOTSUPP;
cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
- rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local,
- desc_private, oti);
+ rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj,niocount,local,oti);
class_export_put(cobd_exp);
return rc;
}
-static inline int
-cobd_brw(int cmd, struct lustre_handle *conn,
- struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_trans_info *oti)
+static int cobd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
+ struct lov_stripe_md *lsm, obd_count oa_bufs,
+ struct brw_page *pga, struct obd_trans_info *oti)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
return -EOPNOTSUPP;
cobd = &obd->u.cobd;
- return (obd_brw (cmd, &cobd->cobd_target,
- lsm, oa_bufs, pga, oti));
+ return (obd_brw(cmd, &cobd->cobd_target, oa, lsm, oa_bufs, pga, oti));
}
-static int
-cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
- void *karg, void *uarg)
+static int cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
+ void *karg, void *uarg)
{
struct obd_device *obd = class_conn2obd(conn);
struct cache_obd *cobd;
/* intercept? */
cobd = &obd->u.cobd;
- return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg));
+ return (obd_iocontrol(cmd, &cobd->cobd_target, len, karg, uarg));
}
static struct obd_ops cobd_ops = {
struct lprocfs_static_vars lvars;
ENTRY;
- printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
+ printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n");
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(cobd, &lvars);
RETURN(class_register_type(&cobd_ops, lvars.module_vars,
OBD_CACHE_DEVICENAME));
}
-static void __exit cobd_exit(void)
+static void /*__exit*/ cobd_exit(void)
{
- class_unregister_type(OBD_CACHE_DEVICENAME);
+ class_unregister_type(OBD_CACHE_DEVICENAME);
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
#include <linux/lprocfs_status.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
/* Common STATUS namespace */
-static int rd_target(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int cobd_rd_target(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- struct obd_device *dev = (struct obd_device*)data;
- struct lustre_handle *conn;
- struct obd_export *exp;
- int rc;
+ struct obd_device *cobd = (struct obd_device *)data;
+ int rc;
- LASSERT(dev != NULL);
- conn = &dev->u.cobd.cobd_target;
+ LASSERT(cobd != NULL);
- if (!dev->obd_set_up) {
- rc = snprintf (page, count, "not set up\n");
- } else {
- exp = class_conn2export(conn);
- LASSERT(exp != NULL);
- rc = snprintf(page, count, "%s\n",
- exp->exp_obd->obd_uuid.uuid);
- class_export_put(exp);
- }
- return (rc);
+ if (!cobd->obd_set_up) {
+ rc = snprintf(page, count, "not set up\n");
+ } else {
+ struct obd_device *tgt =
+ class_conn2obd(&cobd->u.cobd.cobd_target);
+ LASSERT(tgt != NULL);
+ rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid);
+ }
+ return rc;
}
-static int rd_cache(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int cobd_rd_cache(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- struct obd_device *dev = (struct obd_device*)data;
- struct lustre_handle *conn;
- struct obd_export *exp;
- int rc;
+ struct obd_device *cobd = (struct obd_device*)data;
+ int rc;
- LASSERT(dev != NULL);
- conn = &dev->u.cobd.cobd_cache;
+ LASSERT(cobd != NULL);
- if (!dev->obd_set_up) {
- rc = snprintf (page, count, "not set up\n");
+ if (!cobd->obd_set_up) {
+ rc = snprintf(page, count, "not set up\n");
} else {
- exp = class_conn2export(conn);
- LASSERT (exp != NULL);
- rc = snprintf(page, count, "%s\n",
- exp->exp_obd->obd_uuid.uuid);
- class_export_put(exp);
- }
- return (rc);
+ struct obd_device *cache =
+ class_conn2obd(&cobd->u.cobd.cobd_cache);
+ LASSERT(cache != NULL);
+ rc = snprintf(page, count, "%s\n", cache->obd_uuid.uuid);
+ }
+ return rc;
}
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "target_uuid", rd_target, 0, 0 },
- { "cache_uuid", rd_cache, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "target_uuid", cobd_rd_target, 0, 0 },
+ { "cache_uuid", cobd_rd_cache, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
+/* Module-wide /proc entries.  Kept file-local, like lprocfs_obd_vars and the
+ * !LPROCFS stubs, so built-in (obj-y) links cannot clash on this name. */
+static struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(cobd, lprocfs_module_vars, lprocfs_obd_vars)
<!ELEMENT profile (ldlm_ref | ptlrpc_ref | network_ref | routetbl_ref |
osd_ref | mdsdev_ref | lovconfig_ref|
- echoclient_ref | mountpoint_ref)*>
+ echoclient_ref | mountpoint_ref | mgmt_ref)*>
<!ATTLIST profile %object.attr;>
-<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
+<!ELEMENT mountpoint (path, filesystem_ref)> <!-- exactly one path and one
+     filesystem_ref.  "#REQUIRED" is only legal inside ATTLIST, so the
+     requirement is written as a content model instead.
+     NOTE(review): this model fixes the child order; confirm the config
+     generator emits path before filesystem_ref. -->
<!ATTLIST mountpoint %object.attr;>
<!ELEMENT echoclient (obd_ref)>
<!ELEMENT ldlm EMPTY>
<!ATTLIST ldlm %object.attr;>
+<!ELEMENT mgmt (active_ref)*>
+<!ATTLIST mgmt %object.attr;>
+
<!ELEMENT ptlrpc EMPTY>
<!ATTLIST ptlrpc %object.attr;>
<!ATTLIST ost %object.attr;
failover ( 1 | 0 ) #IMPLIED>
+<!ELEMENT filesystem (mds_ref, obd_ref, mgmt_ref*)> <!-- one mds_ref, one
+     obd_ref, then any number of mgmt_ref.  "#REQUIRED" is only legal inside
+     ATTLIST, so the requirement is written as a content model instead.
+     NOTE(review): this model fixes the child order; confirm the config
+     generator emits the children in this sequence. -->
+<!ATTLIST filesystem %object.attr;>
+
<!ELEMENT mds (active_ref | lovconfig_ref | group)*>
<!ATTLIST mds %object.attr;
failover ( 1 | 0 ) #IMPLIED>
<!ATTLIST obd_ref %objref.attr;>
<!ELEMENT ost_ref %objref.content;>
<!ATTLIST ost_ref %objref.attr;>
-<!ELEMENT active_ref %objref.content;>
-<!ATTLIST active_ref %objref.attr;>
-<!ELEMENT target_ref %objref.content;>
-<!ATTLIST target_ref %objref.attr;>
+<!ELEMENT active_ref %objref.content;>
+<!ATTLIST active_ref %objref.attr;>
+<!ELEMENT target_ref %objref.content;>
+<!ATTLIST target_ref %objref.attr;>
<!ELEMENT lov_ref %objref.content;>
<!ATTLIST lov_ref %objref.attr;>
<!ELEMENT lovconfig_ref %objref.content;>
<!ATTLIST lovconfig_ref %objref.attr;>
+<!ELEMENT mgmt_ref %objref.content;>
+<!ATTLIST mgmt_ref %objref.attr;>
<!ELEMENT mountpoint_ref %objref.content;>
<!ATTLIST mountpoint_ref %objref.attr;>
+<!ELEMENT filesystem_ref %objref.content;>
+<!ATTLIST filesystem_ref %objref.attr;>
<!ELEMENT echoclient_ref %objref.content;>
<!ATTLIST echoclient_ref %objref.attr;>
<!ELEMENT failover_ref %objref.content;>
AC_ARG_ENABLE(extN, [ --enable-extN use extN instead of ext3 for lustre backend])
AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
+# the pinger is temporary, until we have the recovery node in place
+AC_ARG_ENABLE(pinger, [ --enable-pinger recovery pinger support])
+if test x$enable_pinger = xyes ; then
+ AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
+fi
+
+# very experimental orphan support
+AC_ARG_ENABLE(orphans, [ --enable-orphans very experimental orphan recovery support])
+if test x$enable_orphans = xyes ; then
+ AC_DEFINE(ENABLE_ORPHANS, 1, Compile with orphan support)
+fi
+
AC_ARG_WITH(obd-buffer-size, [ --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192)
AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
sinclude(portals/archdep.m4)
if test x$enable_inkernel = xyes ; then
-cp Makefile.mk Makefile.in
-cp mds/Makefile.mk mds/Makefile.in
-cp portals/Kernelenv.mk portals/Kernelenv.in
-cp portals/Makefile.mk portals/Makefile.in
-cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in
-cp portals/portals/Makefile.mk portals/portals/Makefile.in
-cp portals/knals/Makefile.mk portals/knals/Makefile.in
-cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in
-cp portals/router/Makefile.mk portals/router/Makefile.in
+ find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
+ sh -e -x -c '(cp -f $0.mk $0.in)'
fi
AM_CONFIG_HEADER(portals/include/config.h)
#define kmap(page) (page)->addr
#define kunmap(a) do { int foo = 1; foo++; } while (0)
-static inline struct page *alloc_pages(int mask, unsigned long foo)
+static inline struct page *alloc_pages(int mask, unsigned long order)
{
struct page *pg = malloc(sizeof(*pg));
if (!pg)
return NULL;
#ifdef MAP_ANONYMOUS
- pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+ pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
#else
- pg->addr = malloc(PAGE_SIZE);
+ pg->addr = malloc(PAGE_SIZE << order);
#endif
if (!pg->addr) {
/* arithmetic */
#define do_div(a,b) \
({ \
- unsigned long ret; \
- ret = (a)%(b); \
- (a) = (a)/(b); \
- (ret); \
+ unsigned long remainder;\
+ remainder = (a) % (b); \
+ (a) = (a) / (b); \
+ (remainder); \
})
/* VFS stuff */
-#define ATTR_MODE 1
-#define ATTR_UID 2
-#define ATTR_GID 4
-#define ATTR_SIZE 8
-#define ATTR_ATIME 16
-#define ATTR_MTIME 32
-#define ATTR_CTIME 64
-#define ATTR_ATIME_SET 128
-#define ATTR_MTIME_SET 256
-#define ATTR_FORCE 512 /* Not a change, but a change it */
-#define ATTR_ATTR_FLAG 1024
-#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */
-#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */
+#define ATTR_MODE 0x0001
+#define ATTR_UID 0x0002
+#define ATTR_GID 0x0004
+#define ATTR_SIZE 0x0008
+#define ATTR_ATIME 0x0010
+#define ATTR_MTIME 0x0020
+#define ATTR_CTIME 0x0040
+#define ATTR_ATIME_SET 0x0080
+#define ATTR_MTIME_SET 0x0100
+#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG 0x0400
+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET 0x2000
struct iattr {
unsigned int ia_valid;
#ifndef _LPROCFS_SNMP_H
#define _LPROCFS_SNMP_H
+
#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/autoconf.h>
#include <linux/proc_fs.h>
+#include <linux/version.h>
#include <linux/smp.h>
+#include <linux/kp30.h>
+
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# include <linux/statfs.h>
+# else
+# define kstatfs statfs
+# endif
+
+#else
+# define kstatfs statfs
#endif
-#include <linux/kp30.h>
#ifndef LPROCFS
#ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */
/* class_obd.c */
extern struct proc_dir_entry *proc_lustre_root;
-/* lproc_lov.c */
-extern struct file_operations ll_proc_target_fops;
struct obd_device;
+struct file;
#ifdef LPROCFS
x->obd_vars = glob[idx].obd_vars; \
} \
-#define LPROCFS_INIT_VARS(vclass, vinstance) \
-void lprocfs_init_vars(struct lprocfs_static_vars *x) \
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \
{ \
x->module_vars = vclass; \
x->obd_vars = vinstance; \
} \
-extern void lprocfs_init_vars(struct lprocfs_static_vars *var);
+#define lprocfs_init_vars(NAME, VAR) \
+do { \
+ extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \
+ lprocfs_##NAME##_init_vars(VAR); \
+} while (0)
extern void lprocfs_init_multi_vars(unsigned int idx,
struct lprocfs_static_vars *var);
/* lprocfs_status.c */
int count, int *eof, void *data);
extern int lprocfs_rd_name(char *page, char **start, off_t off,
int count, int *eof, void *data);
+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
int count, int *eof, void *data);
extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
int count, int *eof, void *data);
/* Statfs helpers */
-struct statfs;
extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs);
+ int count, int *eof, void *data);
/* lprocfs_status.c: counter read/write functions */
-struct file;
extern int lprocfs_counter_read(char *page, char **start, off_t off,
int count, int *eof, void *data);
extern int lprocfs_counter_write(struct file *file, const char *buffer,
unsigned long count, void *data);
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \
-int fct_name(char *page, char **start, off_t off, \
- int count, int *eof, void *data) \
-{ \
- struct statfs sfs; \
- int rc = get_statfs_fct((struct obd_device*)data, &sfs); \
- return (rc == 0 ? \
- lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \
- rc); \
-}
-
#else
/* LPROCFS is not defined */
static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
static inline struct proc_dir_entry *
lprocfs_register(const char *name, struct proc_dir_entry *parent,
struct lprocfs_vars *list, void *data) { return NULL; }
-#define LPROCFS_INIT_MULTI_VARS(array, size)
+#define LPROCFS_INIT_MULTI_VARS(array, size) do {} while (0)
static inline void lprocfs_init_multi_vars(unsigned int idx,
struct lprocfs_static_vars *x) { return; }
-#define LPROCFS_INIT_VARS(vclass, vinstance)
-static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; }
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) /* expands to nothing:
+ * the LPROCFS variant defines a function, so this macro is invoked at file
+ * scope, where a statement such as "do {} while (0)" does not compile.
+ * NOTE(review): LPROCFS_INIT_MULTI_VARS above probably needs the same
+ * treatment - confirm where it is invoked. */
+#define lprocfs_init_vars(...) do {} while (0) /* called inside functions */
static inline int lprocfs_add_vars(struct proc_dir_entry *root,
struct lprocfs_vars *var,
void *data) { return 0; }
static inline void lprocfs_remove(struct proc_dir_entry *root) {};
static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
const char *name) {return 0;}
-struct obd_device;
static inline int lprocfs_obd_attach(struct obd_device *dev,
struct lprocfs_vars *list) { return 0; }
static inline int lprocfs_obd_detach(struct obd_device *dev) { return 0; }
int count, int *eof, void *data) { return 0; }
/* Statfs helpers */
-struct statfs;
static inline
int lprocfs_rd_blksize(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_rd_filestotal(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_rd_filesfree(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_rd_filegroups(char *page, char **start, off_t off,
- int count, int *eof, struct statfs *sfs) { return 0; }
+ int count, int *eof, void *data) { return 0; }
static inline
int lprocfs_counter_read(char *page, char **start, off_t off,
int count, int *eof, void *data) { return 0; }
-struct file;
static inline
int lprocfs_counter_write(struct file *file, const char *buffer,
unsigned long count, void *data) { return 0; }
-
-#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \
-int fct_name(char *page, char **start, off_t off, \
- int count, int *eof, void *data) { *eof = 1; return 0; }
-
#endif /* LPROCFS */
#endif /* LPROCFS_SNMP_H */
#ifndef _COMPAT25_H
#define _COMPAT25_H
-#include <linux/portals_compat25.h>
+#ifdef __KERNEL__
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-#define KDEVT_VAL(dev, val) dev.value = 0
-#else
-#define KDEVT_VAL(dev, val) dev = 0
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69)
+#error sorry, lustre requires at least 2.5.69
#endif
+#include <linux/portals_compat25.h>
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
# define PGCACHE_WRLOCK(mapping) write_lock(&mapping->page_lock)
# define PGCACHE_WRUNLOCK(mapping) write_unlock(&mapping->page_lock)
-#else
+
+#define KDEVT_INIT(val) { .value = val }
+#define LTIME_S(time) (time.tv_sec)
+#define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp, 1)
+#define ll_path_lookup path_lookup
+
+
+#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock)
+#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock)
+
+#else /* 2.4.. */
+
# define PGCACHE_WRLOCK(mapping) spin_lock(&pagecache_lock)
# define PGCACHE_WRUNLOCK(mapping) spin_unlock(&pagecache_lock)
+
+/* 2.5 uses hlists for some things, like the d_hash.  Callers are written
+ * against the 2.5 hlist names; on 2.4 these macros map them onto list_head. */
+#define hlist_entry list_entry
+#define hlist_head list_head
+#define hlist_node list_head
+#define HLIST_HEAD LIST_HEAD
+#define INIT_HLIST_HEAD INIT_LIST_HEAD
+#define hlist_del_init list_del_init
+#define hlist_add_head list_add
+#define hlist_for_each_safe list_for_each_safe
+#define KDEVT_INIT(val) (val)
+#define ext3_xattr_set_handle ext3_xattr_set
+#define try_module_get __MOD_INC_USE_COUNT
+#define module_put __MOD_DEC_USE_COUNT
+#define LTIME_S(time) (time)
+#ifndef CONFIG_RH_2_4_20 /* only defined here when this symbol is not set */
+#define cpu_online(cpu) (cpu_online_map & (1UL << (cpu))) /* test bit "cpu"
+ * of cpu_online_map.  The argument is parenthesised so any expression can
+ * be passed, and the constant is unsigned long so bits at or above the
+ * width of int stay reachable (presumably cpu_online_map is an unsigned
+ * long bitmask - confirm). */
#endif
+#define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp)
+static inline int ll_path_lookup(const char *path, unsigned flags,
+ struct nameidata *nd)
+{
+ int error = 0;
+ if (path_init(path, flags, nd))
+ error = path_walk(path, nd);
+ return error;
+}
+typedef long sector_t;
+
+#define ll_pgcache_lock(mapping) spin_lock(&pagecache_lock)
+#define ll_pgcache_unlock(mapping) spin_unlock(&pagecache_lock)
+#endif /* end of 2.4 compat macros */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
# define filemap_fdatasync(mapping) filemap_fdatawrite(mapping)
# define Page_Uptodate(page) PageUptodate(page)
#endif
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0)
-#else
-# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp)
-#endif
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define LL_CHECK_DIRTY(sb) do { }while(0)
-#else
-# define LL_CHECK_DIRTY(sb) ll_check_dirty(sb)
-#endif
-
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
#define rb_node_s rb_node
#define rb_root_s rb_root
typedef struct rb_node_s rb_node_t;
#endif
+#endif /* __KERNEL__ */
#endif /* _COMPAT25_H */
* it's no longer in use. If the lock is not granted, a process sleeps
* on this waitq to learn when it becomes granted. */
wait_queue_head_t l_waitq;
+ struct timeval l_enqueued_time;
};
typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
ldlm_res_iterator_t iter, void *closure);
int ldlm_replay_locks(struct obd_import *imp);
+void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
+ ldlm_iterator_t iter, void *data);
/* ldlm_extent.c */
int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
/* mds/handler.c */
/* This has to be here because recurisve inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag);
#define __EXPORT_H
#include <linux/lustre_idl.h>
-#include <linux/obd_filter.h>
+#include <linux/lustre_dlm.h>
struct mds_client_data;
struct list_head med_open_head;
spinlock_t med_open_lock;
struct mds_client_data *med_mcd;
- int med_off;
+ loff_t med_off;
+ int med_idx;
};
struct ldlm_export_data {
struct list_head eced_locks;
};
+/* In-memory access to client data from OST struct.
+ * Embedded in struct obd_export as eu_filter_data. */
+struct filter_client_data;
+struct filter_export_data {
+ struct list_head fed_open_head; //files to close on disconnect
+ spinlock_t fed_lock; /* protects fed_open_head */
+ struct filter_client_data *fed_fcd; /* type is only forward-declared here */
+ loff_t fed_lr_off; /* presumably offset of this client's last_rcvd record - TODO confirm */
+ int fed_lr_idx; /* presumably index of that record - TODO confirm */
+};
+
struct obd_export {
struct portals_handle exp_handle;
atomic_t exp_refcount;
struct ptlrpc_request *exp_outstanding_reply;
time_t exp_last_request_time;
spinlock_t exp_lock; /* protects flags int below */
- int exp_failed:1, exp_failover:1;
+ int exp_failed:1;
+ int exp_flags;
union {
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
#include <linux/obd.h>
#include <linux/fs.h>
-typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error);
+typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
+ void *data, int error);
struct fsfilt_objinfo {
struct dentry *fso_dentry;
struct list_head fs_list;
struct module *fs_owner;
char *fs_type;
- void *(* fs_start)(struct inode *inode, int op);
+ void *(* fs_start)(struct inode *inode, int op, void *desc_private);
void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
- int niocount, struct niobuf_remote *nb);
+ int niocount, void *desc_private);
int (* fs_commit)(struct inode *inode, void *handle,int force_sync);
int (* fs_setattr)(struct dentry *dentry, void *handle,
struct iattr *iattr, int do_trunc);
loff_t *offset);
int (* fs_journal_data)(struct file *file);
int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd,
- void *handle, fsfilt_cb_t cb_func);
+ void *handle, fsfilt_cb_t cb_func,
+ void *cb_data);
int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
int (* fs_sync)(struct super_block *sb);
int (* fs_prep_san_write)(struct inode *inode, long *blocks,
int nblocks, loff_t newsize);
+ int (* fs_write_record)(struct file *, char *, int size, loff_t *);
+ int (* fs_read_record)(struct file *, char *, int size, loff_t *);
};
extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops);
extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops);
-extern struct fsfilt_operations *fsfilt_get_ops(char *type);
+extern struct fsfilt_operations *fsfilt_get_ops(const char *type);
extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops);
#define FSFILT_OP_UNLINK 1
#define FSFILT_OP_MKNOD 7
#define FSFILT_OP_SETATTR 8
#define FSFILT_OP_LINK 9
+#define FSFILT_OP_CREATE_LOG 10
+#define FSFILT_OP_UNLINK_LOG 11
-static inline void *fsfilt_start(struct obd_device *obd,
- struct inode *inode, int op)
+static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode,
+ int op, struct obd_trans_info *oti)
{
unsigned long now = jiffies;
- void *handle = obd->obd_fsops->fs_start(inode, op);
- CDEBUG(D_HA, "started handle %p\n", handle);
- if (time_after(jiffies, now + 15*HZ))
+ void *parent_handle = oti ? oti->oti_handle : NULL; /* handle already held by oti, if any */
+ void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle);
+ CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+ /* fsfilt_start: call the backend's fs_start method, passing along any
+ * handle already stored in oti (oti may be NULL), and return the
+ * method's result.  When oti had no handle yet, the new one is recorded
+ * in it; when it had one, the backend must hand back that same handle,
+ * otherwise the mismatch is logged and LBUG() is invoked.
+ * NOTE(review): the returned handle is stored without any error
+ * check - confirm how fs_start reports failure. */
+ if (oti != NULL) {
+ if (parent_handle == NULL) {
+ oti->oti_handle = handle;
+ } else if (handle != parent_handle) {
+ CERROR("mismatch: parent %p, handle %p, oti %p\n",
+ parent_handle, handle, oti->oti_handle);
+ LBUG();
+ }
+ }
+ if (time_after(jiffies, now + 15 * HZ)) /* complain once more than 15 * HZ jiffies have passed */
CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
return handle;
}
static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount,
struct fsfilt_objinfo *fso, int niocount,
- struct niobuf_remote *nb)
+ struct obd_trans_info *oti)
{
unsigned long now = jiffies;
- void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb);
- CDEBUG(D_HA, "started handle %p\n", handle);
- if (time_after(jiffies, now + 15*HZ))
+ void *parent_handle = oti ? oti->oti_handle : NULL; /* handle already held by oti, if any */
+ void *handle;
+ /* fsfilt_brw_start: call the backend's fs_brw_start method for objcount
+ * objects / niocount buffers, passing along any handle already stored
+ * in oti (oti may be NULL), and return the method's result. */
+ handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,
+ parent_handle);
+ CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle);
+ /* When oti had no handle yet, record the new one in it; when it had
+ * one, the backend must hand back that same handle, otherwise the
+ * mismatch is logged and LBUG() is invoked.
+ * NOTE(review): the returned handle is stored without any error
+ * check - confirm how fs_brw_start reports failure. */
+ if (oti != NULL) {
+ if (parent_handle == NULL) {
+ oti->oti_handle = handle;
+ } else if (handle != parent_handle) {
+ CERROR("mismatch: parent %p, handle %p, oti %p\n",
+ parent_handle, handle, oti->oti_handle);
+ LBUG();
+ }
+ }
+ if (time_after(jiffies, now + 15 * HZ)) /* complain once more than 15 * HZ jiffies have passed */
CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
return handle;
}
unsigned long now = jiffies;
int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync);
CDEBUG(D_HA, "committing handle %p\n", handle);
- if (time_after(jiffies, now + 15*HZ))
+ if (time_after(jiffies, now + 15 * HZ))
CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
return rc;
}
unsigned long now = jiffies;
int rc;
rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
- if (time_after(jiffies, now + 15*HZ))
+ if (time_after(jiffies, now + 15 * HZ))
CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
-
return rc;
}
}
static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
- void *handle, fsfilt_cb_t cb_func)
+ void *handle, fsfilt_cb_t cb_func,
+ void *cb_data)
{
- return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func);
+ return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd, handle,
+ cb_func, cb_data);
}
static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs,
return obd->obd_fsops->fs_prep_san_write(inode, blocks,
nblocks, newsize);
}
+/* Read a record by calling the backend's fs_read_record method with file,
+ * buf, size and offs, and return that method's result.  The method pointer
+ * is called without a NULL check.
+ * NOTE(review): size is loff_t here but int in the fs_read_record
+ * prototype, so it is narrowed at the call - confirm callers stay in range. */
+static inline int fsfilt_read_record(struct obd_device *obd, struct file *file,
+ char *buf, loff_t size, loff_t *offs)
+{
+ return obd->obd_fsops->fs_read_record(file, buf, size, offs);
+}
+/* Write a record by calling the backend's fs_write_record method with file,
+ * buf, size and offs, and return that method's result.  The method pointer
+ * is called without a NULL check.
+ * NOTE(review): size is loff_t here but int in the fs_write_record
+ * prototype, so it is narrowed at the call - confirm callers stay in range. */
+static inline int fsfilt_write_record(struct obd_device *obd, struct file *file,
+ char *buf, loff_t size, loff_t *offs)
+{
+ return obd->obd_fsops->fs_write_record(file, buf, size, offs);
+}
+
#endif /* __KERNEL__ */
#endif
* along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * (Un)packing of OST requests
+ * Lustre wire protocol definitions.
*
* We assume all nodes are either little-endian or big-endian, and we
* always send messages in the sender's native format. The receiver
* implemented either here, inline (trivial implementations) or in
* ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other"
* endian, in-place in the message buffer.
- *
+ *
* A swabber takes a single pointer argument. The caller must already have
- * verified that the length of the message buffer >= sizeof (type).
+ * verified that the length of the message buffer >= sizeof (type).
*
* For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine
* may be defined that swabs just the variable part, after the caller has
* FOO_BULK_PORTAL is for incoming bulk on the FOO
*/
-#define CONNMGR_REQUEST_PORTAL 1
-#define CONNMGR_REPLY_PORTAL 2
-//#define OSC_REQUEST_PORTAL 3
-#define OSC_REPLY_PORTAL 4
-//#define OSC_BULK_PORTAL 5
-#define OST_REQUEST_PORTAL 6
-//#define OST_REPLY_PORTAL 7
-#define OST_BULK_PORTAL 8
-//#define MDC_REQUEST_PORTAL 9
-#define MDC_REPLY_PORTAL 10
-//#define MDC_BULK_PORTAL 11
-#define MDS_REQUEST_PORTAL 12
-//#define MDS_REPLY_PORTAL 13
-#define MDS_BULK_PORTAL 14
-#define LDLM_CB_REQUEST_PORTAL 15
-#define LDLM_CB_REPLY_PORTAL 16
+#define CONNMGR_REQUEST_PORTAL 1
+#define CONNMGR_REPLY_PORTAL 2
+//#define OSC_REQUEST_PORTAL 3
+#define OSC_REPLY_PORTAL 4
+//#define OSC_BULK_PORTAL 5
+#define OST_REQUEST_PORTAL 6
+//#define OST_REPLY_PORTAL 7
+#define OST_BULK_PORTAL 8
+//#define MDC_REQUEST_PORTAL 9
+#define MDC_REPLY_PORTAL 10
+//#define MDC_BULK_PORTAL 11
+#define MDS_REQUEST_PORTAL 12
+//#define MDS_REPLY_PORTAL 13
+#define MDS_BULK_PORTAL 14
+#define LDLM_CB_REQUEST_PORTAL 15
+#define LDLM_CB_REPLY_PORTAL 16
#define LDLM_CANCEL_REQUEST_PORTAL 17
#define LDLM_CANCEL_REPLY_PORTAL 18
#define PTLBD_REQUEST_PORTAL 19
#define PTLBD_REPLY_PORTAL 20
#define PTLBD_BULK_PORTAL 21
-#define MDS_SETATTR_PORTAL 22
-#define MDS_READPAGE_PORTAL 23
+#define MDS_SETATTR_PORTAL 22
+#define MDS_READPAGE_PORTAL 23
+#define MGMT_REQUEST_PORTAL 24
+#define MGMT_REPLY_PORTAL 25
+#define MGMT_CLI_REQUEST_PORTAL 26
+#define MGMT_CLI_REPLY_PORTAL 27
#define SVC_KILLED 1
#define SVC_EVENT 2
static inline int lustre_msg_swabbed (struct lustre_msg *msg)
{
- return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC));
+ return (msg->magic == __swab32(PTLRPC_MSG_MAGIC));
}
/* Flags that are operation-specific go in the top 16 bits. */
* Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT)
*/
-#define MSG_CONNECT_RECOVERING 0x1
-#define MSG_CONNECT_RECONNECT 0x2
+#define MSG_CONNECT_RECOVERING 0x1
+#define MSG_CONNECT_RECONNECT 0x2
#define MSG_CONNECT_REPLAYABLE 0x4
+#define MSG_CONNECT_PEER 0x8
/*
* OST requests: OBDO & OBD request records
OST_SAN_READ = 14,
OST_SAN_WRITE = 15,
OST_SYNCFS = 16,
+ OST_SET_INFO = 17,
OST_LAST_OPC
} ost_cmd_t;
#define OST_FIRST_OPC OST_REPLY
/* When adding OST RPC opcodes, please update
* LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */
-
typedef uint64_t obd_id;
typedef uint64_t obd_gr;
typedef uint64_t obd_time;
#define OBD_MD_LINKNAME (0x00040000) /* symbolic link target */
#define OBD_MD_FLHANDLE (0x00080000) /* file handle */
#define OBD_MD_FLCKSUM (0x00100000) /* bulk data checksum */
+#define OBD_MD_FLQOS (0x00200000) /* quality of service stats */
+#define OBD_MD_FLOSCOPQ (0x00400000) /* osc opaque data */
+#define OBD_MD_FLCOOKIE (0x00800000) /* log cancellation cookie */
#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\
- OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM))
+ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\
+ OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE))
+
+/* Return the start of oa->o_inline viewed as a struct lustre_handle. */
+static inline struct lustre_handle *obdo_handle(struct obdo *oa)
+{
+        struct lustre_handle *handle = (struct lustre_handle *)oa->o_inline;
+
+        return handle;
+}
+
+/* Return the location sizeof(struct lustre_handle) elements into
+ * oa->o_inline, viewed as a struct llog_cookie (i.e. the slot that
+ * follows the handle returned by obdo_handle()). */
+static inline struct llog_cookie *obdo_logcookie(struct obdo *oa)
+{
+        return (struct llog_cookie *)
+                &oa->o_inline[sizeof(struct lustre_handle)];
+}
struct obd_statfs {
__u64 os_type;
MDS_GETSTATUS = 40,
MDS_STATFS = 41,
MDS_GETLOVINFO = 42,
+ MDS_PIN = 43,
+ MDS_UNPIN = 44,
MDS_LAST_OPC
} mds_cmd_t;
#define MDS_FIRST_OPC MDS_GETATTR
#define REINT_OPEN 6
#define REINT_MAX 6
-#define IT_INTENT_EXEC 1
-#define IT_OPEN_LOOKUP (1 << 1)
-#define IT_OPEN_NEG (1 << 2)
-#define IT_OPEN_POS (1 << 3)
-#define IT_OPEN_CREATE (1 << 4)
-#define IT_OPEN_OPEN (1 << 5)
+/* Intent disposition: one bit per step that was carried out.  The bits
+ * are independent flags and may be OR-ed together. */
+#define DISP_IT_EXECD        (1 << 0)
+#define DISP_LOOKUP_EXECD    (1 << 1)
+#define DISP_LOOKUP_NEG      (1 << 2)
+#define DISP_LOOKUP_POS      (1 << 3)
+#define DISP_OPEN_CREATE     (1 << 4)
+#define DISP_OPEN_OPEN       (1 << 5)
+#define DISP_ENQ_COMPLETE    (1 << 6)
+
+
+struct ll_uctxt {
+ __u32 gid1;
+ __u32 gid2;
+};
struct ll_fid {
__u64 id;
__u32 sa_suppgid;
};
+/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */
+#ifndef ATTR_CTIME_SET
+#define ATTR_CTIME_SET 0x2000
+#endif
+
extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
struct mds_rec_create {
extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r);
/*
+ * Opcodes for management/monitoring node.
+ */
+#define MGMT_CONNECT 250
+#define MGMT_DISCONNECT 251
+#define MGMT_EXCEPTION 252 /* node died, etc. */
+
+/*
* Opcodes for multiple servers.
*/
-#define OBD_PING 400
+#define OBD_PING 400
+#define OBD_LOG_CANCEL 401
+#define OBD_LAST_OPC (OBD_LOG_CANCEL + 1)
+#define OBD_FIRST_OPC OBD_PING
+
+/* catalog of log objects */
+
+/* Identifier for a single log object */
+struct llog_logid {
+ __u64 lgl_oid;
+ __u32 lgl_ogen;
+};
+
+/* Log data record types - there is no specific reason that these need to
+ * be related to the RPC opcodes, but no reason not to (may be handy later?)
+ */
+typedef enum {
+ OST_CREATE_REC = 0x10600000 | (OST_CREATE << 8),
+ OST_ORPHAN_REC = 0x10600000 | (OST_DESTROY << 8),
+ MDS_UNLINK_REC = 0x10610000 | (MDS_REINT << 8) | REINT_UNLINK,
+ LLOG_CATALOG_MAGIC = 0x1062e67d,
+ LLOG_OBJECT_MAGIC = 0x10645539,
+} llog_op_type;
+
+/* Log record header - stored in originating host endian order (use magic to
+ * check order).
+ * Each record must start with this struct, end with a __u32 for the struct
+ * length, and be a multiple of 64 bits in size.
+ */
+struct llog_trans_hdr {
+ __u32 lth_len;
+ __u32 lth_type;
+};
+
+/* Packed log record; it follows the llog record convention of starting
+ * with a struct llog_trans_hdr and ending with a __u32 length word.
+ * NOTE(review): presumably written with type OST_CREATE_REC -- confirm. */
+struct llog_create_rec {
+ struct llog_trans_hdr lcr_hdr; /* common header: lth_len + lth_type */
+ struct ll_fid lcr_fid;
+ obd_id lcr_oid;
+ obd_count lcr_ogen;
+ __u32 lcr_end_len; /* trailing length word; presumably mirrors lcr_hdr.lth_len -- confirm */
+} __attribute__((packed));
+
+/* Packed log record; it follows the llog record convention of starting
+ * with a struct llog_trans_hdr and ending with a __u32 length word.
+ * NOTE(review): presumably written with type OST_ORPHAN_REC -- confirm. */
+struct llog_orphan_rec {
+ struct llog_trans_hdr lor_hdr; /* common header: lth_len + lth_type */
+ obd_id lor_oid;
+ obd_count lor_ogen;
+ __u32 lor_end_len; /* trailing length word; presumably mirrors lor_hdr.lth_len -- confirm */
+} __attribute__((packed));
+
+/* Packed log record; it follows the llog record convention of starting
+ * with a struct llog_trans_hdr and ending with a __u32 length word.
+ * NOTE(review): presumably written with type MDS_UNLINK_REC -- confirm. */
+struct llog_unlink_rec {
+ struct llog_trans_hdr lur_hdr; /* common header: lth_len + lth_type */
+ obd_id lur_oid;
+ obd_count lur_ogen;
+ __u32 lur_end_len; /* trailing length word; presumably mirrors lur_hdr.lth_len -- confirm */
+} __attribute__((packed));
+
+/* On-disk header structure of each log object - stored in creating host
+ * endian order, with the exception of the bitmap - stored in little endian
+ * order so that we can use ext2_{clear,set,test}_bit() for proper/optimized
+ * little-endian handling of bitmaps (which are otherwise a pain to handle).
+ */
+#define LLOG_CHUNK_SIZE 4096
+#define LLOG_HEADER_SIZE (96)
+#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE)
+
+#define LLOG_MIN_REC_SIZE (16) /* round(struct llog_trans_hdr+end_len) */
+
+/* On-disk header of one log object.  The fixed fields are followed by
+ * the record bitmap (LLOG_BITMAP_BYTES bytes) and one trailing __u32. */
+struct llog_object_hdr {
+ struct llog_trans_hdr llh_hdr; /* lth_type is matched against LLOG_OBJECT_MAGIC by llog_log_swabbed() */
+ __u64 llh_timestamp;
+ __u32 llh_count;
+ __u16 llh_bitmap_offset;
+ __u16 llh_unused;
+ struct obd_uuid llh_tgtuuid;
+ __u8 llh_padding[3]; /* NOTE(review): presumably rounds llh_tgtuuid up to a 4-byte boundary -- confirm against sizeof(struct obd_uuid) */
+ __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32)-17]; /* 96/4 - 17 = 7 words with the current LLOG_HEADER_SIZE */
+ __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)]; /* LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE bytes of bitmap */
+ __u32 llh_hdr_end_len; /* presumably the trailing length word that every log record ends with -- confirm */
+};
+
+/* Classify a log header by the magic held in llh_hdr.lth_type:
+ *   1  - it equals the byte-swapped LLOG_OBJECT_MAGIC,
+ *   0  - it equals LLOG_OBJECT_MAGIC as-is,
+ *  -1  - it matches neither value. */
+static inline int llog_log_swabbed(struct llog_object_hdr *hdr)
+{
+        const __u32 magic = hdr->llh_hdr.lth_type;
+
+        if (magic == __swab32(LLOG_OBJECT_MAGIC))
+                return 1;
+
+        return (magic == LLOG_OBJECT_MAGIC) ? 0 : -1;
+}
+
+/* log cookies are used to reference a specific log file and a record therein */
+struct llog_cookie {
+ struct llog_logid lgc_lgl; /* identifier of the log object */
+ __u32 lgc_index; /* presumably the index of the record within that log -- confirm */
+};
#endif
int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf);
int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf);
-int client_obd_cleanup(struct obd_device * obddev, int force, int failover);
+int client_obd_cleanup(struct obd_device * obddev, int flags);
struct client_obd *client_conn2cli(struct lustre_handle *conn);
struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid);
* the server, we can just send the whole struct unaltered. */
struct obd_client_handle {
struct lustre_handle och_fh;
+ struct llog_cookie och_cookie;
struct ptlrpc_request *och_req;
__u32 och_magic;
};
#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
/* statfs_pack.c */
-int obd_self_statfs(struct obd_device *dev, struct statfs *sfs);
+struct statfs;
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
/* l_lock.c */
struct lustre_lock {
#ifdef __KERNEL__
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#include <asm/statfs.h>
+#endif
+
#include <linux/fs.h>
+#include <linux/dcache.h>
#include <linux/ext2_fs.h>
#include <linux/proc_fs.h>
__u32 it_lock_mode;
};
+#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
+
+/* Return a pointer to the lookup intent carried by nd.  On kernels
+ * >= 2.5.0 this is the address of nd->it; on older kernels nd->it is
+ * returned as-is. */
+static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
+{
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ return &nd->it;
+#else
+ return nd->it;
+#endif
+}
+
struct ll_dentry_data {
- struct semaphore lld_it_sem;
+ int lld_cwd_count;
+ int lld_mnt_count;
+ struct obd_client_handle lld_cwd_och;
+ struct obd_client_handle lld_mnt_och;
};
-#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata)
+/* Fetch the ll_dentry_data stored in a dentry's d_fsdata.  "de" is
+ * parenthesized so that any pointer-valued expression may be passed. */
+#define ll_d2d(de) ((struct ll_dentry_data *)(de)->d_fsdata)
extern struct file_operations ll_pgcache_seq_fops;
+/*
+ * XXX used in obdecho/echo_client.c; must move (pjb)
+ * 'p' list, as it's a list of pages linked together
+ * by ->private.
+ */
+struct plist {
+ struct page *pl_head;
+ struct page *pl_tail;
+ int pl_num;
+};
+
+struct ll_dirty_offsets {
+ rb_root_t do_root;
+ spinlock_t do_lock;
+ unsigned long do_num_dirty;
+};
+
+struct ll_writeback_pages {
+ obd_count npgs, max;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+ int rw;
+ struct inode *inode;
+ struct brw_page pga[0];
+#else
+ struct brw_page *pga;
+#endif
+};
+
struct ll_inode_info {
struct lov_stripe_md *lli_smd;
char *lli_symlink_name;
struct semaphore lli_open_sem;
struct list_head lli_read_extents;
- loff_t lli_maxbytes;
+ __u64 lli_maxbytes;
spinlock_t lli_read_extent_lock;
unsigned long lli_flags;
#define LLI_F_HAVE_SIZE_LOCK 0
struct ldlm_extent re_extent;
};
-int ll_check_dirty( struct super_block *sb );
-int ll_batch_writepage( struct inode *inode, struct page *page );
-
-/* interpet return codes from intent lookup */
-#define LL_LOOKUP_POSITIVE 1
-#define LL_LOOKUP_NEGATIVE 2
-
#define LL_SUPER_MAGIC 0x0BD00BD0
#define LL_COMMITCBD_STOPPING 0x1
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
};
-static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
-{
+
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
- return (struct ll_sb_info *)(sb->s_fs_info);
-#else
- return (struct ll_sb_info *)(sb->u.generic_sbp);
-#endif
+#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->s_fs_info))
+void __d_rehash(struct dentry * entry, int lock);
+/* Reduce a timespec to a __u64: only the tv_sec member is used. */
+static inline __u64 ll_ts2u64(struct timespec *time)
+{
+        return (__u64)time->tv_sec;
+}
+#else /* 2.4 here */
+#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->u.generic_sbp))
+static inline __u64 ll_ts2u64(time_t *time)
+{
+ return *time;
}
+#endif
static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb)
{
return ll_s2sbi(inode->i_sb);
}
-static inline void d_unhash_aliases(struct inode *inode)
-{
- struct dentry *dentry = NULL;
- struct list_head *tmp;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- ENTRY;
-
- CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
- inode->i_ino, inode->i_generation);
-
- spin_lock(&dcache_lock);
- list_for_each(tmp, &inode->i_dentry) {
- dentry = list_entry(tmp, struct dentry, d_alias);
-
- list_del_init(&dentry->d_hash);
- dentry->d_flags |= DCACHE_LUSTRE_INVALID;
- list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
- }
-
- spin_unlock(&dcache_lock);
- EXIT;
-}
-
// FIXME: replace the name of this with LL_I to conform to kernel stuff
// static inline struct ll_inode_info *LL_I(struct inode *inode)
static inline struct ll_inode_info *ll_i2info(struct inode *inode)
return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize;
}
-static inline loff_t ll_file_maxbytes(struct inode *inode)
+static inline __u64 ll_file_maxbytes(struct inode *inode)
{
return ll_i2info(inode)->lli_maxbytes;
}
/* namei.c */
-int ll_lock(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it, struct lustre_handle *lockh);
-int ll_unlock(__u32 mode, struct lustre_handle *lockh);
-
-typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *,
+typedef int (*intent_finish_cb)(struct ptlrpc_request *,
struct inode *parent, struct dentry **,
struct lookup_intent *, int offset, obd_id ino);
int ll_intent_lock(struct inode *parent, struct dentry **,
- struct lookup_intent *, intent_finish_cb);
+ struct lookup_intent *, int, intent_finish_cb);
int ll_mdc_blocking_ast(struct ldlm_lock *lock,
struct ldlm_lock_desc *desc,
void *data, int flag);
struct inode *i1, struct inode *i2,
const char *name, int namelen, int mode);
-/* dcache.c */
-void ll_intent_release(struct dentry *, struct lookup_intent *);
-
-/****
-
-I originally implmented these as functions, then realized a macro
-would be more helpful for debugging, so the CDEBUG messages show
-the current calling function. The orignal functions are in llite/dcache.c
-
-int ll_save_intent(struct dentry * de, struct lookup_intent * it);
-struct lookup_intent * ll_get_intent(struct dentry * de);
-****/
-
-#define IT_RELEASED_MAGIC 0xDEADCAFE
-
-#define LL_SAVE_INTENT(de, it) \
-do { \
- LASSERT(ll_d2d(de) != NULL); \
- \
- down(&ll_d2d(de)->lld_it_sem); \
- LASSERT(de->d_it == NULL); \
- de->d_it = it; \
- CDEBUG(D_DENTRY, \
- "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n", \
- de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op), \
- atomic_read(&(ll_d2d(de)->lld_it_sem.count))); \
-} while(0)
-
-#define LL_GET_INTENT(de, it) \
-do { \
- it = de->d_it; \
- \
- LASSERT(ll_d2d(de) != NULL); \
- LASSERT(it); \
- LASSERT(it->it_op != IT_RELEASED_MAGIC); \
- \
- CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n", \
- de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op)); \
- de->d_it = NULL; \
- it->it_op = IT_RELEASED_MAGIC; \
- up(&ll_d2d(de)->lld_it_sem); \
-} while(0)
-
-#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
-
+/* lprocfs.c */
enum {
LPROC_LL_DIRTY_HITS = 0,
LPROC_LL_DIRTY_MISSES,
extern struct inode_operations ll_file_inode_operations;
extern struct inode_operations ll_special_inode_operations;
struct ldlm_lock;
-int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *,
- void *data, int flag);
int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
struct lov_stripe_md *lsm, int mode,
struct ldlm_extent *extent, struct lustre_handle *lockh);
int ll_file_release(struct inode *inode, struct file *file);
-/* rw.c */
-struct page *ll_getpage(struct inode *inode, unsigned long offset,
- int create, int locked);
-void ll_truncate(struct inode *inode);
/* super.c */
void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *);
int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
+ unsigned long maxage);
/* symlink.c */
extern struct inode_operations ll_fast_symlink_inode_operations;
extern struct inode_operations ll_symlink_inode_operations;
-/* sysctl.c */
-void ll_sysctl_init(void);
-void ll_sysctl_clean(void);
-
#else
#include <linux/lustre_idl.h>
#endif /* __KERNEL__ */
-static inline void ll_ino2fid(struct ll_fid *fid,
- obd_id ino,
- __u32 generation,
+static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation,
int type)
{
fid->id = ino;
fid->f_type = type;
}
-struct ll_read_inode2_cookie {
- struct mds_body *lic_body;
- struct lov_stripe_md *lic_lsm;
-};
-
#include <asm/types.h>
#define LL_IOC_GETFLAGS _IOR ('f', 151, long)
#include <linux/lustre_idl.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_dlm.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_export.h>
struct ldlm_lock_desc;
struct mds_obd;
#define LUSTRE_MDT_NAME "mdt"
#define LUSTRE_MDC_NAME "mdc"
+struct lustre_md {
+ struct mds_body *body;
+ struct lov_stripe_md *lsm;
+};
+
struct mdc_rpc_lock {
struct semaphore rpcl_sem;
struct lookup_intent *rpcl_it;
char *ur_tgt;
int ur_eadatalen;
void *ur_eadata;
+ int ur_cookielen;
+ struct llog_cookie *ur_logcookies;
struct iattr ur_iattr;
struct obd_ucred ur_uc;
__u64 ur_rdev;
#define ur_suppgid1 ur_uc.ouc_suppgid1
#define ur_suppgid2 ur_uc.ouc_suppgid2
-#define MDS_LR_CLIENT 8192
-#define MDS_LR_SIZE 128
+/* i_attr_flags holds the open count in the inode in 2.4 */
+//Alex implement on 2.4 with i_attr_flags and find soln for 2.5 please
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* No counter is kept on 2.5 yet: the count reads as 0, inc does nothing
+ * and dec_test evaluates to 0.  The no-op must not carry its own
+ * trailing semicolon, otherwise "if (x) mds_open_orphan_inc(i); else ..."
+ * fails to compile and the macro behaves differently from the 2.4 one. */
+# define mds_open_orphan_count(inode) (0)
+# define mds_open_orphan_inc(inode) do { } while (0)
+# define mds_open_orphan_dec_test(inode) (0)
+#else
+/* 2.4: the counter is inode->i_attr_flags accessed through an atomic_t
+ * pointer. */
+# define mds_inode_oatomic(inode) ((atomic_t *)&(inode)->i_attr_flags)
+# define mds_open_orphan_count(inode) \
+        atomic_read(mds_inode_oatomic(inode))
+# define mds_open_orphan_inc(inode) \
+        atomic_inc(mds_inode_oatomic(inode))
+# define mds_open_orphan_dec_test(inode) \
+        atomic_dec_and_test(mds_inode_oatomic(inode))
+#endif
+/* Orphan marker: bit 0x4000000 of inode->i_flags.  Both expansions are
+ * fully parenthesized so they can be used inside larger expressions. */
+#define mds_inode_is_orphan(inode) ((inode)->i_flags & 0x4000000)
+#define mds_inode_set_orphan(inode) ((inode)->i_flags |= 0x4000000)
+
+#define MDS_LR_SERVER_SIZE 512
+
+#define MDS_LR_CLIENT_START 8192
+#define MDS_LR_CLIENT_SIZE 128
+#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE
+#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE"
+#endif
#define MDS_CLIENT_SLOTS 17
/* Data stored per server at the head of the last_rcvd file. In le32 order. */
struct mds_server_data {
- __u8 msd_uuid[37]; /* server UUID */
- __u8 uuid_padding[3]; /* unused */
- __u64 msd_last_transno; /* last completed transaction ID */
- __u64 msd_mount_count; /* MDS incarnation number */
- __u8 padding[512 - 56];
+ __u8 msd_uuid[37]; /* server UUID */
+ __u8 uuid_padding[3]; /* unused */
+// __u64 msd_last_objid; /* last created object ID */
+ __u64 msd_last_transno; /* last completed transaction ID */
+ __u64 msd_mount_count; /* MDS incarnation number */
+ __u64 msd_padding_until_last_objid_is_enabled;
+ __u32 msd_feature_compat; /* compatible feature flags */
+ __u32 msd_feature_rocompat;/* read-only compatible feature flags */
+ __u32 msd_feature_incompat;/* incompatible feature flags */
+ __u32 msd_server_size; /* size of server data area */
+ __u32 msd_client_start; /* start of per-client data area */
+ __u16 msd_client_size; /* size of per-client data area */
+ __u16 msd_subdir_count; /* number of subdirectories for objects */
+ __u64 msd_catalog_oid; /* recovery catalog object id */
+ __u32 msd_catalog_ogen; /* recovery catalog inode generation */
+ __u8 msd_peeruuid[37]; /* UUID of LOV/OSC associated with MDS */
+ __u8 peer_padding[3]; /* unused */
+ __u8 msd_padding[MDS_LR_SERVER_SIZE - 140];
};
/* Data stored per client in the last_rcvd file. In le32 order. */
__u64 mcd_last_xid; /* xid for the last transaction */
__u32 mcd_last_result; /* result from last RPC */
__u32 mcd_last_data; /* per-op data (disposition for open &c.) */
- __u8 padding[MDS_LR_SIZE - 74];
+ __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 72];
};
/* file data for open files on MDS */
int mds_reint_rec(struct mds_update_record *r, int offset,
struct ptlrpc_request *req, struct lustre_handle *);
-/* mds/mds_open.c */
-int mds_open(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req, struct lustre_handle *);
-
/* mds/handler.c */
#ifdef __KERNEL__
struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir,
int offset, struct mds_body *body, struct inode *inode);
void mds_steal_ack_locks(struct obd_export *exp,
struct ptlrpc_request *req);
+int mds_update_server_data(struct obd_device *);
/* mds/mds_fs.c */
int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
int mds_fs_cleanup(struct obd_device *obddev, int failover);
#endif
+/* mds/mds_lov.c */
+extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
+ struct obd_uuid *uuidarray);
+extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc);
+
/* mdc/mdc_request.c */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+ struct lustre_handle *obd_import,
+ struct lustre_md *md);
int mdc_enqueue(struct lustre_handle *conn, int lock_type,
struct lookup_intent *it, int lock_mode,
struct mdc_op_data *enq_data,
unsigned int ea_size, struct ptlrpc_request **request);
int mdc_setattr(struct lustre_handle *conn,
struct mdc_op_data *data,
- struct iattr *iattr, void *ea, int ealen,
+ struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
struct ptlrpc_request **request);
int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
/* OST_MAXREQSIZE ~= 1640 bytes =
* lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
*
- * single object with 16 pages is 512 bytes
+ * - single object with 16 pages is 512 bytes
+ * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
*/
-#define OST_MAXREQSIZE (2 * 1024)
+#define OST_MAXREQSIZE (5 * 1024)
#define PTLBD_NUM_THREADS 4
#define PTLBD_NEVENTS 1024
* big enough. For _tons_ of context, OBD_ALLOC a struct and store
* a pointer to it here. The pointer_arg ensures this struct is at
* least big enough for that. */
- void *pointer_arg[4];
+ void *pointer_arg[5];
__u64 space[4];
};
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+
struct ptlrpc_request_set {
int set_remaining; /* # uncompleted requests */
wait_queue_head_t set_waitq;
+ wait_queue_head_t *set_wakeup_ptr;
struct list_head set_requests;
- void *set_interpret; /* completion callback */
+ set_interpreter_func set_interpret; /* completion callback */
union ptlrpc_async_args set_args; /* completion context */
};
#ifndef __OBD_H
#define __OBD_H
+#define IOC_OSC_TYPE 'h'
+#define IOC_OSC_MIN_NR 20
+#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
+#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR 50
+
+#define IOC_MDC_TYPE 'i'
+#define IOC_MDC_MIN_NR 20
+#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
+#define IOC_MDC_MAX_NR 50
+
+#ifdef __KERNEL__
+# include <linux/fs.h>
+# include <linux/list.h>
+# include <linux/sched.h> /* for struct task_struct, for current.h */
+# include <asm/current.h> /* for smp_lock.h */
+# include <linux/smp_lock.h>
+# include <linux/proc_fs.h>
+# include <linux/mount.h>
+#endif
+
+#include <linux/lustre_lib.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_export.h>
#include <linux/lustre_otree.h>
struct lov_oinfo { /* per-child structure */
struct lov_oinfo lsm_oinfo[0];
};
-#define IOC_OSC_TYPE 'h'
-#define IOC_OSC_MIN_NR 20
-#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
-#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR 50
-
-#define IOC_MDC_TYPE 'i'
-#define IOC_MDC_MIN_NR 20
-#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_MAX_NR 50
-
-#ifdef __KERNEL__
-# include <linux/fs.h>
-# include <linux/list.h>
-# include <linux/sched.h> /* for struct task_struct, for current.h */
-# include <asm/current.h> /* for smp_lock.h */
-# include <linux/smp_lock.h>
-# include <linux/proc_fs.h>
-
-# include <linux/lustre_lib.h>
-# include <linux/lustre_idl.h>
-# include <linux/lustre_mds.h>
-# include <linux/lustre_export.h>
-#endif
-
struct obd_type {
struct list_head typ_chain;
struct obd_ops *typ_ops;
struct filter_obd {
const char *fo_fstype;
- char *fo_nspath;
+ char *fo_nspath;
struct super_block *fo_sb;
struct vfsmount *fo_vfsmnt;
struct obd_run_ctxt fo_ctxt;
spinlock_t fo_grant_lock; /* protects tot_granted */
obd_size fo_tot_granted;
obd_size fo_tot_cached;
+
+ struct llog_handle *fo_catalog;
+ struct obd_import *fo_mdc_imp;
+ struct obd_uuid fo_mdc_uuid;
+ struct lustre_handle fo_mdc_conn;
+ struct ptlrpc_client fo_mdc_client;
+ struct llog_commit_data *fo_llcd;
+ struct semaphore fo_sem; /* protects fo_llcd */
};
struct mds_server_data;
struct client_obd {
- struct obd_import *cl_import;
- struct semaphore cl_sem;
- int cl_conn_count;
+ struct obd_import *cl_import;
+ struct semaphore cl_sem;
+ int cl_conn_count;
/* max_mds_easize is purely a performance thing so we don't have to
* call obd_size_wiremd() all the time. */
- int cl_max_mds_easize;
- struct obd_device *cl_containing_lov;
- kdev_t cl_sandev;
- struct semaphore cl_dirty_sem;
- obd_size cl_dirty; /* both in bytes */
- obd_size cl_dirty_granted;
- /* this is just to keep existing infinitely caching behaviour between
- * clients and OSTs that don't have the grant code in yet.. it can
+ int cl_max_mds_easize;
+ int cl_max_mds_cookiesize;
+ /* XXX can we replace cl_containing_lov with mgmt-events? */
+ struct obd_device *cl_containing_lov;
+ kdev_t cl_sandev;
+
+ struct llog_commit_data *cl_llcd;
+ void *cl_llcd_offset;
+
+ struct semaphore cl_dirty_sem;
+ obd_size cl_dirty; /* both in bytes */
+ obd_size cl_dirty_granted;
+
+ struct obd_device *cl_mgmtcli_obd;
+
+ /* this is just to keep existing infinitely caching behaviour between
+ * clients and OSTs that don't have the grant code in yet.. it can
* be yanked once everything speaks grants */
- char cl_ost_can_grant;
+ char cl_ost_can_grant;
};
+/* Like a client, with some hangers-on. Keep mc_client_obd first so that we
+ * can reuse the various client setup/connect functions. */
+struct mgmtcli_obd {
+ struct client_obd mc_client_obd; /* nested */
+ struct ptlrpc_thread *mc_ping_thread;
+ struct lustre_handle mc_ping_handle; /* XXX single-target */
+ struct list_head mc_registered;
+ void *mc_hammer;
+};
+
+#define mc_import mc_client_obd.cl_import
+
struct mds_obd {
struct ptlrpc_service *mds_service;
struct ptlrpc_service *mds_setattr_service;
struct address_space_operations *mds_aops;
int mds_max_mdsize;
+ int mds_max_cookiesize;
struct file *mds_rcvd_filp;
spinlock_t mds_transno_lock;
__u64 mds_last_transno;
__u64 mds_mount_count;
struct ll_fid mds_rootfid;
struct mds_server_data *mds_server_data;
+ struct dentry *mds_pending_dir;
+ struct dentry *mds_logs_dir;
+
+ struct llog_handle *mds_catalog;
+ struct obd_device *mds_osc_obd;
+ struct obd_uuid mds_osc_uuid;
+ struct lustre_handle mds_osc_conn;
int mds_has_lov_desc;
struct lov_desc mds_lov_desc;
};
struct echo_obd {
- char *eo_fstype;
struct obdo oa;
spinlock_t eo_lock;
__u64 eo_lastino;
struct lov_tgt_desc {
struct obd_uuid uuid;
struct lustre_handle conn;
+ struct llog_handle *ltd_cathandle;
int active; /* is this target available for requests, etc */
};
struct lov_desc desc;
int bufsize;
int refcount;
+ int lo_catalog_loaded:1;
struct lov_tgt_desc *tgts;
};
#define N_LOCAL_TEMP_PAGE 0x10000000
struct obd_trans_info {
- __u64 oti_transno;
+ __u64 oti_transno;
/* Only used on the server side for tracking acks. */
struct oti_req_ack_lock {
struct lustre_handle lock;
__u32 mode;
} oti_ack_locks[4];
+ void *oti_handle;
+ struct llog_cookie oti_onecookie;
+ struct llog_cookie *oti_logcookies;
+ int oti_numcookies;
};
+/* Point oti->oti_logcookies at storage for num_cookies cookies and record
+ * the count in oti->oti_numcookies.  Does nothing when oti is NULL.  A
+ * request for exactly one cookie uses the embedded oti_onecookie; any
+ * other count goes through OBD_ALLOC.
+ * NOTE(review): the count is stored regardless of what OBD_ALLOC left in
+ * oti_logcookies -- callers presumably must check the pointer; confirm. */
+static inline void oti_alloc_cookies(struct obd_trans_info *oti, int num_cookies)
+{
+        if (oti == NULL)
+                return;
+
+        if (num_cookies != 1) {
+                OBD_ALLOC(oti->oti_logcookies,
+                          num_cookies * sizeof(oti->oti_onecookie));
+        } else {
+                oti->oti_logcookies = &oti->oti_onecookie;
+        }
+
+        oti->oti_numcookies = num_cookies;
+}
+
+/* Undo oti_alloc_cookies().  Does nothing when oti or its cookie pointer
+ * is NULL.  Storage obtained through OBD_ALLOC is handed to OBD_FREE;
+ * the embedded oti_onecookie is only sanity-checked (count must be 1).
+ * Afterwards the pointer is NULL and the count is 0. */
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+        if (oti == NULL || oti->oti_logcookies == NULL)
+                return;
+
+        if (oti->oti_logcookies != &oti->oti_onecookie) {
+                OBD_FREE(oti->oti_logcookies,
+                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+        } else {
+                LASSERT(oti->oti_numcookies == 1);
+        }
+
+        oti->oti_logcookies = NULL;
+        oti->oti_numcookies = 0;
+}
+
/* corresponds to one of the obd's */
struct obd_device {
struct obd_type *obd_type;
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
/* a spinlock is OK for what we do now, may need a semaphore later */
- spinlock_t obd_dev_lock;
+ spinlock_t obd_dev_lock;
__u64 obd_last_committed;
struct fsfilt_operations *obd_fsops;
+ struct obd_statfs obd_osfs;
+ unsigned long obd_osfs_age;
/* XXX encapsulate all this recovery data into one struct */
svc_handler_t obd_recovery_handler;
struct mds_obd mds;
struct client_obd cli;
struct ost_obd ost;
- struct echo_client_obd echo_client;;
+ struct echo_client_obd echo_client;
struct ldlm_obd ldlm;
struct echo_obd echo;
struct recovd_obd recovd;
struct lov_obd lov;
struct cache_obd cobd;
struct ptlbd_obd ptlbd;
+ struct mgmtcli_obd mgmtcli;
} u;
/* Fields used by LProcFS */
unsigned int obd_cntr_base;
struct lprocfs_stats *obd_stats;
};
+#define OBD_OPT_FORCE 0x0001
+#define OBD_OPT_FAILOVER 0x0002
+
+#define OBD_LLOG_FL_SENDNOW 0x0001
+
struct obd_ops {
struct module *o_owner;
int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
int (*o_detach)(struct obd_device *dev);
int (*o_setup) (struct obd_device *dev, obd_count len, void *data);
- int (*o_cleanup)(struct obd_device *dev, int force, int failover);
+ int (*o_cleanup)(struct obd_device *dev, int flags);
int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
struct obd_uuid *cluuid);
- int (*o_disconnect)(struct lustre_handle *conn, int failover);
+ int (*o_disconnect)(struct lustre_handle *conn, int flags);
- int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs);
- int (*o_syncfs)(struct obd_export *);
+ int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age);
+ int (*o_syncfs)(struct obd_export *exp);
int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
struct lov_stripe_md *mem_src);
- int (*o_unpackmd)(struct lustre_handle *,
+ int (*o_unpackmd)(struct lustre_handle *conn,
struct lov_stripe_md **mem_tgt,
struct lov_mds_md *disk_src, int disk_len);
int (*o_preallocate)(struct lustre_handle *, obd_count *req,
int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea);
int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa,
- struct lov_stripe_md *ea,
+ struct lov_stripe_md *ea,
struct ptlrpc_request_set *set);
int (*o_open)(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti,
struct obd_client_handle *och);
int (*o_close)(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti);
- int (*o_brw)(int rw, struct lustre_handle *conn,
+ int (*o_brw)(int rw, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, obd_count oa_bufs,
struct brw_page *pgarr, struct obd_trans_info *oti);
- int (*o_brw_async)(int rw, struct lustre_handle *conn,
+ int (*o_brw_async)(int rw, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, obd_count oa_bufs,
struct brw_page *pgarr, struct ptlrpc_request_set *,
struct obd_trans_info *oti);
- int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt,
+ int (*o_punch)(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, obd_size count,
obd_off offset, struct obd_trans_info *oti);
- int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt,
+ int (*o_sync)(struct lustre_handle *conn, struct obdo *oa,
obd_size count, obd_off offset);
- int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst,
- struct obdo *src, obd_size count, obd_off offset);
- int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst,
- struct lustre_handle *srconn, struct obdo *src,
+ int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+ struct lov_stripe_md *src, obd_size count,
+ obd_off offset);
+ int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+ struct lustre_handle *srconn, struct lov_stripe_md *src,
obd_size count, obd_off offset, struct obd_trans_info *);
int (*o_iterate)(struct lustre_handle *conn,
int (*)(obd_id, obd_gr, void *),
obd_id *startid, obd_gr group, void *data);
- int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo,
+ int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, void **desc_private,
- struct obd_trans_info *oti);
- int (*o_commitrw)(int cmd, struct obd_export *,
+ struct niobuf_local *local, struct obd_trans_info *oti);
+ int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private, struct obd_trans_info *oti);
+ struct obd_trans_info *oti);
int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md,
struct lustre_handle *parent_lock,
__u32 type, void *cookie, int cookielen, __u32 mode,
int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md,
__u32 mode, struct lustre_handle *);
int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *,
- int local_only, void *opaque);
- int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
- int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_remote *remote);
+ int flags, void *opaque);
+ int (*o_log_add)(struct lustre_handle *conn,
+ struct llog_handle *cathandle,
+ struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies);
+ int (*o_log_cancel)(struct lustre_handle *, struct lov_stripe_md *,
+ int count, struct llog_cookie *, int flags);
+ int (*o_san_preprw)(int cmd, struct obd_export *exp,
+ struct obdo *oa, int objcount,
+ struct obd_ioobj *obj, int niocount,
+ struct niobuf_remote *remote);
int (*o_mark_page_dirty)(struct lustre_handle *conn,
struct lov_stripe_md *ea,
unsigned long offset);
int (*o_last_dirty_offset)(struct lustre_handle *conn,
struct lov_stripe_md *ea,
unsigned long *offset);
- void (*o_destroy_export)(struct obd_export *export);
+ void (*o_destroy_export)(struct obd_export *exp);
+
+ /* metadata-only methods */
+ int (*o_pin)(struct lustre_handle *, obd_id ino, __u32 gen, int type,
+ struct obd_client_handle *, int flag);
+ int (*o_unpin)(struct lustre_handle *, struct obd_client_handle *, int);
+
+ /* If adding ops, also update obdclass/lprocfs_status.c,
+ * and include/linux/obd_class.h */
};
static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
int error)
{
if (error) {
- CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n",
+ CERROR("%s: transno "LPD64" commit error: %d\n",
obd->obd_name, transno, error);
return;
}
}
}
-/* When adding a function pointer to struct obd_ops, please update
- * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c
- * accordingly. */
-
#endif /* __OBD_H */
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/time.h>
+#include <linux/timer.h>
#endif
#include <linux/obd_support.h>
int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data);
int class_multi_cleanup(struct obd_device *obddev);
+/* obdo.c */
+#ifdef __KERNEL__
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+#endif
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
+
static inline int obd_check_conn(struct lustre_handle *conn)
{
struct obd_device *obd;
RETURN(rc);
}
-static inline int obd_cleanup(struct obd_device *obd, int force, int failover)
+static inline int obd_cleanup(struct obd_device *obd, int flags)
{
int rc;
ENTRY;
OBD_CHECK_OP(obd, cleanup);
OBD_COUNTER_INCREMENT(obd, cleanup);
- rc = OBP(obd, cleanup)(obd, force, failover);
+ rc = OBP(obd, cleanup)(obd, flags);
RETURN(rc);
}
RETURN(rc);
}
-static inline int obd_disconnect(struct lustre_handle *conn, int failover)
+static inline int obd_disconnect(struct lustre_handle *conn, int flags)
{
struct obd_export *exp;
int rc;
OBD_CHECK_OP(exp->exp_obd, disconnect);
OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
- rc = OBP(exp->exp_obd, disconnect)(conn, failover);
+ rc = OBP(exp->exp_obd, disconnect)(conn, flags);
class_export_put(exp);
RETURN(rc);
}
EXIT;
}
-static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+#ifndef time_before
+/* Fallback for environments that do not provide time_before(): true when
+ * timestamp t1 is earlier than t2.  The comparison is done on the signed
+ * difference of the two values.  Both arguments are parenthesized so that
+ * expression arguments (e.g. "a + b" or "c ? x : y") expand correctly. */
+#define time_before(t1, t2) ((long)(t2) - (long)(t1) > 0)
+#endif
+
+/* Fetch filesystem statistics for @obd into @osfs, using the copy cached in
+ * obd->obd_osfs when it is recent enough.
+ *
+ * The device's statfs method is called when nothing has been cached yet
+ * (obd_osfs_age == 0) or when time_before(obd_osfs_age, max_age) holds;
+ * otherwise the cached copy is handed back under obd_dev_lock.
+ * Only a successful call refreshes the cache, so the result of a failed
+ * statfs is never replayed to later callers as cached data with rc == 0.
+ *
+ * Returns -EINVAL if @obd is NULL, otherwise the statfs method's return
+ * code, or 0 when the cached copy was used. */
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
- int rc;
+ int rc = 0;
ENTRY;
- OBD_CHECK_OP(exp->exp_obd, statfs);
- OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
-
- rc = OBP(exp->exp_obd, statfs)(exp, osfs);
+ if (obd == NULL)
+ RETURN(-EINVAL);
+
+ OBD_CHECK_OP(obd, statfs);
+ OBD_COUNTER_INCREMENT(obd, statfs);
+
+ CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+ if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) {
+ rc = OBP(obd, statfs)(obd, osfs, max_age);
+ /* cache only good results; on error keep the previous cache/age */
+ if (rc == 0) {
+ spin_lock(&obd->obd_dev_lock);
+ memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+ obd->obd_osfs_age = jiffies;
+ spin_unlock(&obd->obd_dev_lock);
+ }
+ } else {
+ CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+ spin_lock(&obd->obd_dev_lock);
+ memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+ spin_unlock(&obd->obd_dev_lock);
+ }
RETURN(rc);
}
RETURN(rc);
}
-static inline int obd_brw(int cmd, struct lustre_handle *conn,
+static inline int obd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, obd_count oa_bufs,
struct brw_page *pg, struct obd_trans_info *oti)
{
LBUG();
}
- rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti);
+ rc = OBP(exp->exp_obd, brw)(cmd, conn, oa, ea, oa_bufs, pg, oti);
class_export_put(exp);
RETURN(rc);
}
static inline int obd_brw_async(int cmd, struct lustre_handle *conn,
- struct lov_stripe_md *ea, obd_count oa_bufs,
- struct brw_page *pg,
+ struct obdo *oa, struct lov_stripe_md *ea,
+ obd_count oa_bufs, struct brw_page *pg,
struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
{
LBUG();
}
- rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti);
+ rc = OBP(exp->exp_obd, brw_async)(cmd, conn, oa, ea, oa_bufs, pg, set,
+ oti);
class_export_put(exp);
RETURN(rc);
}
-static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
- struct niobuf_local *local, void **desc_private,
+ struct niobuf_local *local,
struct obd_trans_info *oti)
{
int rc;
OBD_CHECK_OP(exp->exp_obd, preprw);
OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
- rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount,
- remote, local, desc_private, oti);
+ rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
+ remote, local, oti);
RETURN(rc);
}
-static inline int obd_commitrw(int cmd, struct obd_export *exp,
+static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_local *local,
- void *desc_private, struct obd_trans_info *oti)
+ struct obd_trans_info *oti)
{
int rc;
ENTRY;
OBD_CHECK_OP(exp->exp_obd, commitrw);
OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
- rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount,
- local, desc_private, oti);
+ rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount,
+ local, oti);
RETURN(rc);
}
RETURN(rc);
}
-static inline int obd_san_preprw(int cmd, struct lustre_handle *conn,
+/* Forward a log-add request to the o_log_add method of the obd device
+ * behind @conn.  @cathandle, @rec, @lsm, @logcookies and @numcookies are
+ * passed through to the method unchanged, and its return code is returned.
+ * The export filled in by OBD_CHECK_SETUP(conn, exp) is released with
+ * class_export_put() once the method has run.
+ * NOTE(review): OBD_CHECK_SETUP()/OBD_CHECK_OP() are defined elsewhere and
+ * presumably return early on a bad handle or a missing method -- confirm. */
+static inline int obd_log_add(struct lustre_handle *conn,
+ struct llog_handle *cathandle,
+ struct llog_trans_hdr *rec,
+ struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies,
+ int numcookies)
+{
+ struct obd_export *exp;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_SETUP(conn, exp);
+ OBD_CHECK_OP(exp->exp_obd, log_add);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, log_add);
+
+ rc = OBP(exp->exp_obd, log_add)(conn, cathandle, rec, lsm, logcookies,
+ numcookies);
+ class_export_put(exp);
+ RETURN(rc);
+}
+
+/* Forward a log-cancel request to the o_log_cancel method of the obd device
+ * behind @conn.  @lsm, @count, @cookies and @flags are passed through to
+ * the method unchanged, and its return code is returned.  The export filled
+ * in by OBD_CHECK_SETUP(conn, exp) is released with class_export_put() once
+ * the method has run.
+ * NOTE(review): OBD_CHECK_SETUP()/OBD_CHECK_OP() are defined elsewhere and
+ * presumably return early on a bad handle or a missing method -- confirm. */
+static inline int obd_log_cancel(struct lustre_handle *conn,
+ struct lov_stripe_md *lsm, int count,
+ struct llog_cookie *cookies, int flags)
+{
+ struct obd_export *exp;
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_SETUP(conn, exp);
+ OBD_CHECK_OP(exp->exp_obd, log_cancel);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, log_cancel);
+
+ rc = OBP(exp->exp_obd, log_cancel)(conn, lsm, count, cookies, flags);
+ class_export_put(exp);
+ RETURN(rc);
+}
+
+/* Forward a SAN prepare-read/write request to the o_san_preprw method of
+ * the obd device that owns @exp; @cmd, @oa, @objcount, @obj, @niocount and
+ * @remote are passed through unchanged and the method's return code is
+ * returned.  The op check and the counter name the method that is actually
+ * invoked (san_preprw), so a device that implements preprw but not
+ * san_preprw is rejected by OBD_CHECK_OP instead of calling a NULL pointer.
+ * NOTE(review): @exp is now supplied by the caller, yet a reference is still
+ * dropped below with class_export_put(); confirm that callers expect their
+ * reference to be consumed here. */
+static inline int obd_san_preprw(int cmd, struct obd_export *exp,
+ struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote)
{
- struct obd_export *exp;
int rc;
- OBD_CHECK_ACTIVE(conn, exp);
- OBD_CHECK_OP(exp->exp_obd, preprw);
- OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
- rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj,
+ OBD_CHECK_OP(exp->exp_obd, san_preprw);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, san_preprw);
+ rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
niocount, remote);
class_export_put(exp);
- RETURN(rc);
+ return(rc);
+}
+
+/* Forward a pin request to the o_pin method of the obd device behind @conn.
+ * @ino, @gen, @type, @handle and @flag are passed through to the method
+ * unchanged, and its return code is returned.  The export filled in by
+ * OBD_CHECK_ACTIVE(conn, exp) is released with class_export_put() once the
+ * method has run.
+ * NOTE(review): OBD_CHECK_ACTIVE()/OBD_CHECK_OP() are defined elsewhere and
+ * presumably return early on a bad handle or a missing method -- confirm. */
+static inline int obd_pin(struct lustre_handle *conn, obd_id ino, __u32 gen,
+ int type, struct obd_client_handle *handle, int flag)
+{
+ struct obd_export *exp;
+ int rc;
+
+ OBD_CHECK_ACTIVE(conn, exp);
+ OBD_CHECK_OP(exp->exp_obd, pin);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
+
+ rc = OBP(exp->exp_obd, pin)(conn, ino, gen, type, handle, flag);
+ class_export_put(exp);
+ return(rc);
+}
+
+/* Forward an unpin request to the o_unpin method of the obd device behind
+ * @conn.  @handle and @flag are passed through to the method unchanged, and
+ * its return code is returned.  The export filled in by
+ * OBD_CHECK_ACTIVE(conn, exp) is released with class_export_put() once the
+ * method has run.
+ * NOTE(review): OBD_CHECK_ACTIVE()/OBD_CHECK_OP() are defined elsewhere and
+ * presumably return early on a bad handle or a missing method -- confirm. */
+static inline int obd_unpin(struct lustre_handle *conn,
+ struct obd_client_handle *handle, int flag)
+{
+ struct obd_export *exp;
+ int rc;
+
+ OBD_CHECK_ACTIVE(conn, exp);
+ OBD_CHECK_OP(exp->exp_obd, unpin);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, unpin);
+
+ rc = OBP(exp->exp_obd, unpin)(conn, handle, flag);
+ class_export_put(exp);
+ return(rc);
}
static inline int obd_mark_page_dirty(struct lustre_handle *conn,
- struct lov_stripe_md *lsm,
+ struct lov_stripe_md *lsm,
unsigned long offset)
{
struct obd_export *exp;
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, mark_page_dirty);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, mark_page_dirty);
rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
class_export_put(exp);
- RETURN(rc);
+ return(rc);
}
static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
- struct lov_stripe_md *lsm,
+ struct lov_stripe_md *lsm,
unsigned long start,
unsigned long end,
unsigned long *cleared)
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, clear_dirty_pages);
rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
cleared);
class_export_put(exp);
- RETURN(rc);
+ return(rc);
}
static inline int obd_last_dirty_offset(struct lustre_handle *conn,
OBD_CHECK_SETUP(conn, exp);
OBD_CHECK_OP(exp->exp_obd, last_dirty_offset);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, last_dirty_offset);
rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
class_export_put(exp);
- RETURN(rc);
+ return(rc);
}
/* OBD Metadata Support */
extern int obd_init_caches(void);
extern void obd_cleanup_caches(void);
-static inline struct lustre_handle *obdo_handle(struct obdo *oa)
-{
- return (struct lustre_handle *)&oa->o_inline;
-}
-
/* support routines */
extern kmem_cache_t *obdo_cachep;
static inline struct obdo *obdo_alloc(void)
oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
if (oa == NULL)
LBUG();
+ CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa);
memset(oa, 0, sizeof (*oa));
return oa;
{
if (!oa)
return;
+ CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa);
kmem_cache_free(obdo_cachep, oa);
}
#define kdev_t_to_nr(dev) dev
#endif
-#ifdef __KERNEL__
-static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr)
-{
- unsigned int ia_valid = attr->ia_valid;
-
- if (ia_valid & ATTR_ATIME) {
- oa->o_atime = LTIME_S(attr->ia_atime);
- oa->o_valid |= OBD_MD_FLATIME;
- }
- if (ia_valid & ATTR_MTIME) {
- oa->o_mtime = LTIME_S(attr->ia_mtime);
- oa->o_valid |= OBD_MD_FLMTIME;
- }
- if (ia_valid & ATTR_CTIME) {
- oa->o_ctime = LTIME_S(attr->ia_ctime);
- oa->o_valid |= OBD_MD_FLCTIME;
- }
- if (ia_valid & ATTR_SIZE) {
- oa->o_size = attr->ia_size;
- oa->o_valid |= OBD_MD_FLSIZE;
- }
- if (ia_valid & ATTR_MODE) {
- oa->o_mode = attr->ia_mode;
- oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
- if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
- oa->o_mode &= ~S_ISGID;
- }
- if (ia_valid & ATTR_UID) {
- oa->o_uid = attr->ia_uid;
- oa->o_valid |= OBD_MD_FLUID;
- }
- if (ia_valid & ATTR_GID) {
- oa->o_gid = attr->ia_gid;
- oa->o_valid |= OBD_MD_FLGID;
- }
-}
-
-
-static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa,
- obd_flag valid)
-{
- memset(attr, 0, sizeof(*attr));
- if (valid & OBD_MD_FLATIME) {
- LTIME_S(attr->ia_atime) = oa->o_atime;
- attr->ia_valid |= ATTR_ATIME;
- }
- if (valid & OBD_MD_FLMTIME) {
- LTIME_S(attr->ia_mtime) = oa->o_mtime;
- attr->ia_valid |= ATTR_MTIME;
- }
- if (valid & OBD_MD_FLCTIME) {
- LTIME_S(attr->ia_ctime) = oa->o_ctime;
- attr->ia_valid |= ATTR_CTIME;
- }
- if (valid & OBD_MD_FLSIZE) {
- attr->ia_size = oa->o_size;
- attr->ia_valid |= ATTR_SIZE;
- }
- if (valid & OBD_MD_FLTYPE) {
- attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- }
- if (valid & OBD_MD_FLMODE) {
- attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
- attr->ia_valid |= ATTR_MODE;
- if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
- attr->ia_mode &= ~S_ISGID;
- }
- if (valid & OBD_MD_FLUID)
- {
- attr->ia_uid = oa->o_uid;
- attr->ia_valid |= ATTR_UID;
- }
- if (valid & OBD_MD_FLGID) {
- attr->ia_gid = oa->o_gid;
- attr->ia_valid |= ATTR_GID;
- }
-}
-
-
-/* WARNING: the file systems must take care not to tinker with
- attributes they don't manage (such as blocks). */
-
-
-static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
- obd_flag valid)
-{
- if (valid & OBD_MD_FLATIME)
- dst->o_atime = LTIME_S(src->i_atime);
- if (valid & OBD_MD_FLMTIME)
- dst->o_mtime = LTIME_S(src->i_mtime);
- if (valid & OBD_MD_FLCTIME)
- dst->o_ctime = LTIME_S(src->i_ctime);
- if (valid & OBD_MD_FLSIZE)
- dst->o_size = src->i_size;
- if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
- dst->o_blocks = src->i_blocks;
- if (valid & OBD_MD_FLBLKSZ)
- dst->o_blksize = src->i_blksize;
- if (valid & OBD_MD_FLTYPE)
- dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT);
- if (valid & OBD_MD_FLMODE)
- dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT);
- if (valid & OBD_MD_FLUID)
- dst->o_uid = src->i_uid;
- if (valid & OBD_MD_FLGID)
- dst->o_gid = src->i_gid;
- if (valid & OBD_MD_FLFLAGS)
- dst->o_flags = src->i_flags;
- if (valid & OBD_MD_FLNLINK)
- dst->o_nlink = src->i_nlink;
- if (valid & OBD_MD_FLGENER)
- dst->o_generation = src->i_generation;
- if (valid & OBD_MD_FLRDEV)
- dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev);
-
- dst->o_valid |= (valid & ~OBD_MD_FLID);
-}
-
-static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src,
- obd_flag valid)
-{
- valid &= src->o_valid;
-
- if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
- LTIME_S(dst->i_atime) = src->o_atime;
- if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
- LTIME_S(dst->i_mtime) = src->o_mtime;
- if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
- LTIME_S(dst->i_ctime) = src->o_ctime;
- if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size)
- dst->i_size = src->o_size;
- /* allocation of space */
- if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
- dst->i_blocks = src->o_blocks;
-}
-
-static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
- obd_flag valid)
-{
- valid &= src->o_valid;
-
- if (valid & OBD_MD_FLATIME)
- LTIME_S(dst->i_atime) = src->o_atime;
- if (valid & OBD_MD_FLMTIME)
- LTIME_S(dst->i_mtime) = src->o_mtime;
- if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
- LTIME_S(dst->i_ctime) = src->o_ctime;
- if (valid & OBD_MD_FLSIZE)
- dst->i_size = src->o_size;
- if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
- dst->i_blocks = src->o_blocks;
- if (valid & OBD_MD_FLBLKSZ)
- dst->i_blksize = src->o_blksize;
- if (valid & OBD_MD_FLTYPE)
- dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
- if (valid & OBD_MD_FLMODE)
- dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
- if (valid & OBD_MD_FLUID)
- dst->i_uid = src->o_uid;
- if (valid & OBD_MD_FLGID)
- dst->i_gid = src->o_gid;
- if (valid & OBD_MD_FLFLAGS)
- dst->i_flags = src->o_flags;
- if (valid & OBD_MD_FLNLINK)
- dst->i_nlink = src->o_nlink;
- if (valid & OBD_MD_FLGENER)
- dst->i_generation = src->o_generation;
- if (valid & OBD_MD_FLRDEV)
- dst->i_rdev = to_kdev_t(src->o_rdev);
-}
-#endif
-
-static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src,
- obd_flag valid)
-{
-#ifdef __KERNEL__
- CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n",
- (unsigned long long)src->o_id, src->o_valid,
- (unsigned long long)dst->o_id);
-#endif
- if (valid & OBD_MD_FLATIME)
- dst->o_atime = src->o_atime;
- if (valid & OBD_MD_FLMTIME)
- dst->o_mtime = src->o_mtime;
- if (valid & OBD_MD_FLCTIME)
- dst->o_ctime = src->o_ctime;
- if (valid & OBD_MD_FLSIZE)
- dst->o_size = src->o_size;
- if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
- dst->o_blocks = src->o_blocks;
- if (valid & OBD_MD_FLBLKSZ)
- dst->o_blksize = src->o_blksize;
- if (valid & OBD_MD_FLTYPE)
- dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
- if (valid & OBD_MD_FLMODE)
- dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
- if (valid & OBD_MD_FLUID)
- dst->o_uid = src->o_uid;
- if (valid & OBD_MD_FLGID)
- dst->o_gid = src->o_gid;
- if (valid & OBD_MD_FLFLAGS)
- dst->o_flags = src->o_flags;
- /*
- if (valid & OBD_MD_FLOBDFLG)
- dst->o_obdflags = src->o_obdflags;
- */
- if (valid & OBD_MD_FLNLINK)
- dst->o_nlink = src->o_nlink;
- if (valid & OBD_MD_FLGENER)
- dst->o_generation = src->o_generation;
- if (valid & OBD_MD_FLRDEV)
- dst->o_rdev = src->o_rdev;
- if (valid & OBD_MD_FLINLINE &&
- src->o_obdflags & OBD_FL_INLINEDATA) {
- memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline));
- dst->o_obdflags |= OBD_FL_INLINEDATA;
- }
-
- dst->o_valid |= valid;
-}
-
-
-/* returns FALSE if comparison (by flags) is same, TRUE if changed */
-static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src,
- obd_flag compare)
-{
- int res = 0;
-
- if ( compare & OBD_MD_FLATIME )
- res = (res || (dst->o_atime != src->o_atime));
- if ( compare & OBD_MD_FLMTIME )
- res = (res || (dst->o_mtime != src->o_mtime));
- if ( compare & OBD_MD_FLCTIME )
- res = (res || (dst->o_ctime != src->o_ctime));
- if ( compare & OBD_MD_FLSIZE )
- res = (res || (dst->o_size != src->o_size));
- if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */
- res = (res || (dst->o_blocks != src->o_blocks));
- if ( compare & OBD_MD_FLBLKSZ )
- res = (res || (dst->o_blksize != src->o_blksize));
- if ( compare & OBD_MD_FLTYPE )
- res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0));
- if ( compare & OBD_MD_FLMODE )
- res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0));
- if ( compare & OBD_MD_FLUID )
- res = (res || (dst->o_uid != src->o_uid));
- if ( compare & OBD_MD_FLGID )
- res = (res || (dst->o_gid != src->o_gid));
- if ( compare & OBD_MD_FLFLAGS )
- res = (res || (dst->o_flags != src->o_flags));
- if ( compare & OBD_MD_FLNLINK )
- res = (res || (dst->o_nlink != src->o_nlink));
- if ( compare & OBD_MD_FLGENER )
- res = (res || (dst->o_generation != src->o_generation));
- /* XXX Don't know if thses should be included here - wasn't previously
- if ( compare & OBD_MD_FLINLINE )
- res = (res || memcmp(dst->o_inline, src->o_inline));
- */
- return res;
-}
-
/* I'm as embarrassed about this as you are.
*
* <shaver> // XXX do not look into _superhack with remaining eye
extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
-struct obd_statfs;
-struct statfs;
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
-
struct obd_class_user_state {
struct obd_device *ocus_current_obd;
struct list_head ocus_conns;
#define OBD_LOV_DEVICENAME "lov"
struct lov_brw_async_args {
- obd_count aa_oa_bufs;
- struct brw_page *aa_ioarr;
+ struct lov_stripe_md *aa_lsm;
+ struct obdo *aa_obdos;
+ struct obdo *aa_oa;
+ struct brw_page *aa_ioarr;
+ obd_count aa_oa_bufs;
};
struct lov_getattr_async_args {
struct lov_stripe_md *aa_lsm;
struct obdo *aa_oa;
- struct obdo *aa_stripe_oas;
+ struct obdo *aa_obdos;
};
static inline int lov_stripe_md_size(int stripes)
return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id);
}
-extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
- struct lov_stripe_md *lsm);
-extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
- struct lov_mds_md *lmm, int lmmsize);
-extern int lov_setstripe(struct lustre_handle *conn,
- struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu);
-extern int lov_getstripe(struct lustre_handle *conn,
- struct lov_stripe_md *lsm, struct lov_mds_md *lmmu);
-
#define IOC_LOV_TYPE 'g'
#define IOC_LOV_MIN_NR 50
#define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long)
#define LUSTRE_SANOST_NAME "sanost"
struct osc_brw_async_args {
+ struct obdo *aa_oa;
int aa_requested_nob;
int aa_nio_count;
obd_count aa_page_count;
#define OBD_FAIL_MDS_STATFS_PACK 0x11d
#define OBD_FAIL_MDS_STATFS_NET 0x11e
#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
-#define OBD_FAIL_MDS_ALL_REPLY_NET 0x120
-#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x121
+#define OBD_FAIL_MDS_PIN_NET 0x120
+#define OBD_FAIL_MDS_UNPIN_NET 0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
#define OBD_FAIL_PTLRPC 0x500
#define OBD_FAIL_PTLRPC_ACK 0x501
+#define OBD_FAIL_OBD_PING_NET 0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#define ll_bdevname(a) __bdevname((a))
+#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE]
+#define ll_bdevname(DEV, STORAGE) __bdevname(DEV, STORAGE)
#define ll_lock_kernel lock_kernel()
-#define LTIME_S(time) (time.tv_sec)
#else
+#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo
+#define ll_bdevname(DEV, STORAGE) ((void)__unused_##STORAGE, bdevname((DEV)))
#define ll_lock_kernel
-#define ll_bdevname(a) bdevname((a))
-#define LTIME_S(time) (time)
#endif
static inline void OBD_FAIL_WRITE(int id, kdev_t dev)
{
if (OBD_FAIL_CHECK(id)) {
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ BDEVNAME_DECLARE_STORAGE(tmp);
#ifdef CONFIG_DEV_RDONLY
CERROR("obd_fail_loc=%x, fail write operation on %s\n",
- id, ll_bdevname(dev));
+ id, ll_bdevname(kdev_t_to_nr(dev), tmp));
dev_set_rdonly(dev, 2);
#else
CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
- id, ll_bdevname(dev));
-#endif
-#else
-#ifdef CONFIG_DEV_RDONLY
- CERROR("obd_fail_loc=%x, fail write operation on %s\n",
- id, ll_bdevname(dev.value));
- dev_set_rdonly(dev, 2);
-#else
- CERROR("obd_fail_loc=%x, can't fail write operation on %s\n",
- id, ll_bdevname(dev.value));
-#endif
+ id, ll_bdevname(kdev_t_to_nr(dev), tmp));
#endif
/* We set FAIL_ONCE because we never "un-fail" a device */
obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
#define LTIME_S(time) (time)
#endif /* __KERNEL__ */
-#define OBD_ALLOC(ptr, size) \
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \
do { \
- (ptr) = kmalloc(size, GFP_KERNEL); \
+ (ptr) = kmalloc(size, gfp_mask); \
if ((ptr) == NULL) { \
CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
(int)(size), __FILE__, __LINE__); \
} \
} while (0)
+#ifndef OBD_GFP_MASK
+# define OBD_GFP_MASK GFP_KERNEL
+#endif
+
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
+
#ifdef __arch_um__
# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
#else
#endif
#ifdef CONFIG_DEBUG_SLAB
-#define POISON(lptr, c, s) do {} while (0)
+#define POISON(ptr, c, s) do {} while (0)
#else
-#define POISON(lptr, c, s) memset(lptr, c, s)
+#define POISON(ptr, c, s) memset(ptr, c, s)
#endif
#define OBD_FREE(ptr, size) \
} while (0)
#endif
+/* We memset() the slab object to 0 when allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING: its work will be cleared here. We'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
#define OBD_SLAB_ALLOC(ptr, slab, type, size) \
do { \
- LASSERT (!in_interrupt()); \
+ LASSERT(!in_interrupt()); \
(ptr) = kmem_cache_alloc(slab, type); \
if ((ptr) == NULL) { \
CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
-
-
-
drivers/block/blkpg.c | 35 +++++++++++++++++++++++++++++++++++
drivers/block/loop.c | 3 +++
- drivers/ide/ide-disk.c | 5 ++++-
- 3 files changed, 42 insertions(+), 1 deletion(-)
+ drivers/ide/ide-disk.c | 5 +++++
+ 3 files changed, 43 insertions(+)
---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20 2003-04-11 14:05:03.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/blkpg.c 2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/blkpg.c~dev_read_only_2.4.20-rh 2003-05-15 21:12:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/blkpg.c 2003-07-12 15:10:31.000000000 -0600
@@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c
}
+EXPORT_SYMBOL(dev_set_rdonly);
+EXPORT_SYMBOL(dev_check_rdonly);
+EXPORT_SYMBOL(dev_clear_rdonly);
---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20 2003-04-11 14:05:08.000000000 +0800
-+++ rh-2.4.20-root/drivers/block/loop.c 2003-04-12 13:11:31.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/drivers/block/loop.c~dev_read_only_2.4.20-rh 2003-05-15 21:12:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/loop.c 2003-07-12 15:10:31.000000000 -0600
@@ -491,6 +491,9 @@ static int loop_make_request(request_que
spin_unlock_irq(&lo->lo_lock);
if (lo->lo_flags & LO_FLAGS_READ_ONLY)
goto err;
} else if (rw == READA) {
---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20 2003-04-11 14:04:53.000000000 +0800
-+++ rh-2.4.20-root/drivers/ide/ide-disk.c 2003-04-12 13:14:48.000000000 +0800
-@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d
- if (IS_PDC4030_DRIVE)
- return promise_rw_disk(drive, rq, block);
- #endif /* CONFIG_BLK_DEV_PDC4030 */
--
-+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
-+ ide_end_request(1, HWGROUP(drive));
-+ return ide_stopped;
-+ }
+--- kernel-2.4.20-6chaos_18_7/drivers/ide/ide-disk.c~dev_read_only_2.4.20-rh 2003-05-15 21:13:09.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/drivers/ide/ide-disk.c 2003-07-12 15:12:03.000000000 -0600
+@@ -371,6 +371,11 @@ ide_startstop_t __ide_do_rw_disk (ide_dr
+ if (driver_blocked)
+ panic("Request while ide driver is blocked?");
+
++ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) {
++ ide_end_request(1, HWGROUP(drive));
++ return ide_stopped;
++ }
++
if (IDE_CONTROL_REG)
hwif->OUTB(drive->ctl, IDE_CONTROL_REG);
return 0;
}
+/* truncate.c */
-+extern void truncate_complete_page(struct page *);
++extern void truncate_complete_page(struct address_space *mapping,struct page *);
/* filemap.c */
extern unsigned long page_unuse(struct page *);
- fs/ext3/super.c | 229 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h | 2
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c | 116 ++++++++++++++++++++++
+ fs/ext3/super.c | 231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 5
include/linux/ext3_fs_sb.h | 10 +
- 3 files changed, 241 insertions(+)
+ 4 files changed, 362 insertions(+)
--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jun 18 11:59:14 2003
-@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jul 2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
}
}
+ * If we have any problem deferring the delete, just delete it right away.
+ * If we defer it, we also mark how many blocks it would free, so that we
+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
+ */
+static void ext3_delete_inode_thread(struct inode *old_inode)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
+ struct inode *new_inode;
+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
+
+ return;
+ }
+
-+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_delete;
+
+ /* We may want to delete the inode immediately and not defer it */
-+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+ !sbi->s_delete_list.next) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++ goto out_delete;
+
-+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
+ old_inode->i_ino, blocks);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+
+ /* We can iget this inode again here, because our caller has unhashed
+ */
+ down(&sbi->s_orphan_lock);
+
-+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++ sbi->s_mount_state |= EXT3_ORPHAN_FS;
+ new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
+ if (is_bad_inode(new_inode)) {
+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
+ iput(new_inode);
+ up(&sbi->s_orphan_lock);
+ ext3_debug("delete inode %lu directly (bad read)\n",
+ old_inode->i_ino);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+ J_ASSERT(new_inode != old_inode);
+
-+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++ J_ASSERT(!list_empty(&oei->i_orphan));
++
++ nei = EXT3_I(new_inode);
+ /* Ugh. We need to insert new_inode into the same spot on the list
+ * as old_inode was, to ensure the in-memory orphan list is still
+ * in the same order as the on-disk orphan list (badness otherwise).
+ */
-+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++ nei->i_orphan = oei->i_orphan;
++ nei->i_orphan.next->prev = &nei->i_orphan;
++ nei->i_orphan.prev->next = &nei->i_orphan;
++ nei->i_state |= EXT3_STATE_DELETE;
+ up(&sbi->s_orphan_lock);
+
+ clear_inode(old_inode);
+ new_inode->i_ino, blocks);
+
+ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_delete:
++ ext3_delete_inode(old_inode);
+}
+#else
+#define ext3_start_delete_thread(sbi) do {} while(0)
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
-@@ -451,7 +664,11 @@ static struct super_operations ext3_sops
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
write_inode: ext3_write_inode, /* BKL not held. Don't need */
dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
put_inode: ext3_put_inode, /* BKL not held. Don't need */
put_super: ext3_put_super, /* BKL held */
write_super: ext3_write_super, /* BKL held */
write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -511,6 +728,14 @@ static int parse_options (char * options
+@@ -511,6 +730,14 @@ static int parse_options (char * options
this_char = strtok (NULL, ",")) {
if ((value = strchr (this_char, '=')) != NULL)
*value++ = 0;
if (!strcmp (this_char, "bsddf"))
clear_opt (*mount_options, MINIX_DF);
else if (!strcmp (this_char, "nouid32")) {
-@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
if (!parse_options(data, &tmp, sbi, &tmp, 1))
return -EINVAL;
if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+--- linux/fs/ext3/file.c.orig Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++ truncate: ext3_truncate_thread, /* BKL held */
++#else
+ truncate: ext3_truncate, /* BKL held */
++#endif
+ setattr: ext3_setattr, /* BKL held */
+ };
+
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18 Wed Jul 2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c Wed Jul 2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+ ext3_journal_stop(handle, inode);
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead. This avoids a lot of latency
++ * when truncating large files. Only truncates to size 0 are deferred.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away
++ * via ext3_truncate(). If we defer it, we also add the block count to
++ * sbi->s_delete_blocks, so that we can keep the statfs data correct, and
++ * we know if we should sleep on the delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++ struct inode *new_inode;
++ handle_t *handle;
++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); /* presumably 512-byte units converted to fs blocks -- confirm */
++
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_truncate; /* asyncdel not set, or s_delete_list.next is NULL (presumably no delete thread -- confirm) */
++
++ /* XXX This is a temporary limitation for code simplicity.
++ * We could truncate to arbitrary sizes at some later time.
++ */
++ if (old_inode->i_size != 0)
++ goto out_truncate;
++
++ /* Do not defer for: sync inode, at most EXT3_NDIR_BLOCKS blocks, or i_size > i_disksize */
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++ old_inode->i_size > oei->i_disksize)
++ goto out_truncate;
++
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++ old_inode->i_ino, blocks); /* NOTE(review): message text says delete, but this is the truncate path */
++ goto out_truncate;
++ }
++
++ ext3_discard_prealloc(old_inode);
++
++ /* old_inode = 1
++ * new_inode = sb + GDT + ibitmap
++ * orphan list = 1 inode/superblock for add, 2 inodes for del
++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++ */
++ handle = ext3_journal_start(old_inode, 7); /* NOTE(review): confirm that 7 matches the tally listed above */
++ if (IS_ERR(handle))
++ goto out_truncate;
++
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++ if (IS_ERR(new_inode)) {
++ ext3_debug("truncate inode %lu directly (no new inodes)\n",
++ old_inode->i_ino);
++ goto out_journal;
++ }
++
++ nei = EXT3_I(new_inode);
++
++ down_write(&oei->truncate_sem); /* held while size, block count and i_data move to new_inode */
++ new_inode->i_size = old_inode->i_size;
++ new_inode->i_blocks = old_inode->i_blocks;
++ new_inode->i_uid = old_inode->i_uid;
++ new_inode->i_gid = old_inode->i_gid;
++ new_inode->i_nlink = 0;
++
++ /* old inode keeps only the i_file_acl block count, if any; FIXME when we do arbitrary truncates */
++ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); /* hand i_data over to new_inode */
++ memset(oei->i_data, 0, sizeof(oei->i_data)); /* old_inode is left with zeroed i_data */
++
++ nei->i_disksize = oei->i_disksize;
++ nei->i_state |= EXT3_STATE_DELETE;
++ up_write(&oei->truncate_sem);
++
++ if (ext3_orphan_add(handle, new_inode) < 0)
++ goto out_journal; /* NOTE(review): no iput(new_inode) here, unlike the orphan_del failure below, and i_data was already moved -- confirm */
++
++ if (ext3_orphan_del(handle, old_inode) < 0) {
++ ext3_orphan_del(handle, new_inode);
++ iput(new_inode);
++ goto out_journal;
++ }
++
++ ext3_journal_stop(handle, old_inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ J_ASSERT(list_empty(&new_inode->i_dentry));
++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); /* i_dentry list head is used to queue new_inode on s_delete_list */
++ sbi->s_delete_blocks += blocks;
++ sbi->s_delete_inodes++;
++ spin_unlock(&sbi->s_delete_lock);
++
++ ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++ new_inode->i_ino, blocks);
++
++ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_journal: /* a handle is open: stop it, then fall through */
++ ext3_journal_stop(handle, old_inode);
++out_truncate: /* fall back: truncate right away */
++ ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /*
+ * ext3_get_inode_loc returns with an extra refcount against the
+ * inode's underlying buffer_head on success.
--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Wed Jul 2 23:19:09 2003
@@ -190,6 +190,7 @@ struct ext3_group_desc
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode); /* defined in fs/ext3/inode.c */
++#endif
+
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003
-+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul 2 23:19:09 2003
@@ -29,6 +29,8 @@
#define EXT3_MAX_GROUP_LOADED 32
-diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
---- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400
-+++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
+ fs/ext3/file.c | 4
+ fs/ext3/inode.c | 116 ++++++++++++++++++++++
+ fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 5
+ include/linux/ext3_fs_sb.h | 10 +
+ 5 files changed, 365 insertions(+)
+
+--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 14:11:33 2003
+@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe
}
}
+ * If we have any problem deferring the delete, just delete it right away.
+ * If we defer it, we also mark how many blocks it would free, so that we
+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
+ */
+static void ext3_delete_inode_thread(struct inode *old_inode)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
+ struct inode *new_inode;
+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
+
+ return;
+ }
+
-+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_delete;
+
+ /* We may want to delete the inode immediately and not defer it */
-+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+ !sbi->s_delete_list.next) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++ goto out_delete;
+
-+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
+ old_inode->i_ino, blocks);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+
+ /* We can iget this inode again here, because our caller has unhashed
+ */
+ down(&sbi->s_orphan_lock);
+
-+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++ sbi->s_mount_state |= EXT3_ORPHAN_FS;
+ new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
+ if (is_bad_inode(new_inode)) {
+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
+ iput(new_inode);
+ up(&sbi->s_orphan_lock);
+ ext3_debug("delete inode %lu directly (bad read)\n",
+ old_inode->i_ino);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+ J_ASSERT(new_inode != old_inode);
+
-+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++ J_ASSERT(!list_empty(&oei->i_orphan));
++
++ nei = EXT3_I(new_inode);
+ /* Ugh. We need to insert new_inode into the same spot on the list
+ * as old_inode was, to ensure the in-memory orphan list is still
+ * in the same order as the on-disk orphan list (badness otherwise).
+ */
-+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++ nei->i_orphan = oei->i_orphan;
++ nei->i_orphan.next->prev = &nei->i_orphan;
++ nei->i_orphan.prev->next = &nei->i_orphan;
++ nei->i_state |= EXT3_STATE_DELETE;
+ up(&sbi->s_orphan_lock);
+
+ clear_inode(old_inode);
+ new_inode->i_ino, blocks);
+
+ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_delete:
++ ext3_delete_inode(old_inode);
+}
+#else
+#define ext3_start_delete_thread(sbi) do {} while(0)
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block
+@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+@@ -455,7 +670,11 @@ static struct super_operations ext3_sops
write_inode: ext3_write_inode, /* BKL not held. Don't need */
dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
put_inode: ext3_put_inode, /* BKL not held. Don't need */
+#endif
put_super: ext3_put_super, /* BKL held */
write_super: ext3_write_super, /* BKL held */
- write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,13 @@ static int parse_options (char * options
- this_char = strtok (NULL, ",")) {
- if ((value = strchr (this_char, '=')) != NULL)
- *value++ = 0;
+ sync_fs: ext3_sync_fs,
+@@ -524,6 +743,13 @@ static int parse_options (char * options
+ clear_opt (*mount_options, XATTR_USER);
+ else
+ #endif
+#ifdef EXT3_DELETE_THREAD
+ if (!strcmp(this_char, "asyncdel"))
+ set_opt(*mount_options, ASYNCDEL);
+ clear_opt(*mount_options, ASYNCDEL);
+ else
+#endif
- #ifdef CONFIG_EXT3_FS_XATTR_USER
- if (!strcmp (this_char, "user_xattr"))
- set_opt (*mount_options, XATTR_USER);
-@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+ if (!strcmp (this_char, "bsddf"))
+ clear_opt (*mount_options, MINIX_DF);
+ else if (!strcmp (this_char, "nouid32")) {
+@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s
if (!parse_options(data, &tmp, sbi, &tmp, 1))
return -EINVAL;
if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
---- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400
-+++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400
+--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:29 2003
++++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 14:11:33 2003
+@@ -2013,6 +2013,118 @@ out_stop:
+ ext3_journal_stop(handle, inode);
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead. This avoids a lot of latency
++ * when truncating large files. Only truncates to size 0 are deferred.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away
++ * via ext3_truncate(). If we defer it, we also add the block count to
++ * sbi->s_delete_blocks, so that we can keep the statfs data correct, and
++ * we know if we should sleep on the delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++ struct inode *new_inode;
++ handle_t *handle;
++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); /* presumably 512-byte units converted to fs blocks -- confirm */
++
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_truncate; /* asyncdel not set, or s_delete_list.next is NULL (presumably no delete thread -- confirm) */
++
++ /* XXX This is a temporary limitation for code simplicity.
++ * We could truncate to arbitrary sizes at some later time.
++ */
++ if (old_inode->i_size != 0)
++ goto out_truncate;
++
++ /* Do not defer for: sync inode, at most EXT3_NDIR_BLOCKS blocks, or i_size > i_disksize */
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++ old_inode->i_size > oei->i_disksize)
++ goto out_truncate;
++
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++ old_inode->i_ino, blocks); /* NOTE(review): message text says delete, but this is the truncate path */
++ goto out_truncate;
++ }
++
++ ext3_discard_prealloc(old_inode);
++
++ /* old_inode = 1
++ * new_inode = sb + GDT + ibitmap
++ * orphan list = 1 inode/superblock for add, 2 inodes for del
++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++ */
++ handle = ext3_journal_start(old_inode, 7); /* NOTE(review): confirm that 7 matches the tally listed above */
++ if (IS_ERR(handle))
++ goto out_truncate;
++
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++ if (IS_ERR(new_inode)) {
++ ext3_debug("truncate inode %lu directly (no new inodes)\n",
++ old_inode->i_ino);
++ goto out_journal;
++ }
++
++ nei = EXT3_I(new_inode);
++
++ down_write(&oei->truncate_sem); /* held while size, block count and i_data move to new_inode */
++ new_inode->i_size = old_inode->i_size;
++ new_inode->i_blocks = old_inode->i_blocks;
++ new_inode->i_uid = old_inode->i_uid;
++ new_inode->i_gid = old_inode->i_gid;
++ new_inode->i_nlink = 0;
++
++ /* old inode keeps only the i_file_acl block count, if any; FIXME when we do arbitrary truncates */
++ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); /* hand i_data over to new_inode */
++ memset(oei->i_data, 0, sizeof(oei->i_data)); /* old_inode is left with zeroed i_data */
++
++ nei->i_disksize = oei->i_disksize;
++ nei->i_state |= EXT3_STATE_DELETE;
++ up_write(&oei->truncate_sem);
++
++ if (ext3_orphan_add(handle, new_inode) < 0)
++ goto out_journal; /* NOTE(review): no iput(new_inode) here, unlike the orphan_del failure below, and i_data was already moved -- confirm */
++
++ if (ext3_orphan_del(handle, old_inode) < 0) {
++ ext3_orphan_del(handle, new_inode);
++ iput(new_inode);
++ goto out_journal;
++ }
++
++ ext3_journal_stop(handle, old_inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ J_ASSERT(list_empty(&new_inode->i_dentry));
++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); /* i_dentry list head is used to queue new_inode on s_delete_list */
++ sbi->s_delete_blocks += blocks;
++ sbi->s_delete_inodes++;
++ spin_unlock(&sbi->s_delete_lock);
++
++ ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++ new_inode->i_ino, blocks);
++
++ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_journal: /* a handle is open: stop it, then fall through */
++ ext3_journal_stop(handle, old_inode);
++out_truncate: /* fall back: truncate right away */
++ ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /*
+ * ext3_get_inode_loc returns with an extra refcount against the
+ * inode's underlying buffer_head on success.
+--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:21 2003
++++ linux-mmonroe/fs/ext3/file.c Thu Jul 10 14:12:17 2003
+@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati
+ };
+
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++ truncate: ext3_truncate_thread, /* BKL held */
++#else
+ truncate: ext3_truncate, /* BKL held */
++#endif
+ setattr: ext3_setattr, /* BKL held */
+ setxattr: ext3_setxattr, /* BKL held */
+ getxattr: ext3_getxattr, /* BKL held */
+--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:26 2003
++++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 14:11:33 2003
@@ -193,6 +193,7 @@ struct ext3_group_desc
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
/*
* ioctl commands
-@@ -321,6 +322,7 @@ struct ext3_inode {
+@@ -320,6 +321,7 @@ struct ext3_inode {
#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
-diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
---- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400
-+++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400
+@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode); /* defined in fs/ext3/inode.c */
++#endif
+
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
++++ linux-mmonroe/include/linux/ext3_fs_sb.h Thu Jul 10 14:11:33 2003
@@ -29,6 +29,8 @@
#define EXT3_MAX_GROUP_LOADED 8
};
#endif /* _LINUX_EXT3_FS_SB */
+
+_
---- linux-2.4.17/fs/ext3/super.c.orig Fri Dec 21 10:41:55 2001
-+++ linux-2.4.17/fs/ext3/super.c Fri Mar 22 11:00:41 2002
-@@ -1344,10 +1342,10 @@
+ fs/ext3/super.c | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+--- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 23:08:06.000000000 -0600
+@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s
printk(KERN_ERR "EXT3-fs: I/O error on journal device\n");
goto out_journal;
}
goto out_journal;
}
EXT3_SB(sb)->journal_bdev = bdev;
-@@ -1560,6 +1560,7 @@
- unlock_kernel();
- return ret;
- }
-+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */
-
- /*
- * Ext3 always journals updates to the superblock itself, so we don't
+
+_
DQUOT_DROP(inode);
--- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread 2003-05-16 12:26:29.000000000 +0800
+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-05-16 12:27:06.000000000 +0800
-@@ -2011,23 +2011,28 @@ out_stop:
- ext3_journal_stop(handle, inode);
- }
+@@ -2013,21 +2013,26 @@ out_stop:
-/*
- * ext3_get_inode_loc returns with an extra refcount against the
j += i * EXT3_INODES_PER_GROUP(sb) + 1;
if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_new_inode",
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~extN-wantedi Thu Jul 3 00:15:41 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c Thu Jul 3 00:17:28 2003
+@@ -2070,7 +2070,7 @@ void ext3_truncate_thread(struct inode *
+ if (IS_ERR(handle))
+ goto out_truncate;
+
+- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
+ if (IS_ERR(new_inode)) {
+ ext3_debug("truncate inode %lu directly (no new inodes)\n",
+ old_inode->i_ino);
--- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi 2003-04-08 23:35:55.000000000 -0600
+++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600
@@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st
- 0 files changed
+ Documentation/filesystems/ext2.txt | 16 ++
+ fs/ext3/Makefile | 2
+ fs/ext3/inode.c | 4
+ fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h | 13 +
+ fs/ext3/namei.c | 12 +
+ fs/ext3/super.c | 11 +
+ include/linux/ext3_fs.h | 2
+ 8 files changed, 318 insertions(+), 1 deletion(-)
---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-04-13 15:21:33.000000000 +0800
-+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt 2003-07-09 17:13:02.000000000 -0600
@@ -35,6 +35,22 @@ resgid=n The group ID which may use th
sb=n Use alternate superblock at this location.
grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18 2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-09 17:13:15.000000000 -0600
@@ -11,7 +11,7 @@ O_TARGET := ext3.o
- export-objs := super.o inode.o xattr.o
+ export-objs := super.o inode.o xattr.o ext3-exports.o
-obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
-+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o xattr.o
++obj-y := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
obj-m := $(O_TARGET)
---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18 2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-09 17:13:02.000000000 -0600
@@ -31,6 +31,7 @@
#include <linux/highuid.h>
#include <linux/quotaops.h>
/*
* SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
struct buffer_head *bh;
int block;
if(ext3_get_inode_loc(inode, &iloc))
goto bad_inode;
bh = iloc.bh;
---- /dev/null 2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c 2003-06-03 17:10:55.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c 2003-07-09 17:13:02.000000000 -0600
@@ -0,0 +1,259 @@
+/*
+ * linux/fs/ext3/iopen.c
+
+ return 1;
+}
---- /dev/null 2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h 2003-06-03 17:10:55.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h 2003-07-09 17:13:02.000000000 -0600
@@ -0,0 +1,13 @@
+/*
+ * iopen.h
+
+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
+extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18 2003-06-03 17:10:20.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c 2003-06-03 17:10:55.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-09 17:13:02.000000000 -0600
@@ -34,6 +34,7 @@
#include <linux/locks.h>
#include <linux/quotaops.h>
d_add(dentry, inode);
return NULL;
}
---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800
-+++ linux-2.4.18-chaos52-root/fs/ext3/super.c 2003-06-03 17:10:55.000000000 +0800
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
|| !strcmp (this_char, "quota")
|| !strcmp (this_char, "usrquota"))
/* Don't do anything ;-) */ ;
else if (!strcmp (this_char, "journal")) {
/* @@@ FIXME */
/* Eventually we will want to be able to create
---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18 2003-06-03 17:10:22.000000000 +0800
-+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-06-03 17:12:08.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-09 17:13:02.000000000 -0600
@@ -321,6 +321,8 @@ struct ext3_inode {
#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
Documentation/filesystems/ext2.txt | 16 ++
fs/ext3/Makefile | 2
fs/ext3/inode.c | 4
- fs/ext3/iopen.c | 240 +++++++++++++++++++++++++++++++++++++
- fs/ext3/iopen.h | 15 ++
- fs/ext3/namei.c | 13 +-
+ fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h | 13 +
+ fs/ext3/namei.c | 13 +
fs/ext3/super.c | 11 +
include/linux/ext3_fs.h | 2
- 8 files changed, 301 insertions(+), 2 deletions(-)
+ 8 files changed, 318 insertions(+), 2 deletions(-)
---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen 2001-07-11 16:44:45.000000000 -0600
-+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt 2003-05-17 14:06:00.000000000 -0600
+--- linux/Documentation/filesystems/ext2.txt~iopen-2.4.20 Wed Jul 11 15:44:45 2001
++++ linux-mmonroe/Documentation/filesystems/ext2.txt Thu Jul 10 12:28:54 2003
@@ -35,6 +35,22 @@ resgid=n The group ID which may use th
sb=n Use alternate superblock at this location.
grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
---- linux-2.4.20/fs/ext3/Makefile~iopen 2003-05-17 14:05:57.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/Makefile 2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/Makefile~iopen-2.4.20 Thu Jul 10 12:28:44 2003
++++ linux-mmonroe/fs/ext3/Makefile Thu Jul 10 12:28:54 2003
@@ -11,7 +11,7 @@ O_TARGET := ext3.o
export-objs := ext3-exports.o
ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o
obj-m := $(O_TARGET)
---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600
+--- linux/fs/ext3/inode.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 12:28:54 2003
@@ -31,6 +31,7 @@
#include <linux/highuid.h>
#include <linux/quotaops.h>
/*
* SEARCH_FROM_ZERO forces each block allocation to search from the start
-@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod
+@@ -2253,6 +2254,9 @@ void ext3_read_inode(struct inode * inod
struct buffer_head *bh;
int block;
if(ext3_get_inode_loc(inode, &iloc))
goto bad_inode;
bh = iloc.bh;
---- /dev/null 2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600
+--- /dev/null Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.c Thu Jul 10 12:28:54 2003
@@ -0,0 +1,259 @@
+/*
+ * linux/fs/ext3/iopen.c
+
+ return 1;
+}
---- /dev/null 2003-01-30 03:24:37.000000000 -0700
-+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600
+--- /dev/null Tue Jan 28 04:00:01 2003
++++ linux-mmonroe/fs/ext3/iopen.h Thu Jul 10 12:28:54 2003
@@ -0,0 +1,13 @@
+/*
+ * iopen.h
+
+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
+extern int ext3_iopen_get_inode(struct inode *inode);
---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600
+--- linux/fs/ext3/namei.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/fs/ext3/namei.c Thu Jul 10 12:28:54 2003
@@ -35,7 +35,7 @@
#include <linux/string.h>
#include <linux/locks.h>
d_add(dentry, inode);
return NULL;
}
---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600
-@@ -820,6 +820,17 @@ static int parse_options (char * options
+--- linux/fs/ext3/super.c~iopen-2.4.20 Thu Jul 10 12:28:45 2003
++++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 12:28:54 2003
+@@ -835,6 +835,17 @@ static int parse_options (char * options
|| !strcmp (this_char, "quota")
|| !strcmp (this_char, "usrquota"))
/* Don't do anything ;-) */ ;
else if (!strcmp (this_char, "journal")) {
/* @@@ FIXME */
/* Eventually we will want to be able to create
---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600
-+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600
+--- linux/include/linux/ext3_fs.h~iopen-2.4.20 Thu Jul 10 12:28:46 2003
++++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 12:30:12 2003
@@ -322,6 +322,8 @@ struct ext3_inode {
#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
+#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */
+#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */
- #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
+ #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- 0 files changed
+ fs/ext3/Makefile | 4
+ fs/ext3/ext3-exports.c | 13
+ fs/ext3/ialloc.c | 2
+ fs/ext3/inode.c | 29 -
+ fs/ext3/namei.c | 12
+ fs/ext3/super.c | 22
+ fs/ext3/xattr.c | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 46 -
+ include/linux/ext3_jbd.h | 8
+ include/linux/ext3_xattr.h | 155 +++++
+ include/linux/xattr.h | 15
+ 11 files changed, 1496 insertions(+), 52 deletions(-)
---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/ialloc.c 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c 2003-07-21 22:49:05.000000000 -0600
@@ -17,6 +17,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
DQUOT_FREE_INODE(inode);
DQUOT_DROP(inode);
---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/inode.c 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-21 22:49:05.000000000 -0600
@@ -39,6 +39,18 @@
*/
#undef SEARCH_FROM_ZERO
goto no_delete;
lock_kernel();
-@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode)
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
-@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
struct ext3_group_desc * gdp;
if ((inode->i_ino != EXT3_ROOT_INO &&
inode->i_ino != EXT3_JOURNAL_INO &&
inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
inode->i_ino > le32_to_cpu(
-@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
brelse (iloc.bh);
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
inode->i_mapping->a_ops = &ext3_aops;
-@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
inode->i_op = &ext3_dir_inode_operations;
inode->i_fop = &ext3_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &ext3_fast_symlink_inode_operations;
else {
inode->i_op = &page_symlink_inode_operations;
---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/namei.c 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-21 22:49:05.000000000 -0600
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/ext3_fs.h>
inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &ext3_aops;
/*
---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/super.c 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 22:50:28.000000000 -0600
@@ -24,6 +24,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/locks.h>
-@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
-@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb
+@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb
static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
+ return error;
}
- EXPORT_SYMBOL(ext3_bread);
---- /dev/null 2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/xattr.c 2003-04-20 16:14:31.000000000 +0800
-@@ -0,0 +1,1247 @@
+-EXPORT_SYMBOL(ext3_bread);
+
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c 2003-07-21 22:49:05.000000000 -0600
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c 2003-07-21 22:50:40.000000000 -0600
+@@ -0,0 +1,1242 @@
+/*
+ * linux/fs/ext3/xattr.c
+ *
+#include <linux/module.h>
+
+/* These symbols may be needed by a module. */
-+EXPORT_SYMBOL(ext3_xattr_register);
-+EXPORT_SYMBOL(ext3_xattr_unregister);
-+EXPORT_SYMBOL(ext3_xattr_get);
-+EXPORT_SYMBOL(ext3_xattr_list);
-+EXPORT_SYMBOL(ext3_xattr_set);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
+}
+
+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */
---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_fs.h 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-21 22:49:05.000000000 -0600
@@ -58,8 +58,6 @@
*/
#define EXT3_BAD_INO 1 /* Bad blocks inode */
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h 2003-04-20 16:14:31.000000000 +0800
+--- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h 2003-07-21 22:49:05.000000000 -0600
@@ -30,13 +30,19 @@
#define EXT3_SINGLEDATA_TRANS_BLOCKS 8
extern int ext3_writepage_trans_blocks(struct inode *inode);
---- /dev/null 2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h 2003-04-20 16:14:31.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h 2003-07-21 22:49:05.000000000 -0600
@@ -0,0 +1,155 @@
+/*
+ File: linux/ext3_xattr.h
+
+#endif /* __KERNEL__ */
+
---- /dev/null 2002-08-31 07:31:37.000000000 +0800
-+++ linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/include/linux/xattr.h 2003-07-21 22:49:05.000000000 -0600
@@ -0,0 +1,15 @@
+/*
+ File: linux/xattr.h
+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */
+
+#endif /* _LINUX_XATTR_H */
---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-04-20 16:14:54.000000000 +0800
-+++ linux-2.4.18-18-root/fs/ext3/Makefile 2003-04-20 16:15:15.000000000 +0800
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-07-21 22:27:37.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-21 22:51:23.000000000 -0600
@@ -9,10 +9,10 @@
O_TARGET := ext3.o
-export-objs := super.o inode.o
-+export-objs := super.o inode.o xattr.o
++export-objs := ext3-exports.o
obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
- ioctl.o namei.o super.o symlink.o
-+ ioctl.o namei.o super.o symlink.o xattr.o
++ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
obj-m := $(O_TARGET)
include $(TOPDIR)/Rules.make
fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++
fs/ext2/xattr_user.c | 103 +++
fs/ext3/Makefile | 10
+ fs/ext3/ext3-exports.c | 13
fs/ext3/file.c | 5
fs/ext3/ialloc.c | 2
fs/ext3/inode.c | 35 -
include/linux/mbcache.h | 69 ++
kernel/ksyms.c | 4
mm/vmscan.c | 36 +
- fs/ext3/ext3-exports.c | 14 +
- 62 files changed, 4331 insertions(+), 197 deletions(-)
+ 62 files changed, 4344 insertions(+), 183 deletions(-)
---- linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:50.000000000 +0800
-+++ linux-rh-2.4.20-8-root/Documentation/Configure.help 2003-05-07 17:34:25.000000000 +0800
-@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS
+--- kernel-2.4.20-6chaos_18_7/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos 2003-06-23 10:39:21.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/Documentation/Configure.help 2003-07-12 15:34:44.000000000 -0600
+@@ -15253,6 +15253,39 @@ CONFIG_EXT2_FS
be compiled as a module, and so this could be dangerous. Most
everyone wants to say Y here.
Ext3 journalling file system support (EXPERIMENTAL)
CONFIG_EXT3_FS
This is the journalling version of the Second extended file system
-@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS
+@@ -15285,6 +15318,39 @@ CONFIG_EXT3_FS
of your root partition (the one containing the directory /) cannot
be compiled as a module, and so this may be dangerous.
Journal Block Device support (JBD for ext3) (EXPERIMENTAL)
CONFIG_JBD
This is a generic journalling layer for block devices. It is
---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-11-20 07:19:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:54.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_ALPHA=y
# CONFIG_UID16 is not set
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:11:53.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600
@@ -1162,6 +1162,18 @@ sys_call_table:
.quad sys_readahead
.quad sys_ni_syscall /* 380, sys_security */
/* Remember to update everything, kids. */
.ifne (. - sys_call_table) - (NR_SYSCALLS * 8)
---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-05-20 08:43:05.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:56.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_ARM=y
# CONFIG_EISA is not set
# CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:09:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/kernel/calls.S 2003-07-12 15:34:44.000000000 -0600
@@ -240,18 +240,18 @@ __syscall_start:
.long SYMBOL_NAME(sys_ni_syscall) /* Security */
.long SYMBOL_NAME(sys_gettid)
.long SYMBOL_NAME(sys_tkill)
/*
* Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:00.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/i386/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_X86=y
CONFIG_ISA=y
# CONFIG_SBUS is not set
---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:04.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ia64/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
#
# Code maturity level options
---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2000-06-20 03:56:08.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/m68k/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_UID16=y
#
---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:06.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_MIPS=y
CONFIG_MIPS32=y
# CONFIG_MIPS64 is not set
---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips64/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_MIPS=y
# CONFIG_MIPS32 is not set
CONFIG_MIPS64=y
---- linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:20.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,20 @@
#
# Automatically generated make config: don't edit
# CONFIG_UID16 is not set
# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc64/kernel/misc.S 2003-07-12 15:34:44.000000000 -0600
@@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32)
.llong .sys_gettid /* 207 */
#if 0 /* Reserved syscalls */
.llong .sys_futex
#endif
.llong .sys_perfmonctl /* Put this here for now ... */
---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
# CONFIG_ISA is not set
# CONFIG_EISA is not set
# CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600
@@ -558,18 +558,18 @@ sys_call_table:
.long sys_fcntl64
.long sys_ni_syscall
.long sys_gettid
.long sys_tkill
.rept 255-237
---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
# CONFIG_ISA is not set
# CONFIG_EISA is not set
# CONFIG_MCA is not set
---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:21.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600
@@ -591,18 +591,18 @@ sys_call_table:
.long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper)
.long SYSCALL(sys_ni_syscall,sys_ni_syscall)
.long SYSCALL(sys_gettid,sys_gettid)
.long SYSCALL(sys_tkill,sys_tkill)
.rept 255-237
---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos 2002-02-26 03:37:56.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:59.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/wrapper32.S 2003-07-12 15:34:44.000000000 -0600
@@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper:
llgtr %r3,%r3 # struct stat64 *
llgfr %r4,%r4 # long
+ jg sys_fremovexattr
+
+
---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:50.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
CONFIG_UID16=y
CONFIG_HIGHMEM=y
---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:52.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/kernel/systbls.S 2003-07-12 15:34:44.000000000 -0600
@@ -51,11 +51,11 @@ sys_call_table:
/*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
/*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount
/*190*/ .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
/*195*/ .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask
/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir
---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:29.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/defconfig 2003-07-12 15:34:44.000000000 -0600
@@ -1,6 +1,13 @@
#
# Automatically generated make config: don't edit
#
# Code maturity level options
---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800
-+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:55.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/kernel/systbls.S 2003-07-12 15:34:44.000000000 -0600
@@ -52,11 +52,11 @@ sys_call_table32:
/*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64
.word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount
/*190*/ .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall
.word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask
/*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:05:03.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Config.in 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:24.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Config.in 2003-07-12 15:34:44.000000000 -0600
@@ -34,6 +34,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB
dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL
mainmenu_option next_comment
comment 'Partition Types'
source fs/partitions/Config.in
---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/Makefile 2003-07-12 15:34:44.000000000 -0600
@@ -84,6 +84,9 @@ obj-y += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
# persistent filesystems
obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o))
---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/Makefile 2003-07-12 15:34:44.000000000 -0600
@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file
ioctl.o namei.o super.o symlink.o
obj-m := $(O_TARGET)
+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o
+
include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/file.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/file.c 2003-07-12 15:34:44.000000000 -0600
@@ -20,6 +20,7 @@
#include <linux/fs.h>
+ listxattr: ext2_listxattr,
+ removexattr: ext2_removexattr,
};
---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/ialloc.c 2003-07-12 15:34:44.000000000 -0600
@@ -15,6 +15,7 @@
#include <linux/config.h>
#include <linux/fs.h>
DQUOT_FREE_INODE(inode);
DQUOT_DROP(inode);
}
---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/inode.c 2003-07-12 15:34:44.000000000 -0600
@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL");
static int ext2_update_inode(struct inode * inode, int do_sync);
brelse (bh);
inode->i_attr_flags = 0;
if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) {
---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-04 13:57:36.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/namei.c 2003-07-12 15:34:44.000000000 -0600
@@ -31,6 +31,7 @@
#include <linux/fs.h>
+ listxattr: ext2_listxattr,
+ removexattr: ext2_removexattr,
};
---- linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/super.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/super.c 2003-07-12 15:34:44.000000000 -0600
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/fs.h>
}
EXPORT_NO_SYMBOLS;
---- linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2000-09-28 04:41:33.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/symlink.c 2003-07-12 15:34:44.000000000 -0600
@@ -19,6 +19,7 @@
#include <linux/fs.h>
+ listxattr: ext2_listxattr,
+ removexattr: ext2_removexattr,
};
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,1212 @@
+/*
+ * linux/fs/ext2/xattr.c
+}
+
+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr_user.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,103 @@
+/*
+ * linux/fs/ext2/xattr_user.c
+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER,
+ &ext2_xattr_user_handler);
+}
---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile 2003-05-07 17:45:13.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/Makefile 2003-07-12 15:34:44.000000000 -0600
@@ -1,5 +1,5 @@
#
-# Makefile for the linux ext2-filesystem routines.
+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o
+
include $(TOPDIR)/Rules.make
---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/file.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/file.c 2003-07-12 15:34:44.000000000 -0600
@@ -23,6 +23,7 @@
#include <linux/locks.h>
#include <linux/jbd.h>
+ removexattr: ext3_removexattr, /* BKL held */
};
---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ialloc.c 2003-07-12 15:34:44.000000000 -0600
@@ -17,6 +17,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
DQUOT_FREE_INODE(inode);
DQUOT_DROP(inode);
---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/inode.c 2003-07-12 15:34:44.000000000 -0600
@@ -39,6 +39,18 @@
*/
#undef SEARCH_FROM_ZERO
/* inode->i_attr_flags = 0; unused */
if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
/* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:43.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/namei.c 2003-07-12 15:34:44.000000000 -0600
@@ -29,6 +29,7 @@
#include <linux/sched.h>
#include <linux/ext3_fs.h>
+ removexattr: ext3_removexattr, /* BKL held */
+};
+
---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/super.c 2003-05-07 17:40:45.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/super.c 2003-07-12 15:34:44.000000000 -0600
@@ -24,6 +24,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
MODULE_LICENSE("GPL");
---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2001-11-10 06:25:04.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/symlink.c 2003-07-12 15:34:44.000000000 -0600
@@ -20,6 +20,7 @@
#include <linux/fs.h>
#include <linux/jbd.h>
+ listxattr: ext3_listxattr, /* BKL held */
+ removexattr: ext3_removexattr, /* BKL held */
};
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c 2003-05-07 17:42:06.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,1225 @@
+/*
+ * linux/fs/ext3/xattr.c
+}
+
+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr_user.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,111 @@
+/*
+ * linux/fs/ext3/xattr_user.c
+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER,
+ &ext3_xattr_user_handler);
+}
---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/jfs_xattr.h 2003-07-12 15:34:44.000000000 -0600
@@ -52,8 +52,10 @@ struct jfs_ea_list {
#define END_EALIST(ealist) \
((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:11.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/xattr.c 2003-07-12 15:34:44.000000000 -0600
@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s
}
size_t value_len, int flags)
{
if (value == NULL) { /* empty EA, do not remove */
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/fs/mbcache.c 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/mbcache.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,648 @@
+/*
+ * linux/fs/mbcache.c
+module_init(init_mbcache)
+module_exit(exit_mbcache)
+
---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-arm/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -244,7 +244,6 @@
#define __NR_security (__NR_SYSCALL_BASE+223)
#define __NR_gettid (__NR_SYSCALL_BASE+224)
#define __NR_tkill (__NR_SYSCALL_BASE+238)
/*
* Please check 2.5 _before_ adding calls here,
---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:42.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-ppc64/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -218,6 +218,7 @@
#define __NR_gettid 207
#if 0 /* Reserved syscalls */
#define __NR_futex 221
#endif
---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:44.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -212,9 +212,18 @@
#define __NR_madvise 219
#define __NR_getdents64 220
#define __NR_gettid 236
#define __NR_tkill 237
---- linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:45.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390x/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -180,9 +180,18 @@
#define __NR_pivot_root 217
#define __NR_mincore 218
#define __NR_gettid 236
#define __NR_tkill 237
---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:46.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -184,24 +184,24 @@
/* #define __NR_exportfs 166 SunOS Specific */
#define __NR_mount 167 /* Common */
#define __NR_tkill 187 /* SunOS: fpathconf */
/* #define __NR_sysconf 188 SunOS Specific */
#define __NR_uname 189 /* Linux Specific */
---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:48.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc64/unistd.h 2003-07-12 15:34:44.000000000 -0600
@@ -184,24 +184,24 @@
/* #define __NR_exportfs 166 SunOS Specific */
#define __NR_mount 167 /* Common */
#define __NR_tkill 187 /* SunOS: fpathconf */
/* #define __NR_sysconf 188 SunOS Specific */
#define __NR_uname 189 /* Linux Specific */
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/cache_def.h 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,15 @@
+/*
+ * linux/cache_def.h
+
+extern void register_cache(struct cache_definition *);
+extern void unregister_cache(struct cache_definition *);
---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/errno.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:15:06.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/errno.h 2003-07-12 15:34:44.000000000 -0600
@@ -26,4 +26,8 @@
#endif
+#define ENOTSUP EOPNOTSUPP /* Operation not supported */
+
#endif
---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-12 15:46:42.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-06-24 11:31:16.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_fs.h 2003-07-12 15:34:44.000000000 -0600
@@ -57,8 +57,6 @@
*/
#define EXT2_BAD_INO 1 /* Bad blocks inode */
#define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE
#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
-@@ -623,8 +600,10 @@ extern struct address_space_operations e
+@@ -624,8 +601,10 @@ extern struct address_space_operations e
/* namei.c */
extern struct inode_operations ext2_dir_inode_operations;
extern struct inode_operations ext2_fast_symlink_inode_operations;
#endif /* __KERNEL__ */
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_xattr.h 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,157 @@
+/*
+ File: linux/ext2_xattr.h
+
+#endif /* __KERNEL__ */
+
---- linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:41.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_fs.h 2003-07-12 15:34:44.000000000 -0600
@@ -63,8 +63,6 @@
*/
#define EXT3_BAD_INO 1 /* Bad blocks inode */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
-@@ -520,7 +496,7 @@ struct ext3_super_block {
+@@ -521,7 +497,7 @@ struct ext3_super_block {
#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
EXT3_FEATURE_INCOMPAT_RECOVER)
#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
-@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st
+@@ -704,6 +680,7 @@ extern void ext3_check_inodes_bitmap (st
extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
/* inode.c */
extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
-@@ -771,8 +748,10 @@ extern struct address_space_operations e
+@@ -773,8 +750,10 @@ extern struct address_space_operations e
/* namei.c */
extern struct inode_operations ext3_dir_inode_operations;
extern struct inode_operations ext3_fast_symlink_inode_operations;
---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_jbd.h 2003-07-12 15:34:44.000000000 -0600
@@ -30,13 +30,19 @@
#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
extern int ext3_writepage_trans_blocks(struct inode *inode);
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_xattr.h 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,157 @@
+/*
+ File: linux/ext3_xattr.h
+
+#endif /* __KERNEL__ */
+
---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/fs.h 2003-05-07 17:34:25.000000000 +0800
-@@ -915,7 +915,7 @@ struct inode_operations {
+--- kernel-2.4.20-6chaos_18_7/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:31:35.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/fs.h 2003-07-12 15:34:44.000000000 -0600
+@@ -914,7 +914,7 @@ struct inode_operations {
int (*setattr) (struct dentry *, struct iattr *);
- int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*setattr_raw) (struct inode *, struct iattr *);
int (*getattr) (struct dentry *, struct iattr *);
- int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h 2003-05-07 17:34:25.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/mbcache.h 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,69 @@
+/*
+ File: linux/mbcache.h
+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int,
+ kdev_t, unsigned int);
+#endif
---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/kernel/ksyms.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:14:02.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/kernel/ksyms.c 2003-07-12 15:35:19.000000000 -0600
@@ -12,6 +12,7 @@
#define __KERNEL_SYSCALLS__
#include <linux/config.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/blkdev.h>
-@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm);
+@@ -106,6 +107,7 @@ EXPORT_SYMBOL(do_brk);
+ EXPORT_SYMBOL(exit_mm);
EXPORT_SYMBOL(exit_files);
EXPORT_SYMBOL(exit_fs);
- EXPORT_SYMBOL(exit_sighand);
+EXPORT_SYMBOL(copy_fs_struct);
+ EXPORT_SYMBOL(exit_sighand);
+ EXPORT_SYMBOL_GPL(make_pages_present);
- /* internal kernel memory management */
- EXPORT_SYMBOL(_alloc_pages);
-@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
+@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_alloc);
EXPORT_SYMBOL(kmem_cache_free);
EXPORT_SYMBOL(kmem_cache_validate);
EXPORT_SYMBOL(kmem_cache_size);
EXPORT_SYMBOL(kmalloc);
EXPORT_SYMBOL(kfree);
EXPORT_SYMBOL(vfree);
---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800
-+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800
+--- kernel-2.4.20-6chaos_18_7/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:34.000000000 -0600
++++ kernel-2.4.20-6chaos_18_7-braam/mm/vmscan.c 2003-07-12 15:34:44.000000000 -0600
@@ -21,6 +21,7 @@
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#ifdef CONFIG_QUOTA
ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
#endif
---- /dev/null 2003-01-30 18:24:37.000000000 +0800
-+++ linux-root/fs/ext3/ext3-exports.c 2003-05-05 18:19:11.000000000 +0800
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ext3-exports.c 2003-07-12 15:34:44.000000000 -0600
@@ -0,0 +1,13 @@
+#include <linux/config.h>
+#include <linux/module.h>
--- /dev/null Fri Aug 30 17:31:37 2002
+++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003
@@ -0,0 +1 @@
-+#define LUSTRE_KERNEL_VERSION 19
++#define LUSTRE_KERNEL_VERSION 21
_
0 files changed
---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:58.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/dcache.c 2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/dcache.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:59.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/dcache.c 2003-07-17 08:35:22.000000000 -0700
@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
spin_unlock(&dcache_lock);
return 0;
/*
* Check whether to do a partial shrink_dcache
* to get rid of unused child entries.
-@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry *
- dentry->d_fsdata = NULL;
- dentry->d_extra_attributes = NULL;
- dentry->d_mounted = 0;
-+ dentry->d_it = NULL;
- dentry->d_cookie = NULL;
- INIT_LIST_HEAD(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_lru);
-@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry)
+@@ -839,13 +846,19 @@ void d_delete(struct dentry * dentry)
* Adds a dentry to the hash according to its name.
*/
}
#define do_switch(x,y) do { \
---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/namei.c 2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/namei.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:47.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/namei.c 2003-07-17 08:35:22.000000000 -0700
@@ -94,6 +94,13 @@
* XEmacs seems to be relying on it...
*/
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
+{
-+ if (it && de->d_op && de->d_op->d_intent_release)
-+ de->d_op->d_intent_release(de, it);
++ if (it && it->it_op_release)
++ it->it_op_release(it);
+
+}
+
{
struct dentry * dentry = d_lookup(parent, name);
-+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
+ !d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
result = ERR_PTR(-ENOMEM);
if (dentry) {
lock_kernel();
-+ if (dir->i_op->lookup2)
-+ result = dir->i_op->lookup2(dir, dentry, it);
++ if (dir->i_op->lookup_it)
++ result = dir->i_op->lookup_it(dir, dentry, it, flags);
+ else
result = dir->i_op->lookup(dir, dentry);
unlock_kernel();
dput(result);
result = ERR_PTR(-ENOENT);
}
-+ } else if (result->d_op && result->d_op->d_revalidate2) {
-+ if (!result->d_op->d_revalidate2(result, flags, it) &&
++ } else if (result->d_op && result->d_op->d_revalidate_it) {
++ if (!result->d_op->d_revalidate_it(result, flags, it) &&
+ !d_invalidate(result)) {
+ dput(result);
+ goto again;
{
int err;
if (current->link_count >= max_recursive_link)
-@@ -348,10 +377,21 @@ static inline int do_follow_link(struct
+@@ -348,10 +377,18 @@ static inline int do_follow_link(struct
current->link_count++;
current->total_link_count++;
UPDATE_ATIME(dentry->d_inode);
-- err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ nd->it = it;
-+ if (dentry->d_inode->i_op->follow_link2)
-+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
+ /* vfs_follow_link was never called */
-+ intent_release(dentry, it);
++ intent_release(it);
+ path_release(nd);
+ err = -ENOLINK;
+ }
current->link_count--;
return err;
loop:
-+ intent_release(dentry, it);
++ intent_release(it);
path_release(nd);
return -ELOOP;
}
-@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -381,15 +418,26 @@ int follow_up(struct vfsmount **mnt, str
return __follow_up(mnt, dentry);
}
+ opc = it->it_op;
+ mode = it->it_mode;
+ }
-+ intent_release(*dentry, it);
++ intent_release(it);
+ if (it) {
+ it->it_op = opc;
+ it->it_mode = mode;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
-@@ -401,7 +452,7 @@ static inline int __follow_down(struct v
+@@ -401,7 +449,7 @@ static inline int __follow_down(struct v
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
}
static inline void follow_dotdot(struct nameidata *nd)
-@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct
+@@ -437,7 +485,7 @@ static inline void follow_dotdot(struct
mntput(nd->mnt);
nd->mnt = parent;
}
;
}
-@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct
+@@ -449,7 +497,8 @@ static inline void follow_dotdot(struct
*
* We expect 'base' to be positive and a directory.
*/
{
struct dentry *dentry;
struct inode *inode;
-@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st
+@@ -526,19 +575,18 @@ int link_path_walk(const char * name, st
break;
}
/* This does the actual lookups.. */
- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
if (!dentry) {
err = -EWOULDBLOCKIO;
if (atomic)
break;
- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
-+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
++ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
- ;
+- ;
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL));
err = -ENOENT;
-@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st
- if (!inode->i_op)
+ inode = dentry->d_inode;
+@@ -549,7 +597,7 @@ int link_path_walk(const char * name, st
goto out_dput;
-- if (inode->i_op->follow_link) {
+ if (inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
-+ if (inode->i_op->follow_link || inode->i_op->follow_link2) {
+ err = do_follow_link(dentry, nd, NULL);
dput(dentry);
if (err)
goto return_err;
-@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st
+@@ -565,7 +613,7 @@ int link_path_walk(const char * name, st
nd->dentry = dentry;
}
err = -ENOTDIR;
- if (!inode->i_op->lookup)
-+ if (!inode->i_op->lookup && !inode->i_op->lookup2)
++ if (!inode->i_op->lookup && !inode->i_op->lookup_it)
break;
continue;
/* here ends the main loop */
-@@ -592,22 +644,23 @@ last_component:
+@@ -592,22 +640,22 @@ last_component:
if (err < 0)
break;
}
- dentry = cached_lookup(nd->dentry, &this, 0);
-+ dentry = cached_lookup(nd->dentry, &this, 0, it);
++ dentry = cached_lookup(nd->dentry, &this, 0, it);
if (!dentry) {
err = -EWOULDBLOCKIO;
if (atomic)
break;
- dentry = real_lookup(nd->dentry, &this, 0);
-+ dentry = real_lookup(nd->dentry, &this, 0, it);
++ dentry = real_lookup(nd->dentry, &this, 0, it);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
-+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
-- && inode && inode->i_op && inode->i_op->follow_link) {
+ && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
-+ && inode && inode->i_op &&
-+ (inode->i_op->follow_link || inode->i_op->follow_link2)) {
-+ err = do_follow_link(dentry, nd, it);
++ err = do_follow_link(dentry, nd, it);
dput(dentry);
if (err)
goto return_err;
-@@ -621,7 +674,8 @@ last_component:
+@@ -621,7 +669,8 @@ last_component:
goto no_inode;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
- if (!inode->i_op || !inode->i_op->lookup)
+ if (!inode->i_op ||
-+ (!inode->i_op->lookup && !inode->i_op->lookup2))
++ (!inode->i_op->lookup && !inode->i_op->lookup_it))
break;
}
goto return_base;
-@@ -645,6 +699,23 @@ return_reval:
+@@ -645,6 +694,23 @@ return_reval:
* Check the cached dentry for staleness.
*/
dentry = nd->dentry;
-+ revalidate_again:
-+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++ revalidate_again:
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ err = -ESTALE;
-+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+ struct dentry *new;
-+ err = permission(dentry->d_parent->d_inode,
-+ MAY_EXEC);
-+ if (err)
-+ break;
-+ new = real_lookup(dentry->d_parent,
-+ &dentry->d_name, 0, NULL);
++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++ struct dentry *new;
++ err = permission(dentry->d_parent->d_inode,
++ MAY_EXEC);
++ if (err)
++ break;
++ new = real_lookup(dentry->d_parent,
++ &dentry->d_name, 0, NULL);
+ d_invalidate(dentry);
-+ dput(dentry);
-+ dentry = new;
-+ goto revalidate_again;
-+ }
++ dput(dentry);
++ dentry = new;
++ goto revalidate_again;
++ }
+ } else
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
err = -ESTALE;
if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -658,15 +729,28 @@ out_dput:
+@@ -658,15 +724,28 @@ out_dput:
dput(dentry);
break;
}
+ if (err)
-+ intent_release(nd->dentry, it);
++ intent_release(it);
path_release(nd);
return_err:
return err;
}
/* SMP-safe */
-@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct
+@@ -751,6 +830,17 @@ walk_init_root(const char *name, struct
}
/* SMP-safe */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
int error = 0;
-@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned
+@@ -765,6 +855,7 @@ int path_init(const char *name, unsigned
{
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
if (*name=='/')
return walk_init_root(name,nd);
read_lock(¤t->fs->lock);
-@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned
+@@ -779,7 +870,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -802,13 +894,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
if (!new)
goto out;
lock_kernel();
-+ if (inode->i_op->lookup2)
-+ dentry = inode->i_op->lookup2(inode, new, it);
++ if (inode->i_op->lookup_it)
++ dentry = inode->i_op->lookup_it(inode, new, it, 0);
+ else
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -820,6 +920,12 @@ out:
+@@ -820,6 +915,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha
+@@ -841,7 +942,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign
+@@ -872,6 +973,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -1010,7 +1133,8 @@ exit_lock:
+@@ -969,7 +1087,8 @@ static inline int lookup_flags(unsigned
+ return retval;
+ }
+
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++ struct lookup_intent *it)
+ {
+ int error;
+
+@@ -982,12 +1101,15 @@ int vfs_create(struct inode *dir, struct
+ goto exit_lock;
+
+ error = -EACCES; /* shouldn't it be ENOSYS? */
+- if (!dir->i_op || !dir->i_op->create)
++ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+ goto exit_lock;
+
+ DQUOT_INIT(dir);
+ lock_kernel();
+- error = dir->i_op->create(dir, dentry, mode);
++ if (dir->i_op->create_it)
++ error = dir->i_op->create_it(dir, dentry, mode, it);
++ else
++ error = dir->i_op->create(dir, dentry, mode);
+ unlock_kernel();
+ exit_lock:
+ up(&dir->i_zombie);
+@@ -996,6 +1118,11 @@ exit_lock:
+ return error;
+ }
+
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++ return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+ * open_namei()
+ *
+@@ -1010,7 +1137,8 @@ exit_lock:
* for symlinks (where the permissions are checked later).
* SMP-safe
*/
{
int acc_mode, error = 0;
struct inode *inode;
-@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in
+@@ -1024,7 +1152,7 @@ int open_namei(const char * pathname, in
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
if (error)
return error;
dentry = nd->dentry;
-@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in
+@@ -1034,6 +1162,10 @@ int open_namei(const char * pathname, in
/*
* Create - we need to know the parent.
*/
error = path_lookup(pathname, LOOKUP_PARENT, nd);
if (error)
return error;
-@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in
+@@ -1049,7 +1181,7 @@ int open_namei(const char * pathname, in
dir = nd->dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1058,6 +1186,7 @@ do_last:
+@@ -1058,10 +1190,11 @@ do_last:
goto exit;
}
+ it->it_mode = mode;
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
- error = vfs_create(dir->d_inode, dentry,
-@@ -1086,12 +1215,13 @@ do_last:
+- error = vfs_create(dir->d_inode, dentry,
+- mode & ~current->fs->umask);
++ error = vfs_create_it(dir->d_inode, dentry,
++ mode & ~current->fs->umask, it);
+ up(&dir->d_inode->i_sem);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+@@ -1086,7 +1219,7 @@ do_last:
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
}
error = -ENOENT;
if (!dentry->d_inode)
- goto exit_dput;
-- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+ dentry->d_inode->i_op->follow_link2))
- goto do_link;
-
- dput(nd->dentry);
-@@ -1165,7 +1295,7 @@ ok:
+@@ -1165,7 +1298,7 @@ ok:
if (!error) {
DQUOT_INIT(inode);
}
put_write_access(inode);
if (error)
-@@ -1177,8 +1307,10 @@ ok:
+@@ -1177,8 +1310,10 @@ ok:
return 0;
exit_dput:
-+ intent_release(dentry, it);
++ intent_release(it);
dput(dentry);
exit:
-+ intent_release(nd->dentry, it);
++ intent_release(it);
path_release(nd);
return error;
-@@ -1197,7 +1329,19 @@ do_link:
+@@ -1197,7 +1332,16 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
-- error = dentry->d_inode->i_op->follow_link(dentry, nd);
+ nd->it = it;
-+ if (dentry->d_inode->i_op->follow_link2)
-+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ error = dentry->d_inode->i_op->follow_link(dentry, nd);
+ error = dentry->d_inode->i_op->follow_link(dentry, nd);
+ if (error) {
-+ intent_release(dentry, it);
++ intent_release(it);
+ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
+ /* vfs_follow_link was never called */
-+ intent_release(dentry, it);
++ intent_release(it);
+ path_release(nd);
+ error = -ENOLINK;
+ }
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1289,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
+
-+ if (nd.dentry->d_inode->i_op->mknod2) {
++ if (nd.dentry->d_inode->i_op->mknod_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->mknod2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ mode, dev);
++ error = op->mknod_raw(&nd, mode, dev);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1310,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1357,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 1);
-+ if (nd.dentry->d_inode->i_op->mkdir2) {
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->mkdir2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ mode);
++ error = op->mkdir_raw(&nd, mode);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1365,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1465,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p
error = -EBUSY;
goto exit1;
}
-+ if (nd.dentry->d_inode->i_op->rmdir2) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ struct dentry *last;
-+
-+ down(&nd.dentry->d_inode->i_sem);
-+ last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+ up(&nd.dentry->d_inode->i_sem);
-+ if (IS_ERR(last)) {
-+ error = PTR_ERR(last);
-+ goto exit1;
-+ }
-+ if (d_mountpoint(last)) {
-+ dput(last);
-+ error = -EBUSY;
-+ goto exit1;
-+ }
-+ dput(last);
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
+
-+ error = op->rmdir2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char *
+@@ -1524,8 +1701,15 @@ asmlinkage long sys_unlink(const char *
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
-+ if (nd.dentry->d_inode->i_op->unlink2) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->unlink2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1592,15 +1776,23 @@ asmlinkage long sys_symlink(const char *
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ if (nd.dentry->d_inode->i_op->symlink2) {
++ if (nd.dentry->d_inode->i_op->symlink_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->symlink2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ from);
++ error = op->symlink_raw(&nd, from);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
putname(to);
}
putname(from);
-@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1676,7 +1868,14 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
- new_dentry = lookup_create(&nd, 0);
-+ if (nd.dentry->d_inode->i_op->link2) {
++ if (nd.dentry->d_inode->i_op->link_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->link2(old_nd.dentry->d_inode,
-+ nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
++ error = op->link_raw(&old_nd, &nd);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out_release;
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1720,7 +1950,8 @@ exit:
+@@ -1720,7 +1919,7 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
++ struct inode *new_dir, struct dentry *new_dentry)
{
int error;
struct inode *target;
-@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir
- error = -EBUSY;
- else
- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+ intent_release(new_dentry, it);
- if (target) {
- if (!error)
- target->i_flags |= S_DEAD;
-@@ -1799,7 +2031,8 @@ out_unlock:
+@@ -1799,7 +1998,7 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
++ struct inode *new_dir, struct dentry *new_dentry)
{
int error;
-@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d
- error = -EBUSY;
- else
- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+ intent_release(new_dentry, it);
- double_up(&old_dir->i_zombie, &new_dir->i_zombie);
- if (error)
- return error;
-@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d
- }
-
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
- {
- int error;
- if (S_ISDIR(old_dentry->d_inode->i_mode))
-- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
- else
-- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
- if (!error) {
- if (old_dir == new_dir)
- inode_dir_notify(old_dir, DN_RENAME);
-@@ -1889,7 +2124,7 @@ static inline int do_rename(const char *
+@@ -1887,9 +2086,18 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
++ if (old_dir->d_inode->i_op->rename_raw) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ unlock_kernel();
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1905,16 +2140,37 @@ static inline int do_rename(const char *
+@@ -1905,16 +2113,16 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
if (IS_ERR(new_dentry))
goto exit4;
-+ if (old_dir->d_inode->i_op->rename2) {
-+ lock_kernel();
-+ /* don't rename mount point. mds will take care of
-+ * the rest sanity checking */
-+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+ error = -EBUSY;
-+ goto exit5;
-+ }
-+
-+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+ new_dir->d_inode,
-+ oldnd.last.name,
-+ oldnd.last.len,
-+ newnd.last.name,
-+ newnd.last.len);
-+ unlock_kernel();
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit5;
-+ }
+
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
-- new_dir->d_inode, new_dentry);
-+ new_dir->d_inode, new_dentry, NULL);
+ new_dir->d_inode, new_dentry);
unlock_kernel();
-
-+exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
-@@ -1965,20 +2221,28 @@ out:
+@@ -1965,20 +2173,28 @@ out:
}
static inline int
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -2002,7 +2266,13 @@ fail:
+@@ -2002,7 +2218,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
}
/* get the link contents into pagecache */
-@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent
+@@ -2044,7 +2266,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800
-@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
- err = nfserr_perm;
- } else
- #endif
-- err = vfs_rename(fdir, odentry, tdir, ndentry);
-+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
- if (!err && EX_ISSYNC(tfhp->fh_export)) {
- nfsd_sync_dir(tdentry);
- nfsd_sync_dir(fdentry);
---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/open.c 2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/open.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:45.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/open.c 2003-07-17 08:35:22.000000000 -0700
@@ -19,6 +19,8 @@
#include <asm/uaccess.h>
int error;
struct iattr newattrs;
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
down(&inode->i_sem);
newattrs.ia_size = length;
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+ newattrs.ia_valid |= ATTR_FROM_OPEN;
+ if (op->setattr_raw) {
+ newattrs.ia_valid |= ATTR_RAW;
-+ newattrs.ia_ctime = CURRENT_TIME;
+ error = op->setattr_raw(inode, &newattrs);
-+ } else
++ } else
+ error = notify_change(dentry, &newattrs);
up(&inode->i_sem);
return error;
}
-@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
+@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const
struct nameidata nd;
struct inode * inode;
int error;
if (error)
goto out;
inode = nd.dentry->d_inode;
-@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const
error = locks_verify_truncate(inode, NULL, length);
if (!error) {
DQUOT_INIT(inode);
- error = do_truncate(nd.dentry, length);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
+ error = do_truncate(nd.dentry, length, 0);
}
put_write_access(inode);
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
-@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi
error = locks_verify_truncate(inode, file, length);
if (!error)
out_putf:
fput(file);
out:
-@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
+@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam
struct inode * inode;
struct iattr newattrs;
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam
goto dput_and_out;
newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+ goto dput_and_out;
+ }
+
-+ error = -EROFS;
-+ if (IS_RDONLY(inode))
-+ goto dput_and_out;
-+
+ error = -EPERM;
+ if (!times) {
if (current->fsuid != inode->i_uid &&
error = notify_change(nd.dentry, &newattrs);
dput_and_out:
path_release(&nd);
-@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
+@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena
struct inode * inode;
struct iattr newattrs;
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
-@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena
newattrs.ia_atime = times[0].tv_sec;
newattrs.ia_mtime = times[1].tv_sec;
newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
if (current->fsuid != inode->i_uid &&
(error = permission(inode,MAY_WRITE)) != 0)
goto dput_and_out;
-@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char *
+@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char *
int old_fsuid, old_fsgid;
kernel_cap_t old_cap;
int res;
if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
return -EINVAL;
-@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char *
+@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char *
else
current->cap_effective = current->cap_permitted;
if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
&& !special_file(nd.dentry->d_inode->i_mode))
res = -EROFS;
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
-@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f
+@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f
{
int error;
struct nameidata nd;
if (error)
goto out;
-@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f
+@@ -397,6 +443,7 @@ asmlinkage long sys_chdir(const char * f
set_fs_pwd(current->fs, nd.mnt, nd.dentry);
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
-@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char *
+@@ -436,9 +483,10 @@ asmlinkage long sys_chroot(const char *
{
int error;
struct nameidata nd;
if (error)
goto out;
-@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char *
+@@ -454,6 +502,7 @@ asmlinkage long sys_chroot(const char *
set_fs_altroot();
error = 0;
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
-@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f
+@@ -508,6 +557,18 @@ asmlinkage long sys_chmod(const char * f
if (IS_RDONLY(inode))
goto dput_and_out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto dput_and_out;
-@@ -538,6 +604,20 @@ static int chown_common(struct dentry *
+@@ -538,6 +599,20 @@ static int chown_common(struct dentry *
error = -EROFS;
if (IS_RDONLY(inode))
goto out;
+
+ newattrs.ia_uid = user;
+ newattrs.ia_gid = group;
-+ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
+ newattrs.ia_valid |= ATTR_RAW;
+ error = op->setattr_raw(inode, &newattrs);
+ /* the file system wants to use normal vfs path now */
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
-@@ -642,6 +722,7 @@ struct file *filp_open(const char * file
+@@ -642,8 +717,9 @@ struct file *filp_open(const char * file
{
int namei_flags, error;
struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
-
- flags &= ~O_DIRECT;
+-
+- flags &= ~O_DIRECT;
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags };
++
++ //flags &= ~O_DIRECT;
-@@ -651,14 +732,15 @@ struct file *filp_open(const char * file
+ namei_flags = flags;
+ if ((namei_flags+1) & O_ACCMODE)
+@@ -651,14 +727,15 @@ struct file *filp_open(const char * file
if (namei_flags & O_TRUNC)
namei_flags |= 2;
{
struct file * f;
struct inode *inode;
-@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry *
+@@ -695,12 +772,15 @@ struct file *dentry_open(struct dentry *
+ }
+
+ if (f->f_op && f->f_op->open) {
++ f->f_it = it;
+ error = f->f_op->open(inode,f);
++ f->f_it = NULL;
+ if (error)
+ goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-+ intent_release(dentry, it);
++ intent_release(it);
return f;
cleanup_all:
-@@ -715,11 +798,17 @@ cleanup_all:
+@@ -715,11 +795,17 @@ cleanup_all:
cleanup_file:
put_filp(f);
cleanup_dentry:
-+ intent_release(dentry, it);
++ intent_release(it);
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh 2003-04-11 14:05:08.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/stat.c 2003-06-09 23:18:07.000000000 +0800
-@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m
- int vfs_stat(char *name, struct kstat *stat)
+--- linux-2.4.20/fs/stat.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/stat.c 2003-07-17 08:51:33.000000000 -0700
+@@ -17,10 +17,12 @@
+ * Revalidate the inode. This is required for proper NFS attribute caching.
+ */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+ struct inode * inode = dentry->d_inode;
+- if (inode->i_op && inode->i_op->revalidate)
++ if (inode->i_op && inode->i_op->revalidate_it)
++ return inode->i_op->revalidate_it(dentry, it);
++ else if (inode->i_op && inode->i_op->revalidate)
+ return inode->i_op->revalidate(dentry);
+ return 0;
+ }
+@@ -32,13 +34,13 @@ static inline nlink_t user_nlink(struct
+ return inode->i_nlink;
+ }
+
+-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, struct lookup_intent *it)
+ {
+ int res = 0;
+ unsigned int blocks, indirect;
+ struct inode *inode = dentry->d_inode;
+
+- res = do_revalidate(dentry);
++ res = do_revalidate(dentry, it);
+ if (res)
+ return res;
+
+@@ -111,10 +113,12 @@ int vfs_stat(char *name, struct kstat *s
{
struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_GETATTR };
int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
- error = user_path_walk(name, &nd);
-+ error = user_path_walk_it(name, &nd, &it);
++ error = user_path_walk_it(name, &nd, &it);
if (!error) {
- error = do_getattr(nd.mnt, nd.dentry, stat);
-+ intent_release(nd.dentry, &it);
+- error = do_getattr(nd.mnt, nd.dentry, stat);
++ error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s
- int vfs_lstat(char *name, struct kstat *stat)
+@@ -124,10 +128,12 @@ int vfs_lstat(char *name, struct kstat *
{
struct nameidata nd;
-+ struct lookup_intent it = { .it_op = IT_GETATTR };
int error;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
- error = user_path_walk_link(name, &nd);
-+ error = user_path_walk_link_it(name, &nd, &it);
++ error = user_path_walk_link_it(name, &nd, &it);
if (!error) {
- error = do_getattr(nd.mnt, nd.dentry, stat);
-+ intent_release(nd.dentry, &it);
+- error = do_getattr(nd.mnt, nd.dentry, stat);
++ error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh 2003-04-12 15:46:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/dcache.h 2003-06-09 23:18:07.000000000 +0800
-@@ -7,6 +7,28 @@
+@@ -139,7 +145,7 @@ int vfs_fstat(unsigned int fd, struct ks
+ int error = -EBADF;
+
+ if (f) {
+- error = do_getattr(f->f_vfsmnt, f->f_dentry, stat);
++ error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL);
+ fput(f);
+ }
+ return error;
+@@ -286,7 +292,7 @@ asmlinkage long sys_readlink(const char
+
+ error = -EINVAL;
+ if (inode->i_op && inode->i_op->readlink &&
+- !(error = do_revalidate(nd.dentry))) {
++ !(error = do_revalidate(nd.dentry, NULL))) {
+ UPDATE_ATIME(inode);
+ error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+ }
+--- linux-2.4.20/include/linux/dcache.h~vfs_intent-2.4.20-rh 2003-07-17 08:32:48.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/dcache.h 2003-07-17 08:35:22.000000000 -0700
+@@ -6,6 +6,45 @@
+ #include <asm/atomic.h>
#include <linux/mount.h>
#include <linux/kernel.h>
-
-+#define IT_OPEN (1)
-+#define IT_CREAT (1<<1)
-+#define IT_READDIR (1<<2)
-+#define IT_GETATTR (1<<3)
-+#define IT_LOOKUP (1<<4)
-+#define IT_UNLINK (1<<5)
++#include <linux/string.h>
++
++#define IT_OPEN 0x0001
++#define IT_CREAT 0x0002
++#define IT_READDIR 0x0004
++#define IT_GETATTR 0x0008
++#define IT_LOOKUP 0x0010
++#define IT_UNLINK 0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC 0x0080
++#define IT_PIN 0x0100
+
-+#define IT_FL_LOCKED (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define IT_FL_LOCKED 0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
+
+struct lookup_intent {
+ int it_op;
++ void (*it_op_release)(struct lookup_intent *);
++ int it_magic;
+ int it_mode;
+ int it_flags;
+ int it_disposition;
+ void *it_data;
+};
+
++/* Zero *it, then record the magic value, the operation and the flags. */
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++	memset(it, 0, sizeof(*it));
++	it->it_magic = INTENT_MAGIC;
++	it->it_op = op;
++	it->it_flags = flags;
++}
+
/*
* linux/include/linux/dcache.h
- *
-@@ -82,6 +104,7 @@ struct dentry {
- unsigned long d_time; /* used by d_revalidate */
- struct dentry_operations *d_op;
- struct super_block * d_sb; /* The root of the dentry tree */
-+ struct lookup_intent *d_it;
- unsigned long d_vfs_flags;
- void * d_fsdata; /* fs-specific data */
- void * d_extra_attributes; /* TUX-specific data */
-@@ -96,8 +119,15 @@ struct dentry_operations {
+@@ -96,8 +135,22 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
-+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+ void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++ void (*d_pin)(struct dentry *, struct vfsmount * , int);
++ void (*d_unpin)(struct dentry *, struct vfsmount *, int);
};
++/* Call the optional d_pin/d_unpin dentry hook; 'de' is evaluated more than once. */
++#define PIN(de,mnt,flag)   do { if ((de)->d_op && (de)->d_op->d_pin) \
++	(de)->d_op->d_pin((de), (mnt), (flag)); } while (0)
++#define UNPIN(de,mnt,flag) do { if ((de)->d_op && (de)->d_op->d_unpin) \
++	(de)->d_op->d_unpin((de), (mnt), (flag)); } while (0)
++
+/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
+/* defined in fs/dcache.c */
+extern void __d_rehash(struct dentry * entry, int lock);
+
/* the dentry parameter passed to d_hash and d_compare is the parent
* directory of the entries to be compared. It is used in case these
* functions need any directory specific information for determining
-@@ -129,6 +159,7 @@ d_iput: no no yes
+@@ -129,6 +182,7 @@ d_iput: no no yes
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
extern spinlock_t dcache_lock;
---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh 2003-05-30 02:07:39.000000000 +0800
-+++ linux-2.4.20-rh-root/include/linux/fs.h 2003-06-09 23:18:07.000000000 +0800
-@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-rh 2003-07-17 08:34:44.000000000 -0700
++++ linux-2.4.20-mmonroe/include/linux/fs.h 2003-07-17 08:35:22.000000000 -0700
+@@ -337,6 +337,9 @@ extern void set_bh_page(struct buffer_he
#define ATTR_MTIME_SET 256
#define ATTR_FORCE 512 /* Not a change, but a change it */
#define ATTR_ATTR_FLAG 1024
-+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */
++#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
/*
* This is the Inode Attributes structure, used for notify_change(). It
-@@ -574,6 +576,7 @@ struct file {
+@@ -574,6 +577,7 @@ struct file {
/* needed for tty driver, and maybe others */
void *private_data;
-+ struct lookup_intent *f_intent;
++ struct lookup_intent *f_it;
/* preallocated helper kiobuf to speedup O_DIRECT */
struct kiobuf *f_iobuf;
-@@ -701,6 +704,7 @@ struct nameidata {
+@@ -701,6 +705,7 @@ struct nameidata {
struct qstr last;
unsigned int flags;
int last_type;
};
/*
-@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -821,7 +826,8 @@ extern int vfs_symlink(struct inode *, s
extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it);
++ struct inode *new_dir, struct dentry *new_dentry);
/*
* File types
-@@ -882,20 +888,33 @@ struct file_operations {
+@@ -881,21 +887,32 @@ struct file_operations {
+
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
++ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
struct dentry * (*lookup) (struct inode *,struct dentry *);
-+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
int (*link) (struct dentry *,struct inode *,struct dentry *);
-+ int (*link2) (struct inode *,struct inode *, const char *, int);
++ int (*link_raw) (struct nameidata *,struct nameidata *);
int (*unlink) (struct inode *,struct dentry *);
-+ int (*unlink2) (struct inode *, const char *, int);
++ int (*unlink_raw) (struct nameidata *);
int (*symlink) (struct inode *,struct dentry *,const char *);
-+ int (*symlink2) (struct inode *, const char *, int, const char *);
++ int (*symlink_raw) (struct nameidata *,const char *);
int (*mkdir) (struct inode *,struct dentry *,int);
-+ int (*mkdir2) (struct inode *, const char *, int,int);
++ int (*mkdir_raw) (struct nameidata *,int);
int (*rmdir) (struct inode *,struct dentry *);
-+ int (*rmdir2) (struct inode *, const char *, int);
++ int (*rmdir_raw) (struct nameidata *);
int (*mknod) (struct inode *,struct dentry *,int,int);
-+ int (*mknod2) (struct inode *, const char *, int,int,int);
++ int (*mknod_raw) (struct nameidata *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
-+ int (*rename2) (struct inode *, struct inode *,
-+ const char *oldname, int oldlen,
-+ const char *newname, int newlen);
++ int (*rename_raw) (struct nameidata *, struct nameidata *);
int (*readlink) (struct dentry *, char *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
-+ int (*follow_link2) (struct dentry *, struct nameidata *,
-+ struct lookup_intent *it);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
++ int (*revalidate_it) (struct dentry *, struct lookup_intent *);
int (*setattr) (struct dentry *, struct iattr *);
-+ int (*setattr_raw) (struct inode *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
int (*getattr) (struct dentry *, struct iattr *);
int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode
+@@ -1091,10 +1108,14 @@ static inline int get_lease(struct inode
asmlinkage long sys_open(const char *, int, int);
asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char *);
-@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1385,6 +1406,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1396,6 +1418,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void inode_init_once(struct inode *);
extern void iput(struct inode *);
-@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro
+@@ -1497,6 +1521,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh 2003-05-30 02:07:42.000000000 +0800
-+++ linux-2.4.20-rh-root/kernel/ksyms.c 2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/kernel/ksyms.c~vfs_intent-2.4.20-rh 2003-07-17 08:34:45.000000000 -0700
++++ linux-2.4.20-mmonroe/kernel/ksyms.c 2003-07-17 08:35:22.000000000 -0700
@@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_symlink_inode_operations);
---- linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh 2003-04-13 10:07:02.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/exec.c 2003-06-09 23:18:07.000000000 +0800
+--- linux-2.4.20/fs/exec.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:09.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/exec.c 2003-07-17 08:35:22.000000000 -0700
@@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char *
struct file * file;
struct nameidata nd;
int error;
--
++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+
- error = user_path_walk(library, &nd);
-+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+
-+ error = user_path_walk_it(library, &nd, &it);
++ error = user_path_walk_it(library, &nd, &it);
if (error)
goto out;
goto exit;
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+ intent_release(nd.dentry, &it);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(&it);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
struct inode *inode;
struct file *file;
int err = 0;
--
-- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
-+
+
+- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
file = ERR_PTR(err);
if (!err) {
inode = nd.dentry->d_inode;
-@@ -395,7 +398,7 @@ struct file *open_exec(const char *name)
+@@ -395,7 +398,8 @@ struct file *open_exec(const char *name)
err = -EACCES;
file = ERR_PTR(err);
if (!err) {
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++ intent_release(&it);
if (!IS_ERR(file)) {
err = deny_write_access(file);
if (err) {
-@@ -404,6 +407,7 @@ struct file *open_exec(const char *name)
- }
- }
- out:
-+ intent_release(nd.dentry, &it);
+@@ -407,6 +411,7 @@ out:
return file;
}
}
-@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod
++ intent_release(&it);
+ path_release(&nd);
+ }
+ goto out;
+@@ -1283,7 +1288,7 @@ int do_coredump(long signr, int exit_cod
goto close_fail;
if (!file->f_op->write)
goto close_fail;
goto close_fail;
retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh 2003-06-09 23:16:51.000000000 +0800
-+++ linux-2.4.20-rh-root/fs/proc/base.c 2003-06-09 23:18:52.000000000 +0800
+--- linux-2.4.20/fs/proc/base.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:05.000000000 -0700
++++ linux-2.4.20-mmonroe/fs/proc/base.c 2003-07-17 08:35:22.000000000 -0700
@@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
nd->last_type = LAST_BIND;
+
-+ if (nd->it != NULL)
-+ nd->it->it_int_flags |= IT_FL_FOLLOWED;
++ if (nd->it != NULL)
++ nd->it->it_int_flags |= IT_FL_FOLLOWED;
out:
return error;
}
- fs/dcache.c | 20 ++
- fs/exec.c | 15 +
- fs/namei.c | 378 ++++++++++++++++++++++++++++++++++++++++++-------
- fs/nfsd/vfs.c | 2
- fs/open.c | 126 ++++++++++++++--
- fs/proc/base.c | 3
- fs/stat.c | 24 ++-
- include/linux/dcache.h | 31 ++++
- include/linux/fs.h | 32 +++-
- kernel/ksyms.c | 1
- 10 files changed, 543 insertions(+), 89 deletions(-)
+ fs/dcache.c | 19 ++
+ fs/exec.c | 15 +-
+ fs/namei.c | 329 ++++++++++++++++++++++++++++++++++++++--------
+ fs/namespace.c | 30 +++-
+ fs/open.c | 128 +++++++++++++++--
+ fs/proc/base.c | 3
+ fs/stat.c | 50 ++++--
+ include/linux/dcache.h | 53 +++++++
+ include/linux/fs.h | 29 +++-
+ include/linux/fs_struct.h | 4
+ kernel/exit.c | 3
+ kernel/fork.c | 3
+ kernel/ksyms.c | 1
+ 13 files changed, 560 insertions(+), 107 deletions(-)
---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/exec.c Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/exec.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/exec.c 2003-07-07 15:13:53.000000000 -0600
@@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char *
struct file * file;
struct nameidata nd;
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
error = PTR_ERR(file);
if (IS_ERR(file))
goto out;
if (!err) {
- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
if (!IS_ERR(file)) {
err = deny_write_access(file);
if (err) {
return file;
}
}
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
goto out;
goto close_fail;
retval = binfmt->core_dump(signr, regs, file);
---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/dcache.c Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/dcache.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/dcache.c 2003-07-09 01:46:27.000000000 -0600
@@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry)
spin_unlock(&dcache_lock);
return 0;
/*
* Check whether to do a partial shrink_dcache
* to get rid of unused child entries.
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry *
- dentry->d_op = NULL;
- dentry->d_fsdata = NULL;
- dentry->d_mounted = 0;
-+ dentry->d_it = NULL;
- INIT_LIST_HEAD(&dentry->d_hash);
- INIT_LIST_HEAD(&dentry->d_lru);
- INIT_LIST_HEAD(&dentry->d_subdirs);
-@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry)
+@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry)
* Adds a dentry to the hash according to its name.
*/
}
#define do_switch(x,y) do { \
---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/namei.c Sun Jun 1 23:41:35 2003
+--- linux-2.4.20-ad/fs/namespace.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namespace.c 2003-07-07 15:13:53.000000000 -0600
+@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount *
+ {
+ old_nd->dentry = mnt->mnt_mountpoint;
+ old_nd->mnt = mnt->mnt_parent;
++ UNPIN(old_nd->dentry, old_nd->mnt, 1);
+ mnt->mnt_parent = mnt;
+ mnt->mnt_mountpoint = mnt->mnt_root;
+ list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+ mnt->mnt_parent = mntget(nd->mnt);
+ mnt->mnt_mountpoint = dget(nd->dentry);
++ PIN(nd->dentry, nd->mnt, 1);
+ list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+ list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+ nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata
+ {
+ struct nameidata old_nd;
+ struct vfsmount *mnt = NULL;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
+ int err = mount_is_safe(nd);
+ if (err)
+ return err;
+ if (!old_name || !*old_name)
+ return -EINVAL;
+- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+- if (err)
++ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++ if (err) {
++ intent_release(&it);
+ return err;
++ }
+
+ down_write(&current->namespace->sem);
+ err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata
+ }
+
+ up_write(&current->namespace->sem);
++ intent_release(&it);
+ path_release(&old_nd);
+ return err;
+ }
+@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di
+ unsigned long flags, void *data_page)
+ {
+ struct nameidata nd;
+- int retval = 0;
++ struct lookup_intent it = { .it_op = IT_GETATTR };
++ int retval = 0;
+ int mnt_flags = 0;
+
+ /* Discard magic */
+@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
+ flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+
+ /* ... and get the mountpoint */
+- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+- if (retval)
++ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++ if (retval) {
++ intent_release(&it);
+ return retval;
+-
++ }
+ if (flags & MS_REMOUNT)
+ retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+ data_page);
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+ else
+ retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+ dev_name, data_page);
++
++ intent_release(&it);
+ path_release(&nd);
+ return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+ struct vfsmount *tmp;
+ struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++ struct lookup_intent new_it = { .it_op = IT_GETATTR };
++ struct lookup_intent old_it = { .it_op = IT_GETATTR };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+
+ lock_kernel();
+
+- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+ if (error)
+ goto out0;
+ error = -EINVAL;
+ if (!check_mnt(new_nd.mnt))
+ goto out1;
+
+- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+ if (error)
+ goto out1;
+
+@@ -970,8 +982,10 @@ out2:
+ up(&old_nd.dentry->d_inode->i_zombie);
+ up_write(&current->namespace->sem);
+ path_release(&user_nd);
++ intent_release(&old_it);
+ path_release(&old_nd);
+ out1:
++ intent_release(&new_it);
+ path_release(&new_nd);
+ out0:
+ unlock_kernel();
+--- linux-2.4.20-ad/fs/namei.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/namei.c 2003-07-08 13:53:48.000000000 -0600
@@ -94,6 +94,13 @@
* XEmacs seems to be relying on it...
*/
-+void intent_release(struct dentry *de, struct lookup_intent *it)
++void intent_release(struct lookup_intent *it)
+{
-+ if (it && de->d_op && de->d_op->d_intent_release)
-+ de->d_op->d_intent_release(de, it);
++ if (it && it->it_op_release)
++ it->it_op_release(it);
+
+}
+
{
struct dentry * dentry = d_lookup(parent, name);
-+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
-+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) &&
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
+ !d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
result = ERR_PTR(-ENOMEM);
if (dentry) {
lock_kernel();
-+ if (dir->i_op->lookup2)
-+ result = dir->i_op->lookup2(dir, dentry, it);
++ if (dir->i_op->lookup_it)
++ result = dir->i_op->lookup_it(dir, dentry, it, flags);
+ else
result = dir->i_op->lookup(dir, dentry);
unlock_kernel();
dput(result);
result = ERR_PTR(-ENOENT);
}
-+ } else if (result->d_op && result->d_op->d_revalidate2) {
-+ if (!result->d_op->d_revalidate2(result, flags, it) &&
++ } else if (result->d_op && result->d_op->d_revalidate_it) {
++ if (!result->d_op->d_revalidate_it(result, flags, it) &&
+ !d_invalidate(result)) {
+ dput(result);
+ goto again;
{
int err;
if (current->link_count >= 5)
-@@ -346,10 +375,21 @@ static inline int do_follow_link(struct
+@@ -346,10 +375,18 @@ static inline int do_follow_link(struct
current->link_count++;
current->total_link_count++;
UPDATE_ATIME(dentry->d_inode);
- err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ nd->it = it;
-+ if (dentry->d_inode->i_op->follow_link2)
-+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+ /* vfs_follow_link was never called */
-+ intent_release(dentry, it);
-+ path_release(nd);
-+ err = -ENOLINK;
-+ }
++ nd->it = it;
++ err = dentry->d_inode->i_op->follow_link(dentry, nd);
++ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++ /* vfs_follow_link was never called */
++ intent_release(it);
++ path_release(nd);
++ err = -ENOLINK;
++ }
current->link_count--;
return err;
loop:
-+ intent_release(dentry, it);
++ intent_release(it);
path_release(nd);
return -ELOOP;
}
-@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str
+@@ -379,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str
return __follow_up(mnt, dentry);
}
+ opc = it->it_op;
+ mode = it->it_mode;
+ }
-+ intent_release(*dentry, it);
++ intent_release(it);
+ if (it) {
+ it->it_op = opc;
+ it->it_mode = mode;
dput(*dentry);
mntput(mounted->mnt_parent);
*dentry = dget(mounted->mnt_root);
-@@ -399,7 +450,7 @@ static inline int __follow_down(struct v
+@@ -399,7 +447,7 @@ static inline int __follow_down(struct v
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
}
static inline void follow_dotdot(struct nameidata *nd)
-@@ -435,7 +486,7 @@ static inline void follow_dotdot(struct
+@@ -435,7 +483,7 @@ static inline void follow_dotdot(struct
mntput(nd->mnt);
nd->mnt = parent;
}
;
}
-@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct
+@@ -447,7 +495,8 @@ static inline void follow_dotdot(struct
*
* We expect 'base' to be positive and a directory.
*/
{
struct dentry *dentry;
struct inode *inode;
-@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st
+@@ -520,15 +569,15 @@ int link_path_walk(const char * name, st
break;
}
/* This does the actual lookups.. */
;
err = -ENOENT;
-@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st
+@@ -539,8 +588,8 @@ int link_path_walk(const char * name, st
if (!inode->i_op)
goto out_dput;
- if (inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
-+ if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++ if (inode->i_op->follow_link) {
+ err = do_follow_link(dentry, nd, NULL);
dput(dentry);
if (err)
goto return_err;
-@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st
+@@ -556,7 +605,7 @@ int link_path_walk(const char * name, st
nd->dentry = dentry;
}
err = -ENOTDIR;
- if (!inode->i_op->lookup)
-+ if (!inode->i_op->lookup && !inode->i_op->lookup2)
++ if (!inode->i_op->lookup && !inode->i_op->lookup_it)
break;
continue;
/* here ends the main loop */
-@@ -583,19 +635,20 @@ last_component:
+@@ -583,19 +632,19 @@ last_component:
if (err < 0)
break;
}
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
-- && inode && inode->i_op && inode->i_op->follow_link) {
+ && inode && inode->i_op && inode->i_op->follow_link) {
- err = do_follow_link(dentry, nd);
-+ && inode && inode->i_op &&
-+ (inode->i_op->follow_link || inode->i_op->follow_link2)) {
+ err = do_follow_link(dentry, nd, it);
dput(dentry);
if (err)
goto return_err;
-@@ -609,7 +662,8 @@ last_component:
+@@ -609,7 +658,8 @@ last_component:
goto no_inode;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
- if (!inode->i_op || !inode->i_op->lookup)
+ if (!inode->i_op ||
-+ (!inode->i_op->lookup && !inode->i_op->lookup2))
++ (!inode->i_op->lookup && !inode->i_op->lookup_it))
break;
}
goto return_base;
-@@ -633,6 +687,23 @@ return_reval:
+@@ -633,6 +683,23 @@ return_reval:
* Check the cached dentry for staleness.
*/
dentry = nd->dentry;
-+ revalidate_again:
-+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) {
++ revalidate_again:
++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ err = -ESTALE;
-+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) {
-+ struct dentry *new;
-+ err = permission(dentry->d_parent->d_inode,
-+ MAY_EXEC);
-+ if (err)
-+ break;
-+ new = real_lookup(dentry->d_parent,
-+ &dentry->d_name, 0, NULL);
++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++ struct dentry *new;
++ err = permission(dentry->d_parent->d_inode,
++ MAY_EXEC);
++ if (err)
++ break;
++ new = real_lookup(dentry->d_parent,
++ &dentry->d_name, 0, NULL);
+ d_invalidate(dentry);
-+ dput(dentry);
-+ dentry = new;
-+ goto revalidate_again;
-+ }
++ dput(dentry);
++ dentry = new;
++ goto revalidate_again;
++ }
+ } else
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
err = -ESTALE;
if (!dentry->d_op->d_revalidate(dentry, 0)) {
-@@ -646,15 +717,28 @@ out_dput:
+@@ -646,15 +713,28 @@ out_dput:
dput(dentry);
break;
}
+ if (err)
-+ intent_release(nd->dentry, it);
++ intent_release(it);
path_release(nd);
return_err:
return err;
}
/* SMP-safe */
-@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct
+@@ -739,6 +819,17 @@ walk_init_root(const char *name, struct
}
/* SMP-safe */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
int error = 0;
-@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned
+@@ -753,6 +844,7 @@ int path_init(const char *name, unsigned
{
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
-+ nd->it = NULL;
++ nd->it = NULL;
if (*name=='/')
return walk_init_root(name,nd);
read_lock(&current->fs->lock);
-@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned
+@@ -767,7 +859,8 @@ int path_init(const char *name, unsigned
* needs parent already locked. Doesn't follow mounts.
* SMP-safe.
*/
{
struct dentry * dentry;
struct inode *inode;
-@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr
+@@ -790,13 +883,16 @@ struct dentry * lookup_hash(struct qstr
goto out;
}
if (!new)
goto out;
lock_kernel();
-+ if (inode->i_op->lookup2)
-+ dentry = inode->i_op->lookup2(inode, new, it);
++ if (inode->i_op->lookup_it)
++ dentry = inode->i_op->lookup_it(inode, new, it, 0);
+ else
dentry = inode->i_op->lookup(inode, new);
unlock_kernel();
if (!dentry)
-@@ -808,6 +908,12 @@ out:
+@@ -808,6 +904,12 @@ out:
return dentry;
}
/* SMP-safe */
struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
{
-@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha
+@@ -829,7 +931,7 @@ struct dentry * lookup_one_len(const cha
}
this.hash = end_name_hash(hash);
access:
return ERR_PTR(-EACCES);
}
-@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign
+@@ -860,6 +962,23 @@ int __user_walk(const char *name, unsign
return err;
}
/*
* It's inline, so penalty for filesystems that don't use sticky bit is
* minimal.
-@@ -996,7 +1119,8 @@ exit_lock:
+@@ -955,7 +1074,8 @@ static inline int lookup_flags(unsigned
+ return retval;
+ }
+
+-int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++ struct lookup_intent *it)
+ {
+ int error;
+
+@@ -968,12 +1088,15 @@ int vfs_create(struct inode *dir, struct
+ goto exit_lock;
+
+ error = -EACCES; /* shouldn't it be ENOSYS? */
+- if (!dir->i_op || !dir->i_op->create)
++ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it))
+ goto exit_lock;
+
+ DQUOT_INIT(dir);
+ lock_kernel();
+- error = dir->i_op->create(dir, dentry, mode);
++ if (dir->i_op->create_it)
++ error = dir->i_op->create_it(dir, dentry, mode, it);
++ else
++ error = dir->i_op->create(dir, dentry, mode);
+ unlock_kernel();
+ exit_lock:
+ up(&dir->i_zombie);
+@@ -982,6 +1105,11 @@ exit_lock:
+ return error;
+ }
+
++int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
++{
++ return vfs_create_it(dir, dentry, mode, NULL);
++}
++
+ /*
+ * open_namei()
+ *
+@@ -996,7 +1124,8 @@ exit_lock:
* for symlinks (where the permissions are checked later).
* SMP-safe
*/
{
int acc_mode, error = 0;
struct inode *inode;
-@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in
+@@ -1010,7 +1139,7 @@ int open_namei(const char * pathname, in
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
if (error)
return error;
dentry = nd->dentry;
-@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in
+@@ -1020,6 +1149,10 @@ int open_namei(const char * pathname, in
/*
* Create - we need to know the parent.
*/
error = path_lookup(pathname, LOOKUP_PARENT, nd);
if (error)
return error;
-@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in
+@@ -1035,7 +1168,7 @@ int open_namei(const char * pathname, in
dir = nd->dentry;
down(&dir->d_inode->i_sem);
do_last:
error = PTR_ERR(dentry);
-@@ -1044,6 +1172,7 @@ do_last:
+@@ -1044,10 +1177,11 @@ do_last:
goto exit;
}
+ it->it_mode = mode;
/* Negative dentry, just create the file */
if (!dentry->d_inode) {
- error = vfs_create(dir->d_inode, dentry,
-@@ -1072,12 +1201,13 @@ do_last:
+- error = vfs_create(dir->d_inode, dentry,
+- mode & ~current->fs->umask);
++ error = vfs_create_it(dir->d_inode, dentry,
++ mode & ~current->fs->umask, it);
+ up(&dir->d_inode->i_sem);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+@@ -1072,7 +1206,7 @@ do_last:
error = -ELOOP;
if (flag & O_NOFOLLOW)
goto exit_dput;
}
error = -ENOENT;
if (!dentry->d_inode)
- goto exit_dput;
-- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
-+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link ||
-+ dentry->d_inode->i_op->follow_link2))
- goto do_link;
-
- dput(nd->dentry);
-@@ -1151,7 +1281,7 @@ ok:
+@@ -1151,7 +1285,7 @@ ok:
if (!error) {
DQUOT_INIT(inode);
}
put_write_access(inode);
if (error)
-@@ -1163,8 +1293,10 @@ ok:
+@@ -1163,8 +1297,10 @@ ok:
return 0;
exit_dput:
-+ intent_release(dentry, it);
++ intent_release(it);
dput(dentry);
exit:
-+ intent_release(nd->dentry, it);
++ intent_release(it);
path_release(nd);
return error;
-@@ -1183,7 +1315,19 @@ do_link:
+@@ -1183,7 +1319,16 @@ do_link:
* are done. Procfs-like symlinks just set LAST_BIND.
*/
UPDATE_ATIME(dentry->d_inode);
- error = dentry->d_inode->i_op->follow_link(dentry, nd);
-+ nd->it = it;
-+ if (dentry->d_inode->i_op->follow_link2)
-+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
-+ else
-+ error = dentry->d_inode->i_op->follow_link(dentry, nd);
++ nd->it = it;
++ error = dentry->d_inode->i_op->follow_link(dentry, nd);
+ if (error) {
-+ intent_release(dentry, it);
-+ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
-+ /* vfs_follow_link was never called */
-+ intent_release(dentry, it);
-+ path_release(nd);
-+ error = -ENOLINK;
-+ }
++ intent_release(it);
++ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++ /* vfs_follow_link was never called */
++ intent_release(it);
++ path_release(nd);
++ error = -ENOLINK;
++ }
dput(dentry);
if (error)
return error;
-@@ -1205,13 +1349,20 @@ do_link:
+@@ -1205,13 +1350,20 @@ do_link:
}
dir = nd->dentry;
down(&dir->d_inode->i_sem);
{
struct dentry *dentry;
-@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru
+@@ -1219,7 +1371,7 @@ static struct dentry *lookup_create(stru
dentry = ERR_PTR(-EEXIST);
if (nd->last_type != LAST_NORM)
goto fail;
if (IS_ERR(dentry))
goto fail;
if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
-@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f
+@@ -1275,7 +1427,16 @@ asmlinkage long sys_mknod(const char * f
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
+
-+ if (nd.dentry->d_inode->i_op->mknod2) {
++ if (nd.dentry->d_inode->i_op->mknod_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->mknod2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ mode, dev);
++ error = op->mknod_raw(&nd, mode, dev);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
error = PTR_ERR(dentry);
mode &= ~current->fs->umask;
-@@ -1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f
+@@ -1296,6 +1457,7 @@ asmlinkage long sys_mknod(const char * f
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1343,7 +1505,14 @@ asmlinkage long sys_mkdir(const char * p
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 1);
-+ if (nd.dentry->d_inode->i_op->mkdir2) {
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->mkdir2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ mode);
++ error = op->mkdir_raw(&nd, mode);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_mkdir(nd.dentry->d_inode, dentry,
-@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p
+@@ -1351,6 +1520,7 @@ asmlinkage long sys_mkdir(const char * p
dput(dentry);
}
up(&nd.dentry->d_inode->i_sem);
path_release(&nd);
out:
putname(tmp);
-@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p
+@@ -1451,8 +1621,16 @@ asmlinkage long sys_rmdir(const char * p
error = -EBUSY;
goto exit1;
}
-+ if (nd.dentry->d_inode->i_op->rmdir2) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ struct dentry *last;
-+
-+ down(&nd.dentry->d_inode->i_sem);
-+ last = lookup_hash_it(&nd.last, nd.dentry, NULL);
-+ up(&nd.dentry->d_inode->i_sem);
-+ if (IS_ERR(last)) {
-+ error = PTR_ERR(last);
-+ goto exit1;
-+ }
-+ if (d_mountpoint(last)) {
-+ dput(last);
-+ error = -EBUSY;
-+ goto exit1;
-+ }
-+ dput(last);
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
+
-+ error = op->rmdir2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
error = vfs_rmdir(nd.dentry->d_inode, dentry);
-@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char *
+@@ -1510,8 +1688,15 @@ asmlinkage long sys_unlink(const char *
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
-+ if (nd.dentry->d_inode->i_op->unlink2) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->unlink2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
- dentry = lookup_hash(&nd.last, nd.dentry);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
/* Why not before? Because we want correct error value */
-@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char *
+@@ -1578,15 +1763,23 @@ asmlinkage long sys_symlink(const char *
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
goto out;
- dentry = lookup_create(&nd, 0);
-+ if (nd.dentry->d_inode->i_op->symlink2) {
++ if (nd.dentry->d_inode->i_op->symlink_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->symlink2(nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len,
-+ from);
++ error = op->symlink_raw(&nd, from);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out2;
putname(to);
}
putname(from);
-@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol
+@@ -1662,7 +1855,14 @@ asmlinkage long sys_link(const char * ol
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
- new_dentry = lookup_create(&nd, 0);
-+ if (nd.dentry->d_inode->i_op->link2) {
++ if (nd.dentry->d_inode->i_op->link_raw) {
+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->link2(old_nd.dentry->d_inode,
-+ nd.dentry->d_inode,
-+ nd.last.name,
-+ nd.last.len);
++ error = op->link_raw(&old_nd, &nd);
+ /* the file system wants to use normal vfs path now */
+ if (error != -EOPNOTSUPP)
+ goto out_release;
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
-@@ -1706,7 +1936,8 @@ exit:
+@@ -1706,7 +1906,7 @@ exit:
* locking].
*/
int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
++ struct inode *new_dir, struct dentry *new_dentry)
{
int error;
struct inode *target;
-@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir
- error = -EBUSY;
- else
- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+ intent_release(new_dentry, it);
- if (target) {
- if (!error)
- target->i_flags |= S_DEAD;
-@@ -1785,7 +2017,8 @@ out_unlock:
+@@ -1785,7 +1985,7 @@ out_unlock:
}
int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
++ struct inode *new_dir, struct dentry *new_dentry)
{
int error;
-@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d
- error = -EBUSY;
- else
- error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
-+ intent_release(new_dentry, it);
- double_up(&old_dir->i_zombie, &new_dir->i_zombie);
- if (error)
- return error;
-@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d
- }
-
- int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it)
- {
- int error;
- if (S_ISDIR(old_dentry->d_inode->i_mode))
-- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
-+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it);
- else
-- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
-+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it);
- if (!error) {
- if (old_dir == new_dir)
- inode_dir_notify(old_dir, DN_RENAME);
-@@ -1875,7 +2110,7 @@ static inline int do_rename(const char *
+@@ -1873,9 +2073,18 @@ static inline int do_rename(const char *
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
++ if (old_dir->d_inode->i_op->rename_raw) {
++ lock_kernel();
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ unlock_kernel();
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
double_lock(new_dir, old_dir);
- old_dentry = lookup_hash(&oldnd.last, old_dir);
error = PTR_ERR(old_dentry);
if (IS_ERR(old_dentry))
goto exit3;
-@@ -1891,16 +2126,37 @@ static inline int do_rename(const char *
+@@ -1891,16 +2100,16 @@ static inline int do_rename(const char *
if (newnd.last.name[newnd.last.len])
goto exit4;
}
if (IS_ERR(new_dentry))
goto exit4;
-+ if (old_dir->d_inode->i_op->rename2) {
-+ lock_kernel();
-+ /* don't rename mount point. mds will take care of
-+ * the rest sanity checking */
-+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) {
-+ error = -EBUSY;
-+ goto exit5;
-+ }
-+
-+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode,
-+ new_dir->d_inode,
-+ oldnd.last.name,
-+ oldnd.last.len,
-+ newnd.last.name,
-+ newnd.last.len);
-+ unlock_kernel();
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit5;
-+ }
+
lock_kernel();
error = vfs_rename(old_dir->d_inode, old_dentry,
-- new_dir->d_inode, new_dentry);
-+ new_dir->d_inode, new_dentry, NULL);
+ new_dir->d_inode, new_dentry);
unlock_kernel();
-
-+exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
-@@ -1951,20 +2207,28 @@ out:
+@@ -1951,20 +2160,28 @@ out:
}
static inline int
if (IS_ERR(link))
goto fail;
-+ if (it == NULL)
-+ it = nd->it;
-+ else if (it != nd->it)
-+ printk("it != nd->it: tell phil@clusterfs.com\n");
-+ if (it != NULL)
-+ it->it_int_flags |= IT_FL_FOLLOWED;
++ if (it == NULL)
++ it = nd->it;
++ else if (it != nd->it)
++ printk("it != nd->it: tell phil@clusterfs.com\n");
++ if (it != NULL)
++ it->it_int_flags |= IT_FL_FOLLOWED;
+
if (*link == '/') {
path_release(nd);
out:
if (current->link_count || res || nd->last_type!=LAST_NORM)
return res;
-@@ -1986,7 +2250,13 @@ fail:
+@@ -1986,7 +2203,13 @@ fail:
int vfs_follow_link(struct nameidata *nd, const char *link)
{
}
/* get the link contents into pagecache */
-@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent
+@@ -2028,7 +2251,7 @@ int page_follow_link(struct dentry *dent
{
struct page *page = NULL;
char *s = page_getlink(dentry, &page);
if (page) {
kunmap(page);
page_cache_release(page);
---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c Wed May 28 01:39:18 2003
-@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru
- err = nfserr_perm;
- } else
- #endif
-- err = vfs_rename(fdir, odentry, tdir, ndentry);
-+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
- if (!err && EX_ISSYNC(tfhp->fh_export)) {
- nfsd_sync_dir(tdentry);
- nfsd_sync_dir(fdentry);
---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/fs/open.c Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/fs/open.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/fs/open.c 2003-07-08 13:51:14.000000000 -0600
@@ -19,6 +19,8 @@
#include <asm/uaccess.h>
int error;
struct iattr newattrs;
-@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l
down(&inode->i_sem);
newattrs.ia_size = length;
newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+ newattrs.ia_valid |= ATTR_FROM_OPEN;
+ if (op->setattr_raw) {
+ newattrs.ia_valid |= ATTR_RAW;
-+ newattrs.ia_ctime = CURRENT_TIME;
+ error = op->setattr_raw(inode, &newattrs);
-+ } else
++ } else
+ error = notify_change(dentry, &newattrs);
up(&inode->i_sem);
return error;
if (!error) {
DQUOT_INIT(inode);
- error = do_truncate(nd.dentry, length);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
+ error = do_truncate(nd.dentry, length, 0);
}
put_write_access(inode);
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
error = -EROFS;
if (IS_RDONLY(inode))
goto dput_and_out;
-@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+@@ -279,11 +294,25 @@ asmlinkage long sys_utime(char * filenam
goto dput_and_out;
newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+ goto dput_and_out;
+ }
+
-+ error = -EROFS;
-+ if (IS_RDONLY(inode))
-+ goto dput_and_out;
-+
+ error = -EPERM;
+ if (!times) {
if (current->fsuid != inode->i_uid &&
if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
&& !special_file(nd.dentry->d_inode->i_mode))
res = -EROFS;
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
set_fs_pwd(current->fs, nd.mnt, nd.dentry);
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
set_fs_altroot();
error = 0;
dput_and_out:
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
out:
return error;
+
+ newattrs.ia_uid = user;
+ newattrs.ia_gid = group;
-+ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
+ newattrs.ia_valid |= ATTR_RAW;
+ error = op->setattr_raw(inode, &newattrs);
+ /* the file system wants to use normal vfs path now */
{
struct file * f;
struct inode *inode;
-@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry *
+@@ -693,12 +776,15 @@ struct file *dentry_open(struct dentry *
+ }
+
+ if (f->f_op && f->f_op->open) {
++ f->f_it = it;
+ error = f->f_op->open(inode,f);
++ f->f_it = NULL;
+ if (error)
+ goto cleanup_all;
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
-+ intent_release(dentry, it);
++ intent_release(it);
return f;
cleanup_all:
-@@ -713,11 +797,17 @@ cleanup_all:
+@@ -713,11 +799,17 @@ cleanup_all:
cleanup_file:
put_filp(f);
cleanup_dentry:
-+ intent_release(dentry, it);
++ intent_release(it);
dput(dentry);
mntput(mnt);
return ERR_PTR(error);
/*
* Find an empty file descriptor entry, and mark it busy.
*/
---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla Thu Sep 13 19:04:43 2001
-+++ linux-2.4.20-l18-phil/fs/stat.c Wed May 28 01:39:18 2003
-@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in
+--- linux-2.4.20-ad/fs/stat.c~vfs_intent-2.4.20-vanilla 2001-09-13 17:04:43.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/stat.c 2003-07-07 15:13:53.000000000 -0600
+@@ -17,10 +17,12 @@
+ * Revalidate the inode. This is required for proper NFS attribute caching.
+ */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+ struct inode * inode = dentry->d_inode;
+- if (inode->i_op && inode->i_op->revalidate)
++ if (inode->i_op && inode->i_op->revalidate_it)
++ return inode->i_op->revalidate_it(dentry, it);
++ else if (inode->i_op && inode->i_op->revalidate)
+ return inode->i_op->revalidate(dentry);
+ return 0;
+ }
+@@ -135,13 +137,15 @@ static int cp_new_stat(struct inode * in
asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf)
{
struct nameidata nd;
- error = user_path_walk(filename, &nd);
+ error = user_path_walk_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename
+@@ -151,13 +155,15 @@ asmlinkage long sys_stat(char * filename
asmlinkage long sys_newstat(char * filename, struct stat * statbuf)
{
struct nameidata nd;
- error = user_path_walk(filename, &nd);
+ error = user_path_walk_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen
+@@ -172,13 +178,15 @@ asmlinkage long sys_newstat(char * filen
asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf)
{
struct nameidata nd;
- error = user_path_walk_link(filename, &nd);
+ error = user_path_walk_link_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_old_stat(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam
+@@ -189,13 +197,15 @@ asmlinkage long sys_lstat(char * filenam
asmlinkage long sys_newlstat(char * filename, struct stat * statbuf)
{
struct nameidata nd;
- error = user_path_walk_link(filename, &nd);
+ error = user_path_walk_link_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_new_stat(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena
+@@ -216,7 +226,7 @@ asmlinkage long sys_fstat(unsigned int f
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_old_stat(dentry->d_inode, statbuf);
+ fput(f);
+@@ -235,7 +245,7 @@ asmlinkage long sys_newfstat(unsigned in
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_new_stat(dentry->d_inode, statbuf);
+ fput(f);
+@@ -257,7 +267,7 @@ asmlinkage long sys_readlink(const char
+
+ error = -EINVAL;
+ if (inode->i_op && inode->i_op->readlink &&
+- !(error = do_revalidate(nd.dentry))) {
++ !(error = do_revalidate(nd.dentry, NULL))) {
+ UPDATE_ATIME(inode);
+ error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+ }
+@@ -333,12 +343,14 @@ asmlinkage long sys_stat64(char * filena
{
struct nameidata nd;
int error;
- error = user_path_walk(filename, &nd);
+ error = user_path_walk_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
-@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen
+@@ -348,12 +360,14 @@ asmlinkage long sys_lstat64(char * filen
{
struct nameidata nd;
int error;
- error = user_path_walk_link(filename, &nd);
+ error = user_path_walk_link_it(filename, &nd, &it);
if (!error) {
- error = do_revalidate(nd.dentry);
+- error = do_revalidate(nd.dentry);
++ error = do_revalidate(nd.dentry, &it);
if (!error)
error = cp_new_stat64(nd.dentry->d_inode, statbuf);
-+ intent_release(nd.dentry, &it);
++ intent_release(&it);
path_release(&nd);
}
return error;
---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla Wed Jun 4 22:53:14 2003
-+++ linux-2.4.20-l18-phil/fs/proc/base.c Wed Jun 4 22:50:35 2003
+@@ -368,7 +382,7 @@ asmlinkage long sys_fstat64(unsigned lon
+ if (f) {
+ struct dentry * dentry = f->f_dentry;
+
+- err = do_revalidate(dentry);
++ err = do_revalidate(dentry, NULL);
+ if (!err)
+ err = cp_new_stat64(dentry->d_inode, statbuf);
+ fput(f);
+--- linux-2.4.20-ad/fs/proc/base.c~vfs_intent-2.4.20-vanilla 2002-08-02 18:39:45.000000000 -0600
++++ linux-2.4.20-ad-braam/fs/proc/base.c 2003-07-07 15:13:53.000000000 -0600
@@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d
error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
nd->last_type = LAST_BIND;
+
-+ if (nd->it != NULL)
-+ nd->it->it_int_flags |= IT_FL_FOLLOWED;
++ if (nd->it != NULL)
++ nd->it->it_int_flags |= IT_FL_FOLLOWED;
out:
return error;
}
---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002
-+++ linux-2.4.20-l18-phil/include/linux/dcache.h Sun Jun 1 22:35:10 2003
-@@ -7,6 +7,28 @@
+--- linux-2.4.20-ad/include/linux/dcache.h~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/include/linux/dcache.h 2003-07-09 01:40:11.000000000 -0600
+@@ -7,6 +7,44 @@
#include <linux/mount.h>
#include <linux/kernel.h>
-+#define IT_OPEN (1)
-+#define IT_CREAT (1<<1)
-+#define IT_READDIR (1<<2)
-+#define IT_GETATTR (1<<3)
-+#define IT_LOOKUP (1<<4)
-+#define IT_UNLINK (1<<5)
++#define IT_OPEN 0x0001
++#define IT_CREAT 0x0002
++#define IT_READDIR 0x0004
++#define IT_GETATTR 0x0008
++#define IT_LOOKUP 0x0010
++#define IT_UNLINK 0x0020
++#define IT_GETXATTR 0x0040
++#define IT_EXEC 0x0080
++#define IT_PIN 0x0100
++
++#define IT_FL_LOCKED 0x0001
++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
+
-+#define IT_FL_LOCKED (1)
-+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++#define INTENT_MAGIC 0x19620323
+
+struct lookup_intent {
+ int it_op;
++ void (*it_op_release)(struct lookup_intent *);
++ int it_magic;
+ int it_mode;
+ int it_flags;
+ int it_disposition;
+ void *it_data;
+};
+
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++ memset(it, 0, sizeof(*it)); /* zero all fields, then set magic/op/flags */
++ it->it_magic = INTENT_MAGIC;
++ it->it_op = op;
++ it->it_flags = flags;
++}
++
++
/*
* linux/include/linux/dcache.h
*
-@@ -79,6 +101,7 @@ struct dentry {
- unsigned long d_time; /* used by d_revalidate */
- struct dentry_operations *d_op;
- struct super_block * d_sb; /* The root of the dentry tree */
-+ struct lookup_intent *d_it;
- unsigned long d_vfs_flags;
- void * d_fsdata; /* fs-specific data */
- unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
-@@ -91,8 +114,15 @@ struct dentry_operations {
+@@ -91,8 +129,22 @@ struct dentry_operations {
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
-+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *);
-+ void (*d_intent_release)(struct dentry *, struct lookup_intent *);
++ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++ void (*d_pin)(struct dentry *, struct vfsmount * , int);
++ void (*d_unpin)(struct dentry *, struct vfsmount *, int);
};
++#define PIN(de,mnt,flag) do { if ((de)->d_op && (de)->d_op->d_pin) \
++ (de)->d_op->d_pin((de), (mnt), (flag)); } while (0) /* optional hook */
++#define UNPIN(de,mnt,flag) do { if ((de)->d_op && (de)->d_op->d_unpin) \
++ (de)->d_op->d_unpin((de), (mnt), (flag)); } while (0) /* optional hook */
++
++
+/* defined in fs/namei.c */
-+extern void intent_release(struct dentry *de, struct lookup_intent *it);
++extern void intent_release(struct lookup_intent *it);
+/* defined in fs/dcache.c */
+extern void __d_rehash(struct dentry * entry, int lock);
+
/* the dentry parameter passed to d_hash and d_compare is the parent
* directory of the entries to be compared. It is used in case these
* functions need any directory specific information for determining
-@@ -124,6 +154,7 @@ d_iput: no no yes
+@@ -124,6 +176,7 @@ d_iput: no no yes
* s_nfsd_free_path semaphore will be down
*/
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
extern spinlock_t dcache_lock;
---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla Wed May 28 01:39:17 2003
-+++ linux-2.4.20-l18-phil/include/linux/fs.h Sun Jun 1 22:07:11 2003
-@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he
+--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-vanilla 2003-06-12 03:24:59.000000000 -0600
++++ linux-2.4.20-braam/include/linux/fs.h 2003-06-12 03:25:00.000000000 -0600
+@@ -338,6 +338,9 @@ extern void set_bh_page(struct buffer_he
#define ATTR_MTIME_SET 256
#define ATTR_FORCE 512 /* Not a change, but a change it */
#define ATTR_ATTR_FLAG 1024
-+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */
-+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */
++#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
++#define ATTR_CTIME_SET 0x2000
/*
* This is the Inode Attributes structure, used for notify_change(). It
/* needed for tty driver, and maybe others */
void *private_data;
-+ struct lookup_intent *f_intent;
++ struct lookup_intent *f_it;
/* preallocated helper kiobuf to speedup O_DIRECT */
struct kiobuf *f_iobuf;
struct qstr last;
unsigned int flags;
int last_type;
-+ struct lookup_intent *it;
++ struct lookup_intent *it;
};
#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
-@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s
+@@ -794,7 +798,8 @@ extern int vfs_symlink(struct inode *, s
extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-+ struct inode *new_dir, struct dentry *new_dentry,
-+ struct lookup_intent *it);
++ struct inode *new_dir, struct dentry *new_dentry);
/*
* File types
-@@ -855,20 +861,33 @@ struct file_operations {
+@@ -854,21 +859,32 @@ struct file_operations {
+
struct inode_operations {
int (*create) (struct inode *,struct dentry *,int);
++ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
struct dentry * (*lookup) (struct inode *,struct dentry *);
-+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *);
++ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
int (*link) (struct dentry *,struct inode *,struct dentry *);
-+ int (*link2) (struct inode *,struct inode *, const char *, int);
++ int (*link_raw) (struct nameidata *,struct nameidata *);
int (*unlink) (struct inode *,struct dentry *);
-+ int (*unlink2) (struct inode *, const char *, int);
++ int (*unlink_raw) (struct nameidata *);
int (*symlink) (struct inode *,struct dentry *,const char *);
-+ int (*symlink2) (struct inode *, const char *, int, const char *);
++ int (*symlink_raw) (struct nameidata *,const char *);
int (*mkdir) (struct inode *,struct dentry *,int);
-+ int (*mkdir2) (struct inode *, const char *, int,int);
++ int (*mkdir_raw) (struct nameidata *,int);
int (*rmdir) (struct inode *,struct dentry *);
-+ int (*rmdir2) (struct inode *, const char *, int);
++ int (*rmdir_raw) (struct nameidata *);
int (*mknod) (struct inode *,struct dentry *,int,int);
-+ int (*mknod2) (struct inode *, const char *, int,int,int);
++ int (*mknod_raw) (struct nameidata *,int,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
-+ int (*rename2) (struct inode *, struct inode *,
-+ const char *oldname, int oldlen,
-+ const char *newname, int newlen);
++ int (*rename_raw) (struct nameidata *, struct nameidata *);
int (*readlink) (struct dentry *, char *,int);
int (*follow_link) (struct dentry *, struct nameidata *);
-+ int (*follow_link2) (struct dentry *, struct nameidata *,
-+ struct lookup_intent *it);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int);
int (*revalidate) (struct dentry *);
++ int (*revalidate_it) (struct dentry *, struct lookup_intent *);
int (*setattr) (struct dentry *, struct iattr *);
-+ int (*setattr_raw) (struct inode *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
int (*getattr) (struct dentry *, struct iattr *);
int (*setxattr) (struct dentry *, const char *, void *, size_t, int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
-@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode
+@@ -1070,10 +1086,14 @@ static inline int get_lease(struct inode
asmlinkage long sys_open(const char *, int, int);
asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char *);
-@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip
+@@ -1335,6 +1355,7 @@ typedef int (*read_actor_t)(read_descrip
extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
-@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co
+@@ -1346,6 +1367,8 @@ extern struct dentry * lookup_one_len(co
extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
#define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
#define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
extern void iput(struct inode *);
extern void force_delete(struct inode *);
-@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro
+@@ -1455,6 +1478,8 @@ extern struct file_operations generic_ro
extern int vfs_readlink(struct dentry *, char *, int, const char *);
extern int vfs_follow_link(struct nameidata *, const char *);
extern int page_readlink(struct dentry *, char *, int);
extern int page_follow_link(struct dentry *, struct nameidata *);
extern struct inode_operations page_symlink_inode_operations;
---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla Wed May 28 01:39:18 2003
-+++ linux-2.4.20-l18-phil/kernel/ksyms.c Wed May 28 01:39:18 2003
+--- linux-2.4.20-ad/include/linux/fs_struct.h~vfs_intent-2.4.20-vanilla 2001-07-13 16:10:44.000000000 -0600
++++ linux-2.4.20-ad-braam/include/linux/fs_struct.h 2003-07-07 15:13:53.000000000 -0600
+@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs
+ write_lock(&fs->lock);
+ old_root = fs->root;
+ old_rootmnt = fs->rootmnt;
++ PIN(dentry, mnt, 1);
+ fs->rootmnt = mntget(mnt);
+ fs->root = dget(dentry);
+ write_unlock(&fs->lock);
+ if (old_root) {
++ UNPIN(old_root, old_rootmnt, 1);
+ dput(old_root);
+ mntput(old_rootmnt);
+ }
+@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_
+ write_lock(&fs->lock);
+ old_pwd = fs->pwd;
+ old_pwdmnt = fs->pwdmnt;
++ PIN(dentry, mnt, 0);
+ fs->pwdmnt = mntget(mnt);
+ fs->pwd = dget(dentry);
+ write_unlock(&fs->lock);
+ if (old_pwd) {
++ UNPIN(old_pwd, old_pwdmnt, 0);
+ dput(old_pwd);
+ mntput(old_pwdmnt);
+ }
+--- linux-2.4.20-ad/kernel/ksyms.c~vfs_intent-2.4.20-vanilla 2003-07-07 15:13:52.000000000 -0600
++++ linux-2.4.20-ad-braam/kernel/ksyms.c 2003-07-07 15:13:53.000000000 -0600
@@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page);
EXPORT_SYMBOL(set_page_dirty);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_follow_link);
EXPORT_SYMBOL(page_symlink_inode_operations);
+--- linux-2.4.20-ad/kernel/fork.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/fork.c 2003-07-07 15:13:53.000000000 -0600
+@@ -384,10 +384,13 @@ static inline struct fs_struct *__copy_f
+ fs->umask = old->umask;
+ read_lock(&old->lock);
+ fs->rootmnt = mntget(old->rootmnt);
++ PIN(old->pwd, old->pwdmnt, 0);
++ PIN(old->root, old->rootmnt, 1);
+ fs->root = dget(old->root);
+ fs->pwdmnt = mntget(old->pwdmnt);
+ fs->pwd = dget(old->pwd);
+ if (old->altroot) {
++ PIN(old->altroot, old->altrootmnt, 1);
+ fs->altrootmnt = mntget(old->altrootmnt);
+ fs->altroot = dget(old->altroot);
+ } else {
+--- linux-2.4.20-ad/kernel/exit.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700
++++ linux-2.4.20-ad-braam/kernel/exit.c 2003-07-07 15:13:53.000000000 -0600
+@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc
+ {
+ /* No need to hold fs->lock if we are killing it */
+ if (atomic_dec_and_test(&fs->count)) {
++ UNPIN(fs->pwd, fs->pwdmnt, 0);
++ UNPIN(fs->root, fs->rootmnt, 1);
+ dput(fs->root);
+ mntput(fs->rootmnt);
+ dput(fs->pwd);
+ mntput(fs->pwdmnt);
+ if (fs->altroot) {
++ UNPIN(fs->altroot, fs->altrootmnt, 1);
+ dput(fs->altroot);
+ mntput(fs->altrootmnt);
+ }
_
fs/ext3/super.c
+fs/ext3/file.c
+fs/ext3/inode.c
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
fs/ext3/super.c
+fs/ext3/inode.c
+fs/ext3/file.c
include/linux/ext3_fs.h
include/linux/ext3_fs_sb.h
fs/ext3/namei.c
fs/ext3/ialloc.c
+fs/ext3/inode.c
fs/ext3/ioctl.c
include/linux/ext3_fs.h
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext3/super.c
+fs/ext3/ext3-exports.c
fs/ext3/xattr.c
include/linux/ext3_fs.h
include/linux/ext3_jbd.h
fs/exec.c
fs/dcache.c
+fs/namespace.c
fs/namei.c
fs/nfsd/vfs.c
fs/open.c
fs/proc/base.c
include/linux/dcache.h
include/linux/fs.h
+include/linux/fs_struct.h
kernel/ksyms.c
+kernel/fork.c
+kernel/exit.c
if [ $? != 0 ]; then
echo " $1 do not match with $2 "
echo " $2 will be changed to match $2"
- cat $tmpfile > $P/pc/$PATCH_NAME.pc
+ # cat $tmpfile > $P/pc/$PATCH_NAME.pc
fi
rm -rf $tmpfile
fi
exports_2.4.20-rh-hp.patch
kmem_cache_validate_hp.patch
lustre_version.patch
-vfs_intent-2.4.20-vanilla.patch
+vfs_intent-2.4.20-hp.patch
invalidate_show.patch
export-truncate.patch
iod-stock-24-exports_hp.patch
ext3-noread-2.4.20.patch
extN-wantedi.patch
ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
iopen-2.4.20.patch
tcp-zero-copy.patch
linux-2.4.20-xattr-0.8.54-chaos.patch
ext3-2.4.20-fixes.patch
ext3_orphan_lock-2.4.20-rh.patch
-ext3-delete_thread-2.4.20.patch
+ext3_delete_thread_2.4.20_chaos.patch
ext3-noread-2.4.20.patch
extN-wantedi.patch
ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
iopen-2.4.20.patch
-tcp-zero-copy.patch
+tcp_zero_copy_2.4.20_chaos.patch
+gpl_header-chaos-2.4.20.patch
-uml-patch-2.4.20-4.patch
+uml-patch-2.4.20-6.patch
dev_read_only_2.4.20.patch
exports_2.4.20.patch
kmem_cache_validate_2.4.20.patch
ext3-delete_thread-2.4.20.patch
extN-wantedi.patch
ext3-san-2.4.20.patch
+ext3-map_inode_page.patch
+ext3-error-export.patch
iopen-2.4.20.patch
tcp-zero-copy.patch
-SERIES MEMNONIC COMMENT
+SERIES MNEMONIC COMMENT ARCH
-hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml
-vanilla-2.4.20 linux-2.4.20 patch includes uml
-chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8
-rh-2.4.20 linux-rh-2.4.20-8 same as chaos-2.4.20
-rh-2.4.18-18 linux-rh-2.4.18-18 same as chaos but includes uml
-chaos linux-chaos-2.4.18 same as rh-2.4.18-18 but no uml
-
-REVIEW:
-
-vanilla-2.5 linux-2.5.63
-hp-pnnl linux-2.4.19-hp2_pnnl6
+chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386
+hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64
+vanilla-2.4.20 linux-2.4.20 patch with uml-2.4.20-6 um
+chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 i386
+rh-2.4.20 linux-rh-2.4.20-8 same as chaos-2.4.20 i386
+kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386
.deps
Makefile
Makefile.in
+.*.cmd
#include <linux/lustre_mds.h>
#include <linux/lustre_net.h>
-int client_import_connect(struct lustre_handle *dlm_handle,
+int client_import_connect(struct lustre_handle *dlm_handle,
struct obd_device *obd,
struct obd_uuid *cluuid)
{
char *tmp[] = {imp->imp_target_uuid.uuid,
obd->obd_uuid.uuid,
(char *)dlm_handle};
- int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
int msg_flags;
ENTRY;
if (obd->obd_namespace == NULL)
GOTO(out_disco, rc = -ENOMEM);
- request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp);
+ request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
if (!request)
GOTO(out_ldlm, rc = -ENOMEM);
request->rq_level = LUSTRE_CONN_NEW;
request->rq_replen = lustre_msg_size(0, NULL);
+ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER);
+
imp->imp_dlm_handle = *dlm_handle;
imp->imp_level = LUSTRE_CONN_CON;
class_export_put(exp);
msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
- if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
+ if (msg_flags & MSG_CONNECT_REPLAYABLE) {
imp->imp_replayable = 1;
CDEBUG(D_HA, "connected to replayable target: %s\n",
imp->imp_target_uuid.uuid);
RETURN(-EINVAL);
}
- rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT;
+ switch (imp->imp_connect_op) {
+ case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+ case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+ case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+ default:
+ CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+ imp->imp_target_uuid.uuid, imp->imp_connect_op);
+ RETURN(-EINVAL);
+ }
+
down(&cli->cl_sem);
if (!cli->cl_conn_count) {
CERROR("disconnecting disconnected device (%s)\n",
struct obd_uuid remote_uuid;
struct list_head *p;
char *str, *tmp;
- int rc, i, abort_recovery;
+ int rc = 0, abort_recovery;
ENTRY;
LASSERT_REQSWAB (req, 0);
- str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1);
+ str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1);
if (str == NULL) {
CERROR("bad target UUID for connect\n");
GOTO(out, rc = -EINVAL);
}
+
obd_str2uuid (&tgtuuid, str);
+ target = class_uuid2obd(&tgtuuid);
+ if (!target || target->obd_stopping || !target->obd_set_up) {
+ CERROR("UUID '%s' is not available for connect\n", str);
+ GOTO(out, rc = -ENODEV);
+ }
LASSERT_REQSWAB (req, 1);
- str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1);
+ str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1);
if (str == NULL) {
CERROR("bad client UUID for connect\n");
GOTO(out, rc = -EINVAL);
}
- obd_str2uuid (&cluuid, str);
- i = class_uuid2dev(&tgtuuid);
- if (i == -1) {
- CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid);
- GOTO(out, rc = -ENODEV);
- }
-
- target = &obd_dev[i];
- if (!target || target->obd_stopping || !target->obd_set_up) {
- CERROR("UUID '%s' is not available for connect\n", str);
- GOTO(out, rc = -ENODEV);
- }
+ obd_str2uuid (&cluuid, str);
/* XXX extract a nettype and format accordingly */
snprintf(remote_uuid.uuid, sizeof remote_uuid,
if (!recovering)
return;
- CDEBUG(D_ERROR, "timer will expire in %ld seconds\n",
- OBD_RECOVERY_TIMEOUT / HZ);
+ CERROR("timer will expire in %ld seconds\n", OBD_RECOVERY_TIMEOUT / HZ);
mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
}
DEBUG_REQ(D_ERROR, req, "processing: ");
(void)obd->obd_recovery_handler(req);
reset_recovery_timer(obd);
-#warning FIXME: mds_fsync_super(mds->mds_sb);
+ /* bug 1580: decide how to properly sync() in recovery */
+ //mds_fsync_super(mds->mds_sb);
class_export_put(req->rq_export);
OBD_FREE(req->rq_reqmsg, req->rq_reqlen);
OBD_FREE(req, sizeof *req);
if (recovery_done) {
struct list_head *tmp, *n;
ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
- CDEBUG(D_ERROR,
- "%s: all clients recovered, sending delayed replies\n",
+ CERROR("%s: all clients recovered, sending delayed replies\n",
obd->obd_name);
obd->obd_recovering = 0;
list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) {
return "lookup";
case IT_UNLINK:
return "unlink";
+ case IT_GETXATTR:
+ return "getxattr";
default:
CERROR("Unknown intent %d\n", it);
return "UNKNOWN";
if (rc == -ERESTART)
retval = rc;
else if (rc)
- CERROR("Failed AST - should clean & disconnect "
- "client\n");
+ CDEBUG(D_DLMTRACE, "Failed AST - should clean & "
+ "disconnect client\n");
LDLM_LOCK_PUT(w->w_lock);
list_del(&w->w_list);
OBD_FREE(w, sizeof(*w));
#endif /* __KERNEL__ */
-static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
- char *ast_type)
+static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, char *ast_type)
{
CERROR("%s AST failed (%d) for res "LPU64"/"LPU64
", mode %s: evicting client %s@%s NID "LPU64"\n",
RETURN(rc);
}
+/* XXX copied from ptlrpc/service.c; returns large - small in microseconds */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+ return (large->tv_sec - small->tv_sec) * 1000000 +
+ (large->tv_usec - small->tv_usec);
+}
+
int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
{
struct ldlm_request *body;
struct ptlrpc_request *req;
+ struct timeval granted_time;
+ long total_enqueue_wait;
int rc = 0, size = sizeof(*body);
ENTRY;
RETURN(-EINVAL);
}
+ do_gettimeofday(&granted_time);
+ total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time);
+
+ if (total_enqueue_wait / 1000000 > obd_timeout)
+ LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait);
+
req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
LDLM_CP_CALLBACK, 1, &size, NULL);
if (!req)
body->lock_flags = flags;
ldlm_lock2desc(lock, &body->lock_desc);
- LDLM_DEBUG(lock, "server preparing completion AST");
+ LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+ total_enqueue_wait);
req->rq_replen = lustre_msg_size(0, NULL);
req->rq_level = LUSTRE_CONN_RECOVER;
if (!lock)
GOTO(out, err = -ENOMEM);
+ do_gettimeofday(&lock->l_enqueued_time);
memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
sizeof(lock->l_remote_handle));
LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");
lock->l_req_mode = dlm_req->lock_desc.l_granted_mode;
LDLM_DEBUG(lock, "completion AST, new lock mode");
}
- if (lock->l_resource->lr_type == LDLM_EXTENT) {
+ if (lock->l_resource->lr_type == LDLM_EXTENT)
memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent,
sizeof(lock->l_extent));
- if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
- /* XXX Old versions of BA OST code have a fencepost bug
- * which will cause them to grant a lock that's one
- * byte too large. This can be safely removed after BA
- * ships their next release -phik (02 Apr 2003) */
- lock->l_extent.end--;
- } else if ((lock->l_extent.start & ~PAGE_MASK) ==
- ~PAGE_MASK) {
- lock->l_extent.start++;
- }
- }
-
ldlm_resource_unlink_lock(lock);
if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
&lock->l_resource->lr_name,
return rc;
}
-static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
+static int ldlm_cleanup(struct obd_device *obddev, int flags)
{
struct ldlm_obd *ldlm = &obddev->u.ldlm;
ENTRY;
}
#ifdef __KERNEL__
- if (force) {
+ if (flags & OBD_OPT_FORCE) {
ptlrpc_put_ldlm_hooks();
} else if (ptlrpc_ldlm_hooks_referenced()) {
CERROR("Some connections weren't cleaned up; run lconf with "
EXPORT_SYMBOL(ldlm_resource_foreach);
EXPORT_SYMBOL(ldlm_namespace_foreach);
EXPORT_SYMBOL(ldlm_namespace_foreach_res);
+EXPORT_SYMBOL(ldlm_change_cbdata);
/* ldlm_lockd.c */
EXPORT_SYMBOL(ldlm_server_blocking_ast);
/* Set a flag to prevent us from sending a CANCEL (bug 407) */
l_lock(&ns->ns_lock);
lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+ LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
l_unlock(&ns->ns_lock);
ldlm_lock_decref_and_cancel(lockh, mode);
CERROR ("Can't unpack ldlm_reply\n");
GOTO (out_req, rc = -EPROTO);
}
-
+
memcpy(&lock->l_remote_handle, &reply->lock_handle,
sizeof(lock->l_remote_handle));
*flags = reply->lock_flags;
body->lock_desc.l_extent.end,
reply->lock_extent.start, reply->lock_extent.end);
- if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) {
- /* XXX Old versions of BA OST code have a fencepost bug
- * which will cause them to grant a lock that's one
- * byte too large. This can be safely removed after BA
- * ships their next release -phik (02 Apr 2003) */
- reply->lock_extent.end--;
- } else if ((reply->lock_extent.start & ~PAGE_MASK) ==
- ~PAGE_MASK) {
- reply->lock_extent.start++;
- }
-
cookie = &reply->lock_extent; /* FIXME bug 267 */
cookielen = sizeof(reply->lock_extent);
}
CERROR ("Can't unpack ldlm_reply\n");
GOTO (out, rc = -EPROTO);
}
-
+
res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
if (res != NULL)
ldlm_reprocess_all(res);
local_cancel:
ldlm_lock_cancel(lock);
} else {
- LDLM_DEBUG(lock, "client-side local cancel");
if (lock->l_resource->lr_namespace->ns_client) {
- CERROR("Trying to cancel local lock\n");
+ LDLM_ERROR(lock, "Trying to cancel local lock\n");
LBUG();
}
+ LDLM_DEBUG(lock, "client-side local cancel");
ldlm_lock_cancel(lock);
ldlm_reprocess_all(lock->l_resource);
LDLM_DEBUG(lock, "client-side local cancel handler END");
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
if (opaque != NULL && lock->l_data != opaque) {
- LDLM_ERROR(lock, "data %p doesn't match opaque %p res"
- LPU64":"LPU64, lock->l_data, opaque,
- res_id.name[0], res_id.name[1]);
+ LDLM_ERROR(lock, "data %p doesn't match opaque %p",
+ lock->l_data, opaque);
//LBUG();
continue;
}
ldlm_res_iterator_t iter, void *closure)
{
int i, rc = LDLM_ITER_CONTINUE;
-
+
l_lock(&ns->ns_lock);
for (i = 0; i < RES_HASH_SIZE; i++) {
struct list_head *tmp, *next;
list_for_each_safe(tmp, next, &(ns->ns_hash[i])) {
- struct ldlm_resource *res =
+ struct ldlm_resource *res =
list_entry(tmp, struct ldlm_resource, lr_hash);
ldlm_resource_getref(res);
RETURN(rc);
}
+/*
+ * Non-blocking function to manipulate a lock whose cb_data is being put away.
+ *
+ * Looks up the resource named by *res_id in namespace ns and, while holding
+ * ns->ns_lock, passes it to ldlm_resource_foreach() together with iter and
+ * data.  Returns quietly when ldlm_resource_get() yields no resource; a NULL
+ * ns is reported and trips LBUG().  ldlm_resource_putref() is called on the
+ * resource before returning.
+ */
+void ldlm_change_cbdata(struct ldlm_namespace *ns,
+                        struct ldlm_res_id *res_id,
+                        ldlm_iterator_t iter,
+                        void *data)
+{
+        struct ldlm_resource *res;
+        ENTRY;
+
+        if (ns == NULL) {
+                CERROR("must pass in namespace\n");
+                LBUG();
+        }
+
+        res = ldlm_resource_get(ns, NULL, *res_id, 0, 0);
+        if (res == NULL) {
+                EXIT;
+                return;
+        }
+
+        l_lock(&ns->ns_lock);
+        /* the iterator result is not reported: this function returns void */
+        ldlm_resource_foreach(res, iter, data);
+        l_unlock(&ns->ns_lock);
+        ldlm_resource_putref(res);
+        EXIT;
+}
+
/* Lock replay */
static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
else
flags = LDLM_FL_REPLAY;
-
+
size = sizeof(*body);
req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
if (!req)
/* We're part of recovery, so don't wait for it. */
req->rq_level = LUSTRE_CONN_RECOVER;
-
+
body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
ldlm_lock2desc(lock, &body->lock_desc);
body->lock_flags = flags;
rc = ptlrpc_queue_wait(req);
if (rc != ELDLM_OK)
GOTO(out, rc);
-
+
reply = lustre_swab_repbuf(req, 0, sizeof (*reply),
lustre_swab_ldlm_reply);
if (reply == NULL) {
CERROR("Can't unpack ldlm_reply\n");
GOTO (out, rc = -EPROTO);
}
-
+
memcpy(&lock->l_remote_handle, &reply->lock_handle,
sizeof(lock->l_remote_handle));
LDLM_DEBUG(lock, "replayed lock:");
struct list_head list, *pos, *next;
struct ldlm_lock *lock;
int rc = 0;
-
+
ENTRY;
INIT_LIST_HEAD(&list);
if (!ns)
RETURN(NULL);
- ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE);
+ OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
if (!ns->ns_hash)
GOTO(out_ns, NULL);
- atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
-
OBD_ALLOC(ns->ns_name, strlen(name) + 1);
if (!ns->ns_name)
GOTO(out_hash, NULL);
out_hash:
POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
- vfree(ns->ns_hash);
- atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+ OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
out_ns:
OBD_FREE(ns, sizeof(*ns));
return NULL;
lock->l_flags |= LDLM_FL_CBPENDING;
/* ... without sending a CANCEL message. */
lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+ LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
/* ... and without calling the cancellation callback */
lock->l_flags |= LDLM_FL_CANCEL;
LDLM_LOCK_PUT(lock);
ldlm_namespace_cleanup(ns, 0);
POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
- vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */);
- atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory);
+ OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE);
OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1);
OBD_FREE(ns, sizeof(*ns));
it = dentry->d_it;
- rc = ll_it_open_error(IT_OPEN_CREATE, it);
+ rc = ll_it_open_error(DISP_OPEN_CREATE, it);
if (rc) {
LL_GET_INTENT(dentry, it);
ptlrpc_req_finished(it->it_data);
#if 0
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
LL_GET_INTENT(file->f_dentry, it);
- rc = ll_it_open_error(IT_OPEN_OPEN, it);
+ rc = ll_it_open_error(DISP_OPEN_OPEN, it);
if (rc)
RETURN(rc);
#endif
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-
+
memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
oa.o_valid |= OBD_MD_FLHANDLE;
/* XXX do we need this??
memset(&osfs, 0, sizeof(osfs));
- rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+ rc = obd_statfs(class_conn2obd(&sbi->ll_mdc_conn),&osfs,jiffies-100*HZ);
*/
/* fetch attr of root inode */
err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
out_request:
ptlrpc_req_finished(request);
out_osc:
- obd_disconnect(&sbi->ll_osc_conn);
+ obd_disconnect(&sbi->ll_osc_conn, 0);
out_mdc:
- obd_disconnect(&sbi->ll_mdc_conn);
+ obd_disconnect(&sbi->ll_mdc_conn, 0);
out_free:
OBD_FREE(sbi, sizeof(*sbi));
return err;
Makefile.in
.deps
TAGS
+.*.cmd
modulefs_DATA = llite.o
EXTRA_PROGRAMS = llite
-llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c
-llite_SOURCES += file.c dir.c sysctl.c symlink.c
-llite_SOURCES += namei.c lproc_llite.c llite_internal.h
+llite_SOURCES = dcache.c commit_callback.c rw.c super25.c
+llite_SOURCES += file.c dir.c sysctl.c symlink.c llite_lib.c
+llite_SOURCES += namei.c lproc_llite.c super.c iod.c llite_internal.h
include $(top_srcdir)/Rules
#include <linux/lustre_idl.h>
#include <linux/lustre_dlm.h>
+#include "llite_internal.h"
+
/* should NOT be called with the dcache lock, see fs/dcache.c */
-void ll_release(struct dentry *de)
+static void ll_release(struct dentry *de)
{
+ struct ll_dentry_data *lld = ll_d2d(de); /* NOTE(review): lld is
+ * dereferenced below without a NULL check - confirm that
+ * d_fsdata is always set before a dentry is released */
ENTRY;
+
+ LASSERT(lld->lld_cwd_count == 0); /* no cwd pin left (see ll_pin) */
+ LASSERT(lld->lld_mnt_count == 0); /* no mount pin left (see ll_pin) */
OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
- EXIT;
-}
-int ll_delete(struct dentry *de)
-{
- if (de->d_it != 0) {
- CERROR("%s put dentry %p+%p with d_it %p\n", current->comm,
- de, de->d_fsdata, de->d_it);
- LBUG();
- }
- return 0;
+ EXIT;
}
void ll_set_dd(struct dentry *de)
LASSERT(de != NULL);
lock_kernel();
-
if (de->d_fsdata == NULL) {
OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
- sema_init(&ll_d2d(de)->lld_it_sem, 1);
}
-
unlock_kernel();
EXIT;
}
-void ll_intent_release(struct dentry *de, struct lookup_intent *it)
+void ll_intent_release(struct lookup_intent *it)
{
struct lustre_handle *handle;
ENTRY;
- if (it->it_lock_mode) {
+ if (it->it_op && it->it_lock_mode) {
handle = (struct lustre_handle *)it->it_lock_handle;
CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
" from it %p\n",
lock (see bug 494) */
it->it_lock_mode = 0;
}
+ it->it_magic = 0;
+ it->it_op_release = 0;
+ EXIT;
+}
- if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) {
- EXIT;
+void ll_unhash_aliases(struct inode *inode)
+{
+ struct dentry *dentry = NULL;
+ struct list_head *tmp;
+ struct ll_sb_info *sbi;
+ ENTRY;
+
+ if (inode == NULL) { /* nothing to walk: log it and bail out */
+ CERROR("unexpected NULL inode, tell phil\n");
return;
}
- if (de->d_it == it)
- LL_GET_INTENT(de, it);
- else
- CDEBUG(D_INODE, "STRANGE intent release: %p %p\n",
- de->d_it, it);
+ sbi = ll_i2sbi(inode);
+
+ CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n",
+ inode->i_ino, inode->i_generation);
+ spin_lock(&dcache_lock); /* held across the whole alias walk */
+ list_for_each(tmp, &inode->i_dentry) { /* every alias of the inode */
+ dentry = list_entry(tmp, struct dentry, d_alias);
+
+ list_del_init(&dentry->d_hash); /* take it off its hash chain.
+ * NOTE(review): list_* ops are used on d_hash here while
+ * ll_revalidate_it uses hlist_del_init() - confirm which
+ * type d_hash has on the kernels this must build for */
+ dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+ list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list); /* park
+ * the alias on the per-superblock orphan list */
+ }
+
+ spin_unlock(&dcache_lock);
EXIT;
}
extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
-static int revalidate2_finish(int flag, struct ptlrpc_request *request,
+static int revalidate_it_finish(struct ptlrpc_request *request,
struct inode *parent, struct dentry **de,
struct lookup_intent *it, int offset, obd_id ino)
{
struct ll_sb_info *sbi = ll_i2sbi(parent);
- struct mds_body *body;
- struct lov_stripe_md *lsm = NULL;
- struct lov_mds_md *lmm;
- int lmmsize;
+ struct lustre_md md;
int rc = 0;
ENTRY;
/* NB 1 request reference will be taken away by ll_intent_lock()
* when I return */
- if ((flag & LL_LOOKUP_NEGATIVE) != 0)
- GOTO (out, rc = -ENOENT);
+ if (it_disposition(it, DISP_LOOKUP_NEG))
+ RETURN(-ENOENT);
- /* We only get called if the mdc_enqueue() called from
- * ll_intent_lock() was successful. Therefore the mds_body is
- * present and correct, and the eadata is present (but still
- * opaque, so only obd_unpackmd() can check the size) */
- body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body));
- LASSERT (body != NULL);
- LASSERT_REPSWABBED (request, offset);
+ /* ll_intent_lock was successful, now prepare the lustre_md from the
+ * reply; any unpacking error is handed straight back to the caller */
+ rc = mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+ if (rc)
+ RETURN(rc);
- if (body->valid & OBD_MD_FLEASIZE) {
- /* Only bother with this if inodes's LSM not set? */
-
- if (body->eadatasize == 0) {
- CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
- GOTO (out, rc = -EPROTO);
- }
- lmmsize = body->eadatasize;
- lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize);
- LASSERT (lmm != NULL);
- LASSERT_REPSWABBED (request, offset + 1);
-
- rc = obd_unpackmd (&sbi->ll_osc_conn,
- &lsm, lmm, lmmsize);
- if (rc < 0) {
- CERROR ("Error %d unpacking eadata\n", rc);
- LBUG();
- /* XXX don't know if I should do this... */
- GOTO (out, rc);
- /* or skip the ll_update_inode but still do
- * mdc_lock_set_inode() */
- }
- LASSERT (rc >= sizeof (*lsm));
- rc = 0;
- }
+ ll_update_inode((*de)->d_inode, md.body, md.lsm);
- ll_update_inode((*de)->d_inode, body, lsm);
+ if (md.lsm != NULL && ll_i2info((*de)->d_inode)->lli_smd != md.lsm)
+ obd_free_memmd (&sbi->ll_osc_conn, &md.lsm); /* free md.lsm
+ * unless it is now the inode's lli_smd */
- if (lsm != NULL &&
- ll_i2info((*de)->d_inode)->lli_smd != lsm)
- obd_free_memmd (&sbi->ll_osc_conn, &lsm);
-
- ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
- (*de)->d_inode);
- out:
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+ (*de)->d_inode, (*de)->d_inode->i_ino,
+ (*de)->d_inode->i_generation);
+ ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle,
+ (*de)->d_inode);
RETURN(rc);
}
RETURN(0);
}
-int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it)
+int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it)
{
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
LL_IT2STR(it));
- /* We don't want to cache negative dentries, so return 0 immediately.
- * We believe that this is safe, that negative dentries cannot be
- * pinned by someone else */
- if (de->d_inode == NULL) {
- CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n");
+ /* Cached negative dentries are unsafe for now - look them up again */
+ if (de->d_inode == NULL)
RETURN(0);
- }
+
+ /*
+ * never execute intents for mount points
+ * - attrs will be fixed up in ll_revalidate_inode
+ */
+ if (d_mountpoint(de))
+ RETURN(1);
+
+ if (it)
+ it->it_op_release = ll_intent_release;
if (it == NULL || it->it_op == IT_GETATTR) {
/* We could just return 1 immediately, but since we should only
memcpy(it->it_lock_handle, &lockh,
sizeof(lockh));
it->it_lock_mode = LCK_PR;
- LL_SAVE_INTENT(de, it);
} else {
ldlm_lock_decref(&lockh, LCK_PR);
}
memcpy(it->it_lock_handle, &lockh,
sizeof(lockh));
it->it_lock_mode = LCK_PW;
- LL_SAVE_INTENT(de, it);
} else {
ldlm_lock_decref(&lockh, LCK_PW);
}
}
if (S_ISDIR(de->d_inode->i_mode))
ll_invalidate_inode_pages(de->d_inode);
- d_unhash_aliases(de->d_inode);
+ ll_unhash_aliases(de->d_inode);
RETURN(0);
}
- rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
+ rc = ll_intent_lock(de->d_parent->d_inode, &de, it, flags,
+ revalidate_it_finish);
if (rc < 0) {
if (rc != -ESTALE) {
CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
it->it_status);
}
+ ll_unhash_aliases(de->d_inode);
RETURN(0);
}
/* unfortunately ll_intent_lock may cause a callback and revoke our
dentry */
spin_lock(&dcache_lock);
- list_del_init(&de->d_hash);
+ hlist_del_init(&de->d_hash);
__d_rehash(de, 0);
spin_unlock(&dcache_lock);
RETURN(1);
}
+static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+ struct inode *inode= de->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_dentry_data *ldd = ll_d2d(de);
+ struct obd_client_handle *handle;
+ int rc = 0;
+ ENTRY;
+ LASSERT(ldd);
+
+ lock_kernel();
+ /* Take one pin reference on the dentry. flag == 1 counts in
+ * lld_mnt_count, flag == 0 counts in lld_cwd_count; both counters
+ * are only touched with the BKL held. Only the caller that moves
+ * a counter from 0 to 1 goes on to call obd_pin(); later callers
+ * just bump the count and return. mnt is not used.
+ *
+ * Strictly speaking this introduces an additional race: the
+ * increments should wait until the rpc has returned.
+ * However, given that at present the function is void, this
+ * issue is moot.
+ *
+ * NOTE(review): the increments test flag == 1 / flag == 0, while
+ * the handle selection and the failure path below treat any
+ * non-zero flag as "mount". A flag other than 0 or 1 would skip
+ * both increments yet still decrement lld_mnt_count on failure -
+ * confirm callers only ever pass 0 or 1. */
+ if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
+ unlock_kernel();
+ EXIT;
+ return;
+ }
+
+ if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
+ unlock_kernel();
+ EXIT;
+ return;
+ }
+ unlock_kernel();
+
+ handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
+ rc = obd_pin(&sbi->ll_mdc_conn, inode->i_ino, inode->i_generation,
+ inode->i_mode & S_IFMT, handle, flag);
+
+ if (rc) { /* pin failed: clear the handle and undo our count */
+ lock_kernel();
+ memset(handle, 0, sizeof(*handle)); /* ll_unpin checks och_magic */
+ if (flag == 0)
+ ldd->lld_cwd_count--;
+ else
+ ldd->lld_mnt_count--;
+ unlock_kernel();
+ }
+
+ EXIT;
+ return;
+}
+
+static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
+ struct ll_dentry_data *ldd = ll_d2d(de);
+ struct obd_client_handle handle;
+ int count, rc = 0;
+ ENTRY;
+ LASSERT(ldd);
+
+ lock_kernel();
+ /* Drop one pin reference on the dentry. A non-zero flag selects
+ * lld_mnt_och / lld_mnt_count, zero selects lld_cwd_och /
+ * lld_cwd_count. The handle is copied by value while the BKL is
+ * held, and obd_unpin() is only called by the caller that brings
+ * the matching count down to zero. mnt is not used.
+ *
+ * Strictly speaking this introduces an additional race: the
+ * decrements should wait until the rpc has returned.
+ * However, given that at present the function is void, this
+ * issue is moot.
+ *
+ * NOTE(review): rc from obd_unpin() is stored but never checked. */
+ handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
+ if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
+ /* the "pin" failed: ll_pin zeroes the handle and rolls its
+ * own count back on error, so nothing is decremented here */
+ unlock_kernel();
+ EXIT;
+ return;
+ }
+
+ if (flag)
+ count = --ldd->lld_mnt_count;
+ else
+ count = --ldd->lld_cwd_count;
+ unlock_kernel();
+
+ if (count != 0) { /* other pin references of this kind remain */
+ EXIT;
+ return;
+ }
+
+ rc = obd_unpin(&sbi->ll_mdc_conn, &handle, flag);
+ EXIT;
+ return;
+}
+
struct dentry_operations ll_d_ops = {
- .d_revalidate2 = ll_revalidate2,
- .d_intent_release = ll_intent_release,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ .d_revalidate_nd = ll_revalidate_nd,
+#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0) */
+ .d_revalidate_it = ll_revalidate_it,
+#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) */
.d_release = ll_release,
- .d_delete = ll_delete,
+#if 0 /* pin hooks are compiled out, so ll_pin/ll_unpin are unreferenced */
+ .d_pin = ll_pin,
+ .d_unpin = ll_unpin,
+#endif /* 0 */
};
#define PageChecked(page) test_bit(PG_checked, &(page)->flags)
#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
-
-static int ll_dir_prepare_write(struct file *file, struct page *page,
- unsigned from, unsigned to)
-{
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
- return 0;
-}
-
/* returns the page unlocked, but with a reference */
static int ll_dir_readpage(struct file *file, struct page *page)
{
&lockh);
if (!rc) {
ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
-
+
rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
&data, &lockh, NULL, 0,
ldlm_completion_ast, ll_mdc_blocking_ast,
SetPageUptodate(page);
unlock_page(page);
- ll_unlock(LCK_PR, &lockh);
- if (rc != ELDLM_OK)
- CERROR("ll_unlock: err: %d\n", rc);
+ ldlm_lock_decref(&lockh, LCK_PR);
return rc;
}
struct address_space_operations ll_dir_aops = {
readpage: ll_dir_readpage,
- prepare_write: ll_dir_prepare_write
};
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3))
-int waitfor_one_page(struct page *page)
-{
- int error = 0;
- struct buffer_head *bh, *head = page->buffers;
-
- bh = head;
- do {
- wait_on_buffer(bh);
- if (buffer_req(bh) && !buffer_uptodate(bh))
- error = -EIO;
- } while ((bh = bh->b_this_page) != head);
- return error;
-}
-#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
- wait_on_page_locked(page);
- return 0;
-}
-#endif
-
/*
* ext2 uses block-sized chunks. Arguably, sector-sized ones would be
* more robust, but we have what we have
return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
}
-extern void set_page_clean(struct page *page);
-
-static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
-{
- struct inode *dir = page->mapping->host;
- loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to;
- int err = 0;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- dir->i_version = ++event;
-#endif
- if (new_size > dir->i_size)
- dir->i_size = new_size;
- SetPageUptodate(page);
- set_page_clean(page);
-
- //page->mapping->a_ops->commit_write(NULL, page, from, to);
- //if (IS_SYNC(dir))
- // err = waitfor_one_page(page);
- return err;
-}
static void ext2_check_page(struct page *page)
{
return ERR_PTR(-EIO);
}
-/*
- * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure.
- *
- * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller.
- */
-static inline int ext2_match (int len, const char * const name,
- struct ext2_dir_entry_2 * de)
-{
- if (len != de->name_len)
- return 0;
- if (!de->inode)
- return 0;
- return !memcmp(name, de->name, len);
-}
/*
* p is at least 6 bytes before the end of page
[EXT2_FT_SYMLINK] DT_LNK,
};
-static unsigned int ll_dt2fmt[DT_WHT + 1] = {
- [EXT2_FT_UNKNOWN] 0,
- [EXT2_FT_REG_FILE] S_IFREG,
- [EXT2_FT_DIR] S_IFDIR,
- [EXT2_FT_CHRDEV] S_IFCHR,
- [EXT2_FT_BLKDEV] S_IFBLK,
- [EXT2_FT_FIFO] S_IFIFO,
- [EXT2_FT_SOCK] S_IFSOCK,
- [EXT2_FT_SYMLINK] S_IFLNK
-};
-
-#define S_SHIFT 12
-static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
- [S_IFREG >> S_SHIFT] EXT2_FT_REG_FILE,
- [S_IFDIR >> S_SHIFT] EXT2_FT_DIR,
- [S_IFCHR >> S_SHIFT] EXT2_FT_CHRDEV,
- [S_IFBLK >> S_SHIFT] EXT2_FT_BLKDEV,
- [S_IFIFO >> S_SHIFT] EXT2_FT_FIFO,
- [S_IFSOCK >> S_SHIFT] EXT2_FT_SOCK,
- [S_IFLNK >> S_SHIFT] EXT2_FT_SYMLINK,
-};
-
-static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
-{
- mode_t mode = inode->i_mode;
- de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
-}
int ll_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
}
de = (ext2_dirent *)(kaddr+offset);
limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
- for ( ;(char*)de <= limit; de = ext2_next_entry(de))
+ for ( ;(char*)de <= limit; de = ext2_next_entry(de)) {
if (de->inode) {
int over;
unsigned char d_type = DT_UNKNOWN;
GOTO(done,0);
}
}
+ }
ext2_put_page(page);
}
done:
filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
filp->f_version = inode->i_version;
- UPDATE_ATIME(inode);
+ update_atime(inode);
RETURN(0);
}
-/*
- * ext2_find_entry()
- *
- * finds an entry in the specified directory with the wanted name. It
- * returns the page in which the entry was found, and the entry itself
- * (as a parameter - res_dir). Page is returned mapped and unlocked.
- * Entry is guaranteed to be valid.
- */
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
- struct dentry *dentry, struct page ** res_page)
-{
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- unsigned reclen = EXT2_DIR_REC_LEN(namelen);
- unsigned long start, n;
- unsigned long npages = dir_pages(dir);
- struct page *page = NULL;
- ext2_dirent * de;
-
- /* OFFSET_CACHE */
- *res_page = NULL;
-
- // start = dir->u.ext2_i.i_dir_start_lookup;
- start = 0;
- if (start >= npages)
- start = 0;
- n = start;
- do {
- char *kaddr;
- page = ll_get_dir_page(dir, n);
- if (!IS_ERR(page)) {
- kaddr = page_address(page);
- de = (ext2_dirent *) kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
- while ((char *) de <= kaddr) {
- if (ext2_match (namelen, name, de))
- goto found;
- de = ext2_next_entry(de);
- }
- ext2_put_page(page);
- }
- if (++n >= npages)
- n = 0;
- } while (n != start);
- return NULL;
-
-found:
- *res_page = page;
- // dir->u.ext2_i.i_dir_start_lookup = n;
- return de;
-}
-
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
-{
- struct page *page = ll_get_dir_page(dir, 0);
- ext2_dirent *de = NULL;
-
- if (!IS_ERR(page)) {
- de = ext2_next_entry((ext2_dirent *) page_address(page));
- *p = page;
- }
- return de;
-}
-
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *type)
-{
- obd_id res = 0;
- struct ext2_dir_entry_2 * de;
- struct page *page;
-
- de = ext2_find_entry (dir, dentry, &page);
- if (de) {
- res = le32_to_cpu(de->inode);
- *type = ll_dt2fmt[de->file_type];
- kunmap(page);
- page_cache_release(page);
- }
- return res;
-}
-
-/* Releases the page */
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
- struct page *page, struct inode *inode)
-{
- unsigned from = (char *) de - (char *) page_address(page);
- unsigned to = from + le16_to_cpu(de->rec_len);
- int err;
-
- lock_page(page);
- err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
- if (err)
- LBUG();
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type (de, inode);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- err = ext2_commit_chunk(page, from, to);
- unlock_page(page);
- ext2_put_page(page);
-}
-
-/*
- * Parent is locked.
- */
-int ll_add_link (struct dentry *dentry, struct inode *inode)
-{
- struct inode *dir = dentry->d_parent->d_inode;
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
- unsigned reclen = EXT2_DIR_REC_LEN(namelen);
- unsigned short rec_len, name_len;
- struct page *page = NULL;
- ext2_dirent * de;
- unsigned long npages = dir_pages(dir);
- unsigned long n;
- char *kaddr;
- unsigned from, to;
- int err;
-
- /* We take care of directory expansion in the same loop */
- for (n = 0; n <= npages; n++) {
- page = ll_get_dir_page(dir, n);
- err = PTR_ERR(page);
- if (IS_ERR(page))
- goto out;
- kaddr = page_address(page);
- de = (ext2_dirent *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
- while ((char *)de <= kaddr) {
- err = -EEXIST;
- if (ext2_match (namelen, name, de))
- goto out_page;
- name_len = EXT2_DIR_REC_LEN(de->name_len);
- rec_len = le16_to_cpu(de->rec_len);
- if ( n==npages && rec_len == 0) {
- CERROR("Fatal dir behaviour\n");
- goto out_page;
- }
- if (!de->inode && rec_len >= reclen)
- goto got_it;
- if (rec_len >= name_len + reclen)
- goto got_it;
- de = (ext2_dirent *) ((char *) de + rec_len);
- }
- ext2_put_page(page);
- }
- LBUG();
- return -EINVAL;
-
-got_it:
- from = (char*)de - (char*)page_address(page);
- to = from + rec_len;
- lock_page(page);
- err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
- if (err)
- goto out_unlock;
- if (de->inode) {
- ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
- de1->rec_len = cpu_to_le16(rec_len - name_len);
- de->rec_len = cpu_to_le16(name_len);
- de = de1;
- }
- de->name_len = namelen;
- memcpy (de->name, name, namelen);
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type (de, inode);
- CDEBUG(D_INODE, "type set to %o\n", de->file_type);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
- err = ext2_commit_chunk(page, from, to);
-
- // change_inode happens with the commit_chunk
- /* XXX OFFSET_CACHE */
-
-out_unlock:
- unlock_page(page);
-out_page:
- ext2_put_page(page);
-out:
- return err;
-}
-
-/*
- * ext2_delete_entry deletes a directory entry by merging it with the
- * previous entry. Page is up-to-date. Releases the page.
- */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
- char *kaddr = page_address(page);
- unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
- unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
- ext2_dirent * pde = NULL;
- ext2_dirent * de = (ext2_dirent *) (kaddr + from);
- int err;
-
- while ((char*)de < (char*)dir) {
- pde = de;
- de = ext2_next_entry(de);
- }
- if (pde)
- from = (char*)pde - (char*)page_address(page);
- lock_page(page);
- err = mapping->a_ops->prepare_write(NULL, page, from, to);
- if (err)
- LBUG();
- if (pde)
- pde->rec_len = cpu_to_le16(to-from);
- dir->inode = 0;
- inode->i_ctime = inode->i_mtime = CURRENT_TIME;
- err = ext2_commit_chunk(page, from, to);
- unlock_page(page);
- ext2_put_page(page);
- return err;
-}
-
-/*
- * Set the first fragment of directory.
- */
-int ext2_make_empty(struct inode *inode, struct inode *parent)
-{
- struct address_space *mapping = inode->i_mapping;
- struct page *page = grab_cache_page(mapping, 0);
- unsigned chunk_size = ext2_chunk_size(inode);
- struct ext2_dir_entry_2 * de;
- char *base;
- int err;
- ENTRY;
-
- if (!page)
- return -ENOMEM;
- base = kmap(page);
- if (!base)
- return -ENOMEM;
-
- err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
- if (err)
- goto fail;
-
- de = (struct ext2_dir_entry_2 *) base;
- de->name_len = 1;
- de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1));
- memcpy (de->name, ".\0\0", 4);
- de->inode = cpu_to_le32(inode->i_ino);
- ext2_set_de_type (de, inode);
-
- de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1));
- de->name_len = 2;
- de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1));
- de->inode = cpu_to_le32(parent->i_ino);
- memcpy (de->name, "..\0", 4);
- ext2_set_de_type (de, inode);
-
- err = ext2_commit_chunk(page, 0, chunk_size);
-fail:
- kunmap(page);
- unlock_page(page);
- page_cache_release(page);
- ENTRY;
- return err;
-}
-
-/*
- * routine to check that the specified directory is empty (for rmdir)
- */
-int ext2_empty_dir (struct inode * inode)
-{
- struct page *page = NULL;
- unsigned long i, npages = dir_pages(inode);
-
- for (i = 0; i < npages; i++) {
- char *kaddr;
- ext2_dirent * de;
- page = ll_get_dir_page(inode, i);
-
- if (IS_ERR(page))
- continue;
-
- kaddr = page_address(page);
- de = (ext2_dirent *)kaddr;
- kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1);
-
- while ((char *)de <= kaddr) {
- if (de->inode != 0) {
- /* check for . and .. */
- if (de->name[0] != '.')
- goto not_empty;
- if (de->name_len > 2)
- goto not_empty;
- if (de->name_len < 2) {
- if (de->inode !=
- cpu_to_le32(inode->i_ino))
- goto not_empty;
- } else if (de->name[1] != '.')
- goto not_empty;
- }
- de = ext2_next_entry(de);
- }
- ext2_put_page(page);
- }
- return 1;
-
-not_empty:
- ext2_put_page(page);
- return 0;
-}
-
static int ll_dir_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct obd_ioctl_data *data;
ENTRY;
+
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
inode->i_generation, inode, cmd);
if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
return -ENOTTY;
+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
switch(cmd) {
case IOC_MDC_LOOKUP: {
struct ptlrpc_request *request = NULL;
obd_ioctl_freedata(buf, len);
return rc;
}
- default:
- CERROR("unrecognized ioctl %#x\n", cmd);
+ case LL_IOC_LOV_SETSTRIPE:
+ case LL_IOC_LOV_GETSTRIPE:
RETURN(-ENOTTY);
+ case IOC_MDC_GETSTRIPE: {
+ struct ptlrpc_request *request = NULL;
+ struct ll_fid fid;
+ struct mds_body *body;
+ struct lov_mds_md *lmm;
+ char *filename;
+ int rc, lmmsize;
+
+ filename = getname((const char *)arg);
+ if (IS_ERR(filename))
+ RETURN(PTR_ERR(filename));
+
+ ll_inode2fid(&fid, inode);
+ rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename,
+ strlen(filename)+1, OBD_MD_FLEASIZE,
+ obd_size_diskmd(&sbi->ll_osc_conn, NULL),
+ &request);
+ if (rc < 0) {
+ CERROR("mdc_getattr_name: failed on %s: rc %d\n",
+ filename, rc);
+ GOTO(out_name, rc);
+ }
+
+ body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
+ LASSERT(body != NULL); /* checked by mdc_getattr_name */
+ LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+
+ lmmsize = body->eadatasize;
+
+ if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
+ GOTO(out_req, rc = -ENODATA);
+
+ if (lmmsize > 4096)
+ GOTO(out_req, rc = -EFBIG);
+
+ lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize);
+ LASSERT(lmm != NULL);
+ LASSERT_REPSWABBED(request, 1);
+
+ rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize);
+ if (rc)
+ GOTO(out_req, rc = -EFAULT);
+
+ EXIT;
+ out_req:
+ ptlrpc_req_finished(request);
+ out_name:
+ putname(filename);
+ return rc;
+ }
+ default:
+ return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg);
}
}
#include <linux/lustre_compat25.h>
#endif
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
+#include "llite_internal.h"
static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
struct file *file)
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE);
fd = (struct ll_file_data *)file->private_data;
if (!fd) /* no process opened the file after an mcreate */
- RETURN(rc = 0);
+ RETURN(0);
/* we might not be able to get a valid handle on this file
* again so we really want to flush our write cache.. */
- if (S_ISREG(inode->i_mode)) {
- filemap_fdatasync(inode->i_mapping);
- filemap_fdatawait(inode->i_mapping);
-
- if (lsm != NULL) {
- memset(&oa, 0, sizeof(oa));
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
-
- memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
- oa.o_valid |= OBD_MD_FLHANDLE;
+ if (S_ISREG(inode->i_mode) && lsm) {
+ write_inode_now(inode, 0);
+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ memcpy(obdo_handle(&oa), &fd->fd_ost_och, FD_OSTDATA_SIZE);
+ oa.o_valid |= OBD_MD_FLHANDLE;
- rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
- if (rc)
- CERROR("inode %lu object close failed: rc = "
- "%d\n", inode->i_ino, rc);
- }
+ rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+ if (rc)
+ CERROR("inode %lu object close failed: rc %d\n",
+ inode->i_ino, rc);
}
rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
RETURN(-ENOMEM);
oa->o_id = lsm->lsm_object_id;
oa->o_mode = S_IFREG;
- oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
if (rc)
GOTO(out, rc);
file->f_flags &= ~O_LOV_DELAY_CREATE;
- obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+ obdo_refresh_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME);
EXIT;
out:
obdo_free(oa);
struct obdo *oa;
struct iattr iattr;
struct mdc_op_data op_data;
- int rc, err, lmm_size = 0;;
+ struct obd_trans_info oti = { 0 };
+ int rc, err, lmm_size = 0;
ENTRY;
oa = obdo_alloc();
if (!oa)
RETURN(-ENOMEM);
+ LASSERT(S_ISREG(inode->i_mode));
oa->o_mode = S_IFREG | 0600;
oa->o_id = inode->i_ino;
+ oa->o_generation = inode->i_generation;
/* Keep these 0 for now, because chown/chgrp does not change the
* ownership on the OST, and we don't want to allow BA OST NFS
* users to access these objects by mistake. */
oa->o_uid = 0;
oa->o_gid = 0;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLUID | OBD_MD_FLGID;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLTYPE |
+ OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID;
+#ifdef ENABLE_ORPHANS
+ oa->o_valid |= OBD_MD_FLCOOKIE;
+#endif
- rc = obd_create(conn, oa, &lsm, NULL);
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME|
+ OBD_MD_FLCTIME | (inode->i_size ? OBD_MD_FLSIZE : 0));
+
+ rc = obd_create(conn, oa, &lsm, &oti);
if (rc) {
CERROR("error creating objects for inode %lu: rc = %d\n",
inode->i_ino, rc);
}
GOTO(out_oa, rc);
}
- obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ);
+ obdo_refresh_inode(inode, oa, OBD_MD_FLBLKSZ);
LASSERT(lsm && lsm->lsm_object_id);
rc = obd_packmd(conn, &lmm, lsm);
ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
- rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data,
- &iattr, lmm, lmm_size, &req);
+#if 0
+#warning FIXME: next line is for debugging purposes only
+ obd_log_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, oti.oti_numcookies,
+ oti.oti_logcookies, OBD_LLOG_FL_SENDNOW);
+#endif
+
+ rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, &iattr,
+ lmm, lmm_size, oti.oti_logcookies,
+ oti.oti_numcookies * sizeof(oti.oti_onecookie), &req);
ptlrpc_req_finished(req);
- obd_free_diskmd (conn, &lmm);
+ obd_free_diskmd(conn, &lmm);
/* If we couldn't complete mdc_open() and store the stripe MD on the
* MDS, we need to destroy the objects now or they will be leaked.
EXIT;
out_oa:
+ oti_free_cookies(&oti);
obdo_free(oa);
return rc;
out_destroy:
- obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
oa->o_id = lsm->lsm_object_id;
- oa->o_valid |= OBD_MD_FLID;
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
+#if 0
+ err = obd_log_cancel(conn, lsm, oti.oti_numcookies, oti.oti_logcookies,
+ OBD_LLOG_FL_SENDNOW);
+ if (err)
+ CERROR("error cancelling inode %lu log cookies: rc %d\n",
+ inode->i_ino, err);
+#endif
err = obd_destroy(conn, oa, lsm, NULL);
obd_free_memmd(conn, &lsm);
if (err)
* before returning in the O_LOV_DELAY_CREATE case and dropping it here
* or in ll_file_release(), but I'm not sure that is desirable/necessary.
*/
-extern int ll_it_open_error(int phase, struct lookup_intent *it);
-
int ll_file_open(struct inode *inode, struct file *file)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
if (inode->i_sb->s_root == file->f_dentry)
RETURN(0);
+ it = file->f_it;
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
- LL_GET_INTENT(file->f_dentry, it);
- rc = ll_it_open_error(IT_OPEN_OPEN, it);
+
+ rc = ll_it_open_error(DISP_OPEN_OPEN, it);
if (rc)
RETURN(rc);
lsm = lli->lli_smd;
if (lsm == NULL) {
- if (file->f_flags & O_LOV_DELAY_CREATE) {
+ if (file->f_flags & O_LOV_DELAY_CREATE ||
+ !(file->f_mode & FMODE_WRITE)) {
CDEBUG(D_INODE, "delaying object creation\n");
RETURN(0);
}
OBD_MD_FLCTIME;
if (ostdata != NULL) {
- memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(&oa), ostdata, FD_OSTDATA_SIZE);
oa.o_valid |= OBD_MD_FLHANDLE;
}
(aft != 0 || after < before) &&
oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT);
- obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
- OBD_MD_FLMTIME | OBD_MD_FLCTIME));
+ obdo_refresh_inode(inode, &oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
if (inode->i_blksize < PAGE_CACHE_SIZE)
inode->i_blksize = PAGE_CACHE_SIZE;
RETURN(0);
}
-/*
- * some callers, notably truncate, really don't want i_size set based
- * on the the size returned by the getattr, or lock acquisition in
- * the future.
- */
-int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
- struct lov_stripe_md *lsm,
- int mode, struct ldlm_extent *extent,
- struct lustre_handle *lockh)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc, flags = 0;
- ENTRY;
-
- LASSERT(lockh->cookie == 0);
-
- /* XXX phil: can we do this? won't it screw the file size up? */
- if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
- (sbi->ll_flags & LL_SBI_NOLCK))
- RETURN(0);
-
- CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
- inode->i_ino, extent->start, extent->end);
-
- rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
- sizeof(extent), mode, &flags, ll_extent_lock_callback,
- inode, lockh);
-
- RETURN(rc);
-}
-
-/*
- * this grabs a lock and manually implements behaviour that makes it look like
- * the OST is returning the file size with each lock acquisition.
- */
-int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
- struct lov_stripe_md *lsm, int mode,
- struct ldlm_extent *extent, struct lustre_handle *lockh)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ldlm_extent size_lock;
- struct lustre_handle match_lockh = {0};
- int flags, rc, matched;
- ENTRY;
-
- rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
- if (rc != ELDLM_OK)
- RETURN(rc);
-
- if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
- RETURN(0);
-
- rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
- if (rc) {
- ll_extent_unlock(fd, inode, lsm, mode, lockh);
- RETURN(rc);
- }
-
- size_lock.start = inode->i_size;
- size_lock.end = OBD_OBJECT_EOF;
-
- /* XXX I bet we should be checking the lock ignore flags.. */
- flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
- matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
- &size_lock, sizeof(size_lock), LCK_PR, &flags,
- inode, &match_lockh);
-
- /* hey, alright, we hold a size lock that covers the size we
- * just found, its not going to change for a while.. */
- if (matched == 1) {
- set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
- obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
- &match_lockh);
- }
-
- RETURN(0);
-}
-
-int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
- struct lov_stripe_md *lsm, int mode,
- struct lustre_handle *lockh)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc;
- ENTRY;
-
- /* XXX phil: can we do this? won't it screw the file size up? */
- if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
- (sbi->ll_flags & LL_SBI_NOLCK))
- RETURN(0);
-
- rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
-
- RETURN(rc);
-}
-
static inline void ll_remove_suid(struct inode *inode)
{
unsigned int mode;
#if 0
static void ll_update_atime(struct inode *inode)
{
-#ifdef USE_ATIME
- struct iattr attr;
-
- attr.ia_atime = LTIME_S(CURRENT_TIME);
- attr.ia_valid = ATTR_ATIME;
-
- if (inode->i_atime == attr.ia_atime) return;
if (IS_RDONLY(inode)) return;
- if (IS_NOATIME(inode)) return;
- /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
- ll_inode_setattr(inode, &attr, 0);
-#else
/* update atime, but don't explicitly write it out just this change */
inode->i_atime = CURRENT_TIME;
-#endif
}
#endif
/* start writeback on dirty pages in the extent when its PW */
for (i = start, j = start % count;
- lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
+ lock->l_granted_mode == LCK_PW && i < end; j++, i++) {
if (j == count) {
i += skip;
j = 0;
}
/* its unlikely, but give us a chance to bail when we're out */
- PGCACHE_WRLOCK(inode->i_mapping);
+ ll_pgcache_lock(inode->i_mapping);
if (list_empty(&inode->i_mapping->dirty_pages)) {
CDEBUG(D_INODE, "dirty list empty\n");
- PGCACHE_WRUNLOCK(inode->i_mapping);
+ ll_pgcache_unlock(inode->i_mapping);
break;
}
- PGCACHE_WRUNLOCK(inode->i_mapping);
+ ll_pgcache_unlock(inode->i_mapping);
if (need_resched())
schedule();
}
if (PageDirty(page)) {
CDEBUG(D_INODE, "writing page %p\n", page);
- PGCACHE_WRLOCK(inode->i_mapping);
+ ll_pgcache_lock(inode->i_mapping);
list_del(&page->list);
list_add(&page->list, &inode->i_mapping->locked_pages);
- PGCACHE_WRUNLOCK(inode->i_mapping);
+ ll_pgcache_unlock(inode->i_mapping);
/* this writepage might write out pages outside
* this extent, but that's ok, the pages are only
LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0);
LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0);
for (i = start, j = start % count ; i < end ; j++, i++) {
- if ( j == count ) {
+ if (j == count) {
i += skip;
j = 0;
}
- PGCACHE_WRLOCK(inode->i_mapping);
+ ll_pgcache_lock(inode->i_mapping);
if (list_empty(&inode->i_mapping->dirty_pages) &&
list_empty(&inode->i_mapping->clean_pages) &&
list_empty(&inode->i_mapping->locked_pages)) {
CDEBUG(D_INODE, "nothing left\n");
- PGCACHE_WRUNLOCK(inode->i_mapping);
+ ll_pgcache_unlock(inode->i_mapping);
break;
}
- PGCACHE_WRUNLOCK(inode->i_mapping);
+ ll_pgcache_unlock(inode->i_mapping);
if (need_resched())
schedule();
page = find_get_page(inode->i_mapping, i);
truncate_complete_page(page);
#else
truncate_complete_page(page->mapping, page);
-#endif
+#endif
unlock_page(page);
page_cache_release(page);
}
EXIT;
}
-int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new,
- void *data, int flag)
+static int ll_extent_lock_callback(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *new, void *data,
+ int flag)
{
struct inode *inode = data;
struct ll_inode_info *lli = ll_i2info(inode);
int rc;
ENTRY;
- LASSERT(inode != NULL);
+ if ((unsigned long)inode < 0x1000) {
+ LDLM_ERROR(lock, "cancelling lock with bad data %p", data);
+ LBUG();
+ }
switch (flag) {
case LDLM_CB_BLOCKING:
* could know to write-back or simply throw away the pages
* based on if the cancel comes from a desire to, say,
* read or truncate.. */
- LASSERT((unsigned long)inode > 0x1000);
- LASSERT((unsigned long)lli > 0x1000);
- LASSERT((unsigned long)lli->lli_smd > 0x1000);
+ if ((unsigned long)lli->lli_smd < 0x1000) {
+ /* note that lli is part of the inode itself, so it
+ * is valid given that we checked the inode pointer above. */
+ CERROR("inode %lu, sb %p, lli %p, lli_smd %p\n",
+ inode->i_ino, inode->i_sb, lli, lli->lli_smd);
+ LDLM_ERROR(lock, "cancel lock on bad inode %p", inode);
+ LBUG();
+ }
+
ll_pgcache_remove_extent(inode, lli->lli_smd, lock);
break;
default:
RETURN(0);
}
+/*
+ * Enqueue an extent lock on the inode without touching i_size afterwards.
+ * some callers, notably truncate, really don't want i_size set based
+ * on the size returned by the getattr, or lock acquisition in
+ * the future.
+ *
+ * lockh must arrive with a zero cookie (asserted).  Returns 0 without
+ * enqueueing anything when the file has LL_FILE_IGNORE_LOCK set or the
+ * mount has LL_SBI_NOLCK set; otherwise returns the result of obd_enqueue().
+ */
+int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode,
+                               struct lov_stripe_md *lsm,
+                               int mode, struct ldlm_extent *extent,
+                               struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc, flags = 0;
+        ENTRY;
+
+        LASSERT(lockh->cookie == 0);
+
+        /* XXX phil: can we do this? won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
+               inode->i_ino, extent->start, extent->end);
+
+        /* extent is a pointer here: pass the size of the extent it points
+         * at, not the size of the pointer itself */
+        rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent,
+                         sizeof(*extent), mode, &flags,
+                         ll_extent_lock_callback, inode, lockh);
+
+        RETURN(rc);
+}
+
+/*
+ * this grabs a lock and manually implements behaviour that makes it look like
+ * the OST is returning the file size with each lock acquisition.
+ *
+ * The extent lock itself is taken through ll_extent_lock_no_validate(); any
+ * result other than ELDLM_OK is returned unchanged.  When
+ * LLI_F_HAVE_SIZE_LOCK is already set in lli_flags nothing more is done.
+ * Otherwise ll_inode_getattr() is called; if that fails the extent lock
+ * just taken is released with ll_extent_unlock() and the error is returned.
+ * Finally an LCK_PR lock covering [i_size, OBD_OBJECT_EOF] is looked up with
+ * obd_match(), and LLI_F_HAVE_SIZE_LOCK is set when one is found.
+ */
+int ll_extent_lock(struct ll_file_data *fd, struct inode *inode,
+ struct lov_stripe_md *lsm, int mode,
+ struct ldlm_extent *extent, struct lustre_handle *lockh)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ldlm_extent size_lock;
+ struct lustre_handle match_lockh = {0};
+ int flags, rc, matched;
+ ENTRY;
+
+ rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+ if (rc != ELDLM_OK)
+ RETURN(rc);
+
+ if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags))
+ RETURN(0);
+
+ rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+ if (rc) {
+ ll_extent_unlock(fd, inode, lsm, mode, lockh);
+ RETURN(rc);
+ }
+
+ size_lock.start = inode->i_size;
+ size_lock.end = OBD_OBJECT_EOF;
+
+ /* XXX I bet we should be checking the lock ignore flags.. */
+ flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
+ matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT,
+ &size_lock, sizeof(size_lock), LCK_PR, &flags,
+ inode, &match_lockh);
+
+ /* hey, alright, we hold a size lock that covers the size we
+ * just found, its not going to change for a while..  record that in
+ * lli_flags; the obd_cancel() on match_lockh presumably drops the
+ * reference obd_match() took on the matched lock -- TODO confirm. */
+ if (matched == 1) {
+ set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags);
+ obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR,
+ &match_lockh);
+ }
+
+ RETURN(0);
+}
+
+/*
+ * Release an extent lock previously taken on this inode.  Nothing is
+ * cancelled (and 0 is returned) when locking is being skipped for this
+ * file (LL_FILE_IGNORE_LOCK) or for the whole mount (LL_SBI_NOLCK);
+ * otherwise the result of obd_cancel() is returned.
+ */
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                     struct lov_stripe_md *lsm, int mode,
+                     struct lustre_handle *lockh)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc;
+        ENTRY;
+
+        /* XXX phil: can we do this? won't it screw the file size up? */
+        if (fd != NULL && (fd->fd_flags & LL_FILE_IGNORE_LOCK) != 0)
+                RETURN(0);
+        if ((sbi->ll_flags & LL_SBI_NOLCK) != 0)
+                RETURN(0);
+
+        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+        RETURN(rc);
+}
+
static ssize_t ll_file_read(struct file *filp, char *buf, size_t count,
loff_t *ppos)
{
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
count);
+
+ if (!lsm)
+ RETURN(0);
+
/* grab a -> eof extent to push extending writes out of node's caches
* so we can see them at the getattr after lock acquisition. this will
* turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt
/*
* Write to a file (through the page cache).
*/
-static ssize_t
-ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
+static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
+ loff_t *ppos)
{
struct ll_file_data *fd = file->private_data;
struct inode *inode = file->f_dentry->d_inode;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n",
inode->i_ino, inode->i_generation, inode, count, *ppos);
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
/*
* sleep doing some writeback work of this mount's dirty data
* if the VM thinks we're low on memory.. other dirtying code
* careful not to hold locked pages while they do so. like
* ll_prepare_write. *cough*
*/
- LL_CHECK_DIRTY(inode->i_sb);
+ ll_check_dirty(inode->i_sb);
/* POSIX, but surprised the VFS doesn't check this already */
if (count == 0)
RETURN(0);
+ LASSERT(lsm);
+
if (file->f_flags & O_APPEND) {
extent.start = 0;
extent.end = OBD_OBJECT_EOF;
lsm = lli->lli_smd;
if (lsm) {
up(&lli->lli_open_sem);
- CERROR("stripe already exists for ino %lu\n", inode->i_ino);
+ CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n",
+ inode->i_ino);
/* If we haven't already done the open, do so now */
if (file->f_flags & O_LOV_DELAY_CREATE) {
int rc2 = ll_osc_open(conn, inode, file, lsm);
struct ll_file_data *fd = file->private_data;
struct lustre_handle *conn;
int flags;
+
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
inode->i_generation, inode, cmd);
int ll_fsync(struct file *file, struct dentry *dentry, int data)
{
- int ret;
struct inode *inode = dentry->d_inode;
+ int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
inode->i_generation, inode);
* still holding the PW lock that covered the dirty pages. XXX we
* should probably get a reference on it, though, just to be clear.
*/
- ret = filemap_fdatasync(dentry->d_inode->i_mapping);
- if ( ret == 0 )
- ret = filemap_fdatawait(dentry->d_inode->i_mapping);
+ rc = filemap_fdatasync(inode->i_mapping);
+ if (rc == 0)
+ rc = filemap_fdatawait(inode->i_mapping);
- RETURN(ret);
+ RETURN(rc);
}
-int ll_inode_revalidate(struct dentry *dentry)
+int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
{
struct inode *inode = dentry->d_inode;
- struct lov_stripe_md *lsm = NULL;
+ struct lov_stripe_md *lsm;
ENTRY;
if (!inode) {
below when the lock is marked CB_PENDING. That RPC may not
go out because someone else may be in another RPC waiting for
that lock*/
- if (!(dentry->d_it && dentry->d_it->it_lock_mode) &&
- !ll_have_md_lock(dentry)) {
+ if (!(it && it->it_lock_mode) && !ll_have_md_lock(dentry)) {
+ struct lustre_md md;
struct ptlrpc_request *req = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
struct ll_fid fid;
- struct mds_body *body;
- struct lov_mds_md *lmm;
unsigned long valid = 0;
- int eadatalen = 0, rc;
+ int rc;
+ int ealen = 0;
- /* Why don't we update all valid MDS fields here, if we're
- * doing an RPC anyways? -phil */
if (S_ISREG(inode->i_mode)) {
- eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+ ealen = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
valid |= OBD_MD_FLEASIZE;
}
ll_inode2fid(&fid, inode);
- rc = mdc_getattr(&sbi->ll_mdc_conn, &fid,
- valid, eadatalen, &req);
+ rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, ealen, &req);
if (rc) {
CERROR("failure %d inode %lu\n", rc, inode->i_ino);
RETURN(-abs(rc));
}
-
- body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
- LASSERT (body != NULL); /* checked by mdc_getattr() */
- LASSERT_REPSWABBED (req, 0); /* swabbed by mdc_getattr() */
-
- if (S_ISREG(inode->i_mode) &&
- (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) {
- CERROR("MDS sent back size for regular file\n");
- body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
- }
+ rc = mdc_req2lustre_md(req, 0, &sbi->ll_osc_conn, &md);
/* XXX Too paranoid? */
- if ((body->valid ^ valid) & OBD_MD_FLEASIZE)
+ if ((md.body->valid ^ valid) & OBD_MD_FLEASIZE)
CERROR("Asked for %s eadata but got %s\n",
(valid & OBD_MD_FLEASIZE) ? "some" : "no",
- (body->valid & OBD_MD_FLEASIZE) ? "some":"none");
-
- if (S_ISREG(inode->i_mode) &&
- (body->valid & OBD_MD_FLEASIZE)) {
- if (body->eadatasize == 0) { /* no EA data */
- CERROR("OBD_MD_FLEASIZE set but no data\n");
- RETURN(-EPROTO);
- }
- /* Only bother with this if inode's lsm not set? */
- lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize);
- LASSERT(lmm != NULL); /* mdc_getattr() checked */
- LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */
-
- rc = obd_unpackmd (&sbi->ll_osc_conn,
- &lsm, lmm, body->eadatasize);
- if (rc < 0) {
- CERROR("Error %d unpacking eadata\n", rc);
- ptlrpc_req_finished(req);
- RETURN(rc);
- }
- LASSERT(rc >= sizeof(*lsm));
+ (md.body->valid & OBD_MD_FLEASIZE) ? "some":
+ "none");
+ if (rc) {
+ ptlrpc_req_finished(req);
+ RETURN(rc);
}
- ll_update_inode(inode, body, lsm);
- if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm)
- obd_free_memmd(&sbi->ll_osc_conn, &lsm);
+ ll_update_inode(inode, md.body, md.lsm);
+ if (md.lsm != NULL && ll_i2info(inode)->lli_smd != md.lsm)
+ obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
ptlrpc_req_finished(req);
}
}
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+ struct lookup_intent *it,
struct kstat *stat)
{
int res = 0;
struct inode *inode = de->d_inode;
+ res = ll_inode_revalidate_it(de, it);
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
- res = ll_inode_revalidate(de);
+
if (res)
return res;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- stat->dev = inode->i_dev;
-#endif
+
+ stat->dev = inode->i_sb->s_dev;
stat->ino = inode->i_ino;
stat->mode = inode->i_mode;
stat->nlink = inode->i_nlink;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
stat->size = inode->i_size;
+ stat->blksize = inode->i_blksize;
+ stat->blocks = inode->i_blocks;
return 0;
}
#endif
setattr: ll_setattr,
truncate: ll_truncate,
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- getattr: ll_getattr,
+ getattr_it: ll_getattr,
#else
- revalidate: ll_inode_revalidate,
+ revalidate_it: ll_inode_revalidate_it,
#endif
};
setattr_raw: ll_setattr_raw,
setattr: ll_setattr,
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- getattr: ll_getattr,
+ getattr_it: ll_getattr,
#else
- revalidate: ll_inode_revalidate,
+ revalidate_it: ll_inode_revalidate_it,
#endif
};
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/time.h>
-#include "llite_internal.h"
/* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
#ifdef PG_inactive_clean
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/lustre_lite.h>
+#include "llite_internal.h"
#ifndef list_for_each_prev_safe
#define list_for_each_prev_safe(pos, n, head) \
extern spinlock_t inode_lock;
-struct ll_writeback_pages {
- obd_count npgs, max;
- struct brw_page *pga;
-};
-
/*
* check to see if we're racing with truncate and put the page in
* the brw_page array. returns 0 if there is more room and 1
list_del(&page->list);
list_add(&page->list, &mapping->locked_pages);
- if ( ! PageDirty(page) ) {
+ if (!PageDirty(page)) {
unlock_page(page);
continue;
}
ClearPageDirty(page);
- if ( llwp_consume_page(llwp, inode, page) != 0)
+ if (llwp_consume_page(llwp, inode, page) != 0)
break;
}
EXIT;
}
-static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp)
+static void ll_writeback(struct inode *inode, struct obdo *oa,
+ struct ll_writeback_pages *llwp)
{
- int rc, i;
struct ptlrpc_request_set *set;
+ int rc, i;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n",
inode->i_ino, inode->i_generation, inode,
((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count);
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
set = ptlrpc_prep_set();
if (set == NULL) {
CERROR ("Can't create request set\n");
rc = -ENOMEM;
} else {
- rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode),
+ rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), oa,
ll_i2info(inode)->lli_smd, llwp->npgs,
llwp->pga, set, NULL);
if (rc == 0)
- rc = ptlrpc_set_wait (set);
+ rc = ptlrpc_set_wait(set);
+ if (rc == 0)
+ obdo_refresh_inode(inode, oa,
+ oa->o_valid & ~OBD_MD_FLSIZE);
ptlrpc_set_destroy (set);
}
/*
unsigned long old_flags; /* hack? */
int making_progress;
struct inode *inode;
+ struct obdo oa;
int rc = 0;
ENTRY;
llwp.npgs = 0;
ll_get_dirty_pages(inode, &llwp);
if (llwp.npgs) {
- lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
- LPROC_LL_WB_PRESSURE,
- llwp.npgs);
- ll_writeback(inode, &llwp);
- rc += llwp.npgs;
- making_progress = 1;
+ oa.o_id =
+ ll_i2info(inode)->lli_smd->lsm_object_id;
+ oa.o_valid = OBD_MD_FLID;
+ obdo_from_inode(&oa, inode,
+ OBD_MD_FLTYPE | OBD_MD_FLATIME|
+ OBD_MD_FLMTIME| OBD_MD_FLCTIME);
+ lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
+ LPROC_LL_WB_PRESSURE,
+ llwp.npgs);
+ ll_writeback(inode, &oa, &llwp);
+ rc += llwp.npgs;
+ making_progress = 1;
}
} while (llwp.npgs && should_writeback());
}
#endif /* linux 2.5 */
-int ll_batch_writepage(struct inode *inode, struct page *page)
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page)
{
unsigned long old_flags; /* hack? */
struct ll_writeback_pages llwp;
int rc = 0;
ENTRY;
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
old_flags = current->flags;
current->flags |= PF_MEMALLOC;
rc = ll_alloc_brw(inode, &llwp);
if (llwp.npgs) {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_WB_WRITEPAGE, llwp.npgs);
- ll_writeback(inode, &llwp);
+ ll_writeback(inode, oa, &llwp);
}
kfree(llwp.pga);
#ifndef LLITE_INTERNAL_H
#define LLITE_INTERNAL_H
+
+struct ll_sb_info;
struct lustre_handle;
struct lov_stripe_md;
+extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+extern struct proc_dir_entry *proc_lustre_fs_root;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define hlist_del_init list_del_init
+#endif
+
+/* map a ll_inode_info back to its VFS inode: on 2.5 and later this is the
+ * lli_vfs_inode member, on older kernels the inode containing lli as
+ * u.generic_ip */
+static inline struct inode *ll_info2i(struct ll_inode_info *lli)
+{
+        struct inode *vfs_inode;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        vfs_inode = list_entry(lli, struct inode, u.generic_ip);
+#else
+        vfs_inode = &lli->lli_vfs_inode;
+#endif
+        return vfs_inode;
+}
+
+/* llite/commit_callback.c */
+int ll_commitcbd_setup(struct ll_sb_info *);
+int ll_commitcbd_cleanup(struct ll_sb_info *);
+
+/* lproc_llite.c */
+int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
+ struct super_block *sb, char *osc, char *mdc);
+void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
+
+/* llite/namei.c */
+struct inode *ll_iget(struct super_block *sb, ino_t hash,
+ struct lustre_md *lic);
+struct dentry *ll_find_alias(struct inode *, struct dentry *);
+int ll_it_open_error(int phase, struct lookup_intent *it);
int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
int flags, void *opaque);
+
+/* llite/rw.c */
+void ll_end_writeback(struct inode *, struct page *);
+
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+ unsigned long end);
int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
int *eof, void *data);
int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
unsigned long index);
+/* llite/file.c */
+extern int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *);
+
+/* llite/super.c */
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+
+/* iod.c */
+/* add VAL to the fis_<STAT> counter of FIS while holding its fis_lock */
+#define IO_STAT_ADD(FIS, STAT, VAL) do {                        \
+        struct file_io_stats *_fis_ = (FIS);                    \
+        spin_lock(&_fis_->fis_lock);                            \
+        _fis_->fis_##STAT += (VAL);                             \
+        spin_unlock(&_fis_->fis_lock);                          \
+} while (0)
+
+/* same, against the ll_iostats member of INODE's ll_sb_info */
+#define INODE_IO_STAT_ADD(INODE, STAT, VAL)                     \
+        IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL)
+
+/* same, for the inode that owns PAGE; page->mapping is an address_space,
+ * so go through ->host to get the inode that ll_i2sbi() expects */
+#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL)                       \
+        INODE_IO_STAT_ADD((PAGE)->mapping->host, STAT, VAL)
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+/* XXX lliod needs more work in 2.5 before being proven and brought back
+ * to 2.4, it'll at least require a patch to introduce page->private */
+int lliod_start(struct ll_sb_info *sbi, struct inode *inode);
+void lliod_stop(struct ll_sb_info *sbi);
+#else
+#define lliod_start(sbi, inode) ({int _ret = 0; (void)sbi, (void)inode; _ret;})
+#define lliod_stop(sbi) do { (void)sbi; } while (0)
+#endif
+void lliod_wakeup(struct inode *inode);
+void lliod_give_plist(struct inode *inode, struct plist *plist, int rw);
+void lliod_give_page(struct inode *inode, struct page *page, int rw);
+void plist_init(struct plist *plist); /* for lli initialization.. */
+
+void ll_lldo_init(struct ll_dirty_offsets *lldo);
+void ll_record_dirty(struct inode *inode, unsigned long offset);
+void ll_remove_dirty(struct inode *inode, unsigned long start,
+ unsigned long end);
+int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
+ unsigned long *end);
+int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest);
+
+
+/* llite/super25.c */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+ struct lookup_intent *it,
+ struct kstat *stat);
+#endif
+
+
+/* llite/dcache.c */
+void ll_intent_release(struct lookup_intent *);
+extern void ll_set_dd(struct dentry *de);
+void ll_unhash_aliases(struct inode *);
+
+/* llite/rw.c */
+void ll_truncate(struct inode *inode);
+void ll_end_writeback(struct inode *inode, struct page *page);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+int ll_check_dirty(struct super_block *sb);
+int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page);
+#else
+#define ll_check_dirty(SB) do { (void)SB; } while (0)
+#endif
+
+/* llite/llite_lib.c */
+
+extern struct super_operations ll_super_operations;
+
+char *ll_read_opt(const char *opt, char *data);
+int ll_set_opt(const char *opt, char *data, int fl);
+void ll_options(char *options, char **ost, char **mds, int *flags);
+void ll_lli_init(struct ll_inode_info *lli);
+int ll_fill_super(struct super_block *sb, void *data, int silent);
+void ll_put_super(struct super_block *sb);
+void ll_clear_inode(struct inode *inode);
+int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc);
+int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
+int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr(struct dentry *de, struct iattr *attr);
+int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
+void ll_update_inode(struct inode *inode, struct mds_body *body,
+ struct lov_stripe_md *lsm);
+int it_disposition(struct lookup_intent *it, int flag);
+void it_set_disposition(struct lookup_intent *it, int flag);
+void ll_read_inode2(struct inode *inode, void *opaque);
+void ll_umount_begin(struct super_block *sb);
+
+
+
#endif /* LLITE_INTERNAL_H */
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
#include "llite_internal.h"
/* /proc/lustre/llite mount point registration */
+struct proc_dir_entry *proc_lustre_fs_root;
#ifndef LPROCFS
int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){}
#else
-#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct) \
-int fct_name(char *page, char **start, off_t off, \
- int count, int *eof, void *data) \
-{ \
- struct statfs sfs; \
- int rc; \
- LASSERT(data != NULL); \
- rc = get_statfs_fct((struct super_block*)data, &sfs); \
- return (rc==0 \
- ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \
- : rc); \
+long long mnt_instance;
+
+/* /proc read handler: print os_bsize, as reported by ll_statfs_internal()
+ * for the super_block passed in data; a statfs failure is returned as-is */
+static int ll_rd_blksize(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs stats;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &stats, jiffies - HZ);
+        if (rc != 0)
+                return rc;
+
+        *eof = 1;
+        return snprintf(page, count, "%u\n", stats.os_bsize);
}
-long long mnt_instance;
+/* /proc read handler: print the total size in kilobytes, i.e. os_blocks
+ * scaled up by the block size; a statfs failure is returned as-is */
+static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs stats;
+        __u64 kbytes;
+        __u32 scale;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &stats, jiffies - HZ);
+        if (rc != 0)
+                return rc;
+
+        /* double the block count once for every power of two by which
+         * the block size exceeds 1k */
+        kbytes = stats.os_blocks;
+        scale = stats.os_bsize >> 10;
+        for (scale >>= 1; scale != 0; scale >>= 1)
+                kbytes <<= 1;
+
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", kbytes);
+}
+
+static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct super_block *sb = (struct super_block *)data;
+ struct obd_statfs osfs;
+ int rc;
-LPROC_LLITE_STAT_FCT(rd_blksize, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_kbytesfree, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filestotal, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filesfree, vfs_statfs);
-LPROC_LLITE_STAT_FCT(rd_filegroups, vfs_statfs);
+ LASSERT(sb != NULL);
+ rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bfree;
+
+ while (blk_size >>= 1)
+ result <<= 1;
+
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+ return rc;
+}
+
+/* /proc read handler: print os_files, as reported by ll_statfs_internal()
+ * for the super_block passed in data; a statfs failure is returned as-is */
+static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
+                            int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs stats;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &stats, jiffies - HZ);
+        if (rc != 0)
+                return rc;
+
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", stats.os_files);
+}
-int rd_path(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+/* /proc read handler: print os_ffree, as reported by ll_statfs_internal()
+ * for the super_block passed in data; a statfs failure is returned as-is */
+static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        struct super_block *sb = (struct super_block *)data;
+        struct obd_statfs stats;
+        int rc;
+
+        LASSERT(sb != NULL);
+        rc = ll_statfs_internal(sb, &stats, jiffies - HZ);
+        if (rc != 0)
+                return rc;
+
+        *eof = 1;
+        return snprintf(page, count, LPU64"\n", stats.os_ffree);
+}
+
+#if 0
+static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
return 0;
}
+#endif
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int ll_rd_fstype(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct super_block *sb = (struct super_block*)data;
return snprintf(page, count, "%s\n", sb->s_type->name);
}
-int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct super_block *sb = (struct super_block *)data;
return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid);
}
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", rd_sb_uuid, 0, 0 },
- { "mntpt_path", rd_path, 0, 0 },
- { "fstype", rd_fstype, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal", rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
- { "dirty_pages", ll_rd_dirty_pages, 0, 0},
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", ll_rd_sb_uuid, 0, 0 },
+ //{ "mntpt_path", ll_rd_path, 0, 0 },
+ { "fstype", ll_rd_fstype, 0, 0 },
+ { "blocksize", ll_rd_blksize, 0, 0 },
+ { "kbytestotal", ll_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", ll_rd_kbytesfree, 0, 0 },
+ { "filestotal", ll_rd_filestotal, 0, 0 },
+ { "filesfree", ll_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ { "dirty_pages", ll_rd_dirty_pages, 0, 0},
{ "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
+#endif
{ 0 }
};
#include <linux/obd_support.h>
#include <linux/lustre_lite.h>
#include <linux/lustre_dlm.h>
-
-/* from dcache.c */
-extern void ll_set_dd(struct dentry *de);
-
-/* from super.c */
-extern void ll_change_inode(struct inode *inode);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
-
-/* from dir.c */
-extern int ll_add_link (struct dentry *dentry, struct inode *inode);
-obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
-int ext2_make_empty(struct inode *inode, struct inode *parent);
-struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
- struct dentry *dentry, struct page ** res_page);
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
-int ext2_empty_dir (struct inode * inode);
-struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
-void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
- struct page *page, struct inode *inode);
-
-/*
- * Couple of helper functions - make the code slightly cleaner.
- */
-static inline void ext2_inc_count(struct inode *inode)
-{
- inode->i_nlink++;
-}
-
-/* postpone the disk update until the inode really goes away */
-static inline void ext2_dec_count(struct inode *inode)
-{
- inode->i_nlink--;
-}
-static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
-{
- int err;
- err = ll_add_link(dentry, inode);
- if (!err) {
- d_instantiate(dentry, inode);
- return 0;
- }
- ext2_dec_count(inode);
- iput(inode);
- return err;
-}
+#include "llite_internal.h"
/* methods */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
+static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque)
#else
static int ll_test_inode(struct inode *inode, void *opaque)
#endif
{
- struct ll_read_inode2_cookie *lic = opaque;
- struct mds_body *body = lic->lic_body;
+ struct lustre_md *md = opaque;
- if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
+ if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
CERROR("invalid generation\n");
- CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
- inode, inode->i_ino, inode->i_generation, ino,
- lic->lic_body->generation);
+ CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %u/%u\n",
+ inode, inode->i_ino, inode->i_generation,
+ md->body->ino, md->body->generation);
- if (inode->i_generation != lic->lic_body->generation)
+ if (inode->i_generation != md->body->generation)
return 0;
/* Apply the attributes in 'opaque' to this inode */
- ll_update_inode(inode, body, lic->lic_lsm);
+ ll_update_inode(inode, md->body, md->lsm);
return 1;
}
* Returns inode or NULL
*/
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-extern int ll_read_inode2(struct inode *inode, void *opaque);
+/* "set" callback handed to iget5_locked(): fills in the inode from opaque
+ * via ll_read_inode2() and always reports success */
+int ll_set_inode(struct inode *inode, void *opaque)
+{
+        const int rc = 0;
+
+        ll_read_inode2(inode, opaque);
+        return rc;
+}
struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct ll_read_inode2_cookie *lic)
+ struct lustre_md *md)
{
struct inode *inode;
LASSERT(hash != 0);
- inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
- if (inode == NULL)
- return NULL; /* removed ERR_PTR(-ENOMEM) -eeb */
+ inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md);
+
+ if (!inode)
+ return (NULL); /* removed ERR_PTR(-ENOMEM) -eeb */
if (inode->i_state & I_NEW)
unlock_new_inode(inode);
}
#else
struct inode *ll_iget(struct super_block *sb, ino_t hash,
- struct ll_read_inode2_cookie *lic)
+ struct lustre_md *md)
{
struct inode *inode;
LASSERT(hash != 0);
- inode = iget4(sb, hash, ll_find_inode, lic);
+ inode = iget4(sb, hash, ll_test_inode, md);
CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
inode->i_generation, inode);
return inode;
int ll_it_open_error(int phase, struct lookup_intent *it)
{
- if (it->it_disposition & IT_OPEN_OPEN) {
- if (phase == IT_OPEN_OPEN)
+ if (it_disposition(it, DISP_OPEN_OPEN)) {
+ if (phase == DISP_OPEN_OPEN)
return it->it_status;
else
return 0;
}
- if (it->it_disposition & IT_OPEN_CREATE) {
- if (phase == IT_OPEN_CREATE)
+ if (it_disposition(it, DISP_OPEN_CREATE)) {
+ if (phase == DISP_OPEN_CREATE)
return it->it_status;
else
return 0;
}
- if (it->it_disposition & IT_OPEN_LOOKUP) {
- if (phase == IT_OPEN_LOOKUP)
+ if (it_disposition(it, DISP_LOOKUP_EXECD)) {
+ if (phase == DISP_LOOKUP_EXECD)
return it->it_status;
else
return 0;
}
+ CERROR("it disp: %X, status: %d\n", it->it_disposition, it->it_status);
LBUG();
return 0;
}
-int ll_mdc_blocking_ast(struct ldlm_lock *lock,
- struct ldlm_lock_desc *desc,
+int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, int flag)
{
int rc;
struct lustre_handle lockh;
+ struct inode *inode = lock->l_data;
ENTRY;
switch (flag) {
break;
case LDLM_CB_CANCELING: {
/* Invalidate all dentries associated with this inode */
- struct inode *inode = lock->l_data;
- LASSERT(inode != NULL);
-
+ if (inode == NULL)
+ break;
+ if (lock->l_resource->lr_name.name[0] != inode->i_ino ||
+ lock->l_resource->lr_name.name[1] != inode->i_generation) {
+ LDLM_ERROR(lock, "data mismatch with ino %lu/%u",
+ inode->i_ino, inode->i_generation);
+ }
if (S_ISDIR(inode->i_mode)) {
CDEBUG(D_INODE, "invalidating inode %lu\n",
inode->i_ino);
#warning FIXME: we should probably free this inode if there are no aliases
if (inode->i_sb->s_root &&
inode != inode->i_sb->s_root->d_inode)
- d_unhash_aliases(inode);
+ ll_unhash_aliases(inode);
break;
}
default:
RETURN(0);
}
-void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
-{
- struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- ENTRY;
-
- LASSERT(lock != NULL);
- lock->l_data = inode;
- LDLM_LOCK_PUT(lock);
- EXIT;
-}
-
int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
int flags, void *opaque)
{
data->mode = mode;
}
-#define IT_ENQ_COMPLETE (1<<16)
-
+/*
+ * This long block is all about fixing up the local state so that it is
+ * correct as of the moment _before_ the operation was applied; that
+ * way, the VFS will think that everything is normal and call Lustre's
+ * regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want the lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it_status is the
+ * creation failure mode. In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD is set, then it_status is the rc of the
+ * child lookup.
+ */
int ll_intent_lock(struct inode *parent, struct dentry **de,
- struct lookup_intent *it, intent_finish_cb intent_finish)
+ struct lookup_intent *it, int flags, intent_finish_cb intent_finish)
{
struct dentry *dentry = *de;
struct inode *inode = dentry->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(parent);
struct lustre_handle lockh;
struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
- struct ptlrpc_request *request = NULL;
- int rc = 0, offset, flag = 0;
+ struct ptlrpc_request *request;
+ int rc = 0;
+ struct mds_body *mds_body;
+ int mode;
obd_id ino = 0;
ENTRY;
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
- if (it && it->it_op == 0)
- *it = lookup_it;
+ if (it && it->it_magic != INTENT_MAGIC) {
+ CERROR("WARNING: uninitialized intent\n");
+ LBUG();
+ intent_init(it, IT_LOOKUP, 0);
+ }
+ if (it->it_op == IT_GETATTR ||
+ it->it_op == 0)
+ it->it_op = IT_LOOKUP;
+
#endif
- if (it == NULL)
+ if (!it ||it->it_op == IT_GETXATTR)
it = &lookup_it;
+ it->it_op_release = ll_intent_release;
+
CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
dentry->d_name.name, ldlm_it2str(it->it_op));
-
+
if (dentry->d_name.len > EXT2_NAME_LEN)
RETURN(-ENAMETOOLONG);
- if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
+ /* This function may be called twice, but we only want to execute
+  * the request associated with the intent once. If that was already
+  * done, we skip past this and use the results. */
+ if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
struct mdc_op_data op_data;
ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
ll_intent_to_lock_mode(it), &op_data,
&lockh, NULL, 0, ldlm_completion_ast,
- ll_mdc_blocking_ast, parent);
+ ll_mdc_blocking_ast, NULL);
if (rc < 0)
RETURN(rc);
memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
}
-
- request = (struct ptlrpc_request *)it->it_data;
+ request = it->it_data;
+ LASSERT(request != NULL);
/* non-zero it_disposition indicates that the server performed the
* intent on our behalf. */
- if (it->it_disposition) {
- struct mds_body *mds_body;
- int mode;
-
- /* This long block is all about fixing up the local
- * state so that it is correct as of the moment
- * _before_ the operation was applied; that way, the
- * VFS will think that everything is normal and call
- * Lustre's regular FS function.
- *
- * If we're performing a creation, that means that unless the
- * creation failed with EEXIST, we should fake up a negative
- * dentry. Likewise for the target of a hard link.
- *
- * For everything else, we want to lookup to succeed. */
-
- /* One additional note: if CREATE/MKDIR/etc succeeded,
- * we add an extra reference to the request because we
- * need to keep it around until ll_create gets called.
- * For anything else which results in
- * LL_LOOKUP_POSITIVE, we can do the iget()
- * immediately with the contents of the reply (in the
- * intent_finish callback). In the create case,
- * however, we need to wait until ll_create_node to do
- * the iget() or the VFS will abort with -EEXISTS.
- */
-
- offset = 1;
- mds_body = lustre_msg_buf(request->rq_repmsg, offset,
- sizeof(*mds_body));
- LASSERT (mds_body != NULL); /* mdc_enqueue checked */
- LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
-
- ino = mds_body->fid1.id;
- mode = mds_body->mode;
-
- /*We were called from revalidate2: did we find the same inode?*/
- if (inode && (ino != inode->i_ino ||
- mds_body->fid1.generation != inode->i_generation)) {
- it->it_disposition |= IT_ENQ_COMPLETE;
- RETURN(-ESTALE);
- }
+ LASSERT(it_disposition(it, DISP_IT_EXECD));
+
+
+ mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+ LASSERT(mds_body != NULL); /* mdc_enqueue checked */
+ LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+ /* XXX everything with fids please, no ino's inode's etc */
+ ino = mds_body->fid1.id;
+ mode = mds_body->mode;
+
+ /*We were called from revalidate2: did we find the same inode?*/
+ if (inode &&
+ (ino != inode->i_ino ||
+ mds_body->fid1.generation != inode->i_generation)) {
+ it_set_disposition(it, DISP_ENQ_COMPLETE);
+ RETURN(-ESTALE);
+ }
- /* If we're doing an IT_OPEN which did not result in an actual
- * successful open, then we need to remove the bit which saves
- * this request for unconditional replay. */
- if (it->it_op & IT_OPEN &&
- (!(it->it_disposition & IT_OPEN_OPEN) ||
- it->it_status != 0)) {
+ /* If we're doing an IT_OPEN which did not result in an actual
+ * successful open, then we need to remove the bit which saves
+ * this request for unconditional replay. */
+ if (it->it_op & IT_OPEN) {
+ if (!it_disposition(it, DISP_OPEN_OPEN) ||
+ it->it_status != 0) {
unsigned long flags;
-
+
spin_lock_irqsave (&request->rq_lock, flags);
request->rq_replay = 0;
spin_unlock_irqrestore (&request->rq_lock, flags);
}
-
- if (it->it_op & IT_CREAT) {
- mdc_store_inode_generation(request, 2, 1);
- /* The server will return to us, in it_disposition, an
- * indication of exactly what it_status refers to.
- *
- * If IT_OPEN_OPEN is set, then it_status refers to the
- * open() call, otherwise if IT_OPEN_CREATE is set, then
- * it status is the creation failure mode. In either
- * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
- * indicating whether the child lookup was successful.
- *
- * Else, if IT_OPEN_LOOKUP then it_status is the rc
- * of the child lookup.
- *
- * Finally, if none of the bits are set, then the
- * failure occurred while looking up the parent. */
- rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
- if (rc)
- GOTO(drop_req, rc);
-
- if (it->it_disposition & IT_OPEN_CREATE)
- ptlrpc_request_addref(request);
- if (it->it_disposition & IT_OPEN_OPEN)
- ptlrpc_request_addref(request);
-
- if (it->it_disposition & IT_OPEN_NEG)
- flag = LL_LOOKUP_NEGATIVE;
- else
- flag = LL_LOOKUP_POSITIVE;
- } else if (it->it_op == IT_OPEN) {
- LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
-
- rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
- if (rc)
- GOTO(drop_req, rc);
-
- if (it->it_disposition & IT_OPEN_OPEN)
- ptlrpc_request_addref(request);
-
- if (it->it_disposition & IT_OPEN_NEG)
- flag = LL_LOOKUP_NEGATIVE;
- else
- flag = LL_LOOKUP_POSITIVE;
- } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
- /* For check ops, we want the lookup to succeed */
- it->it_data = NULL;
- if (it->it_status)
- flag = LL_LOOKUP_NEGATIVE;
- else
- flag = LL_LOOKUP_POSITIVE;
- } else
- LBUG();
- } else {
- struct ll_fid fid;
- obd_flag valid;
- int eadatalen;
- int mode;
-
- LBUG(); /* For the moment, no non-intent locks */
-
- /* it_disposition == 0 indicates that it just did a simple lock
- * request, for which we are very thankful. move along with
- * the local lookup then. */
-
- //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
- offset = 0;
-
- ino = ll_inode_by_name(parent, dentry, &mode);
- if (!ino) {
- CERROR("inode %*s not found by name\n",
- dentry->d_name.len, dentry->d_name.name);
- GOTO(drop_lock, rc = -ENOENT);
- }
-
- valid = OBD_MD_FLNOTOBD;
-
- if (S_ISREG(mode)) {
- eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
- valid |= OBD_MD_FLEASIZE;
- } else {
- eadatalen = 0;
- valid |= OBD_MD_FLBLOCKS;
- }
-
- fid.id = ino;
- fid.generation = 0;
- fid.f_type = mode;
- rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
- eadatalen, &request);
- if (rc) {
- CERROR("failure %d inode "LPX64"\n", rc, ino);
- GOTO(drop_lock, rc = -abs(rc));
- }
}
- LASSERT (request != NULL);
+ rc = ll_it_open_error(DISP_LOOKUP_EXECD, it);
+ if (rc)
+ GOTO(drop_req, rc);
+
+ /* Keep the request around for the later phases of the call: take one
+  * extra reference for each of DISP_OPEN_CREATE and DISP_OPEN_OPEN.
+  * This means the DISP_XX bits must guarantee we make it into the call.
+  */
+ if (it_disposition(it, DISP_OPEN_CREATE))
+ ptlrpc_request_addref(request);
+ if (it_disposition(it, DISP_OPEN_OPEN))
+ ptlrpc_request_addref(request);
+
+ if (it->it_op & IT_CREAT) {
+ /* XXX this belongs in ll_create_iit */
+ } else if (it->it_op == IT_OPEN) {
+ LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+ } else
+ LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
if (intent_finish != NULL) {
struct lustre_handle old_lock;
struct ldlm_lock *lock;
- rc = intent_finish(flag, request, parent, de, it, offset, ino);
+ rc = intent_finish(request, parent, de, it, 1, ino);
dentry = *de; /* intent_finish may change *de */
inode = dentry->d_inode;
if (rc != 0)
}
ptlrpc_req_finished(request);
- /* This places the intent in the dentry so that the vfs_xxx
- * operation can lay its hands on it; but that is not always
- * needed... (we need to save it in the GETATTR case for the
- * benefit of ll_inode_revalidate -phil) */
- /* Ignore trying to save the intent for "special" inodes as
- * they have special semantics that can cause deadlocks on
- * the intent semaphore. -mmex */
- if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
- LL_SAVE_INTENT(dentry, it);
- else
- CDEBUG(D_DENTRY,
- "D_IT dentry %p fsdata %p intent: %s status %d\n",
- dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
- it->it_status);
-
+ CDEBUG(D_DENTRY, "D_IT dentry %p intent: %s status %d disp %x\n",
+ dentry, ldlm_it2str(it->it_op), it->it_status, it->it_disposition);
+
+ /* drop IT_LOOKUP locks */
if (it->it_op == IT_LOOKUP)
- ll_intent_release(dentry, it);
-
+ ll_intent_release(it);
RETURN(rc);
drop_lock:
- ll_intent_release(dentry, it);
+ ll_intent_release(it);
drop_req:
ptlrpc_req_finished(request);
RETURN(rc);
if (!list_empty(&dentry->d_lru))
list_del_init(&dentry->d_lru);
- list_del_init(&dentry->d_hash);
+ hlist_del_init(&dentry->d_hash);
__d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
spin_unlock(&dcache_lock);
atomic_inc(&dentry->d_count);
}
static int
-lookup2_finish(int flag, struct ptlrpc_request *request,
+lookup2_finish(struct ptlrpc_request *request,
struct inode *parent, struct dentry **de,
struct lookup_intent *it, int offset, obd_id ino)
{
struct ll_sb_info *sbi = ll_i2sbi(parent);
struct dentry *dentry = *de, *saved = *de;
struct inode *inode = NULL;
- struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
+ int rc;
/* NB 1 request reference will be taken away by ll_intent_lock()
* when I return */
-
- if (!(flag & LL_LOOKUP_NEGATIVE)) {
+ if (!it_disposition(it, DISP_LOOKUP_NEG)) {
+ struct lustre_md md;
ENTRY;
- /* We only get called if the mdc_enqueue() called from
- * ll_intent_lock() was successful. Therefore the mds_body
- * is present and correct, and the eadata is present if
- * body->eadatasize != 0 (but still opaque, so only
- * obd_unpackmd() can check the size) */
- lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
- sizeof (*lic.lic_body));
- LASSERT(lic.lic_body != NULL);
- LASSERT_REPSWABBED(request, offset);
-
- if (S_ISREG(lic.lic_body->mode) &&
- (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
- struct lov_mds_md *lmm;
- int lmm_size;
- int rc;
-
- lmm_size = lic.lic_body->eadatasize;
- if (lmm_size == 0) {
- CERROR("OBD_MD_FLEASIZE set but "
- "eadatasize 0\n");
- RETURN(-EPROTO);
- }
- lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
- lmm_size);
- LASSERT(lmm != NULL);
- LASSERT_REPSWABBED(request, offset + 1);
-
- rc = obd_unpackmd(&sbi->ll_osc_conn,
- &lic.lic_lsm, lmm, lmm_size);
- if (rc < 0) {
- CERROR("Error %d unpacking eadata\n", rc);
- RETURN(rc);
- }
- LASSERT(rc >= sizeof(*lic.lic_lsm));
- }
+ rc =mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md);
+ if (rc)
+ RETURN(rc);
- /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
- * to pick? A more generic EIO? -phik */
- inode = ll_iget(dentry->d_sb, ino, &lic);
+ inode = ll_iget(dentry->d_sb, ino, &md);
if (!inode) {
/* free the lsm if we allocated one above */
- if (lic.lic_lsm != NULL)
- obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+ if (md.lsm != NULL)
+ obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
RETURN(-ENOMEM);
- } else if (lic.lic_lsm != NULL &&
- ll_i2info(inode)->lli_smd != lic.lic_lsm) {
- obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
+ } else if (md.lsm != NULL &&
+ ll_i2info(inode)->lli_smd != md.lsm) {
+ obd_free_memmd(&sbi->ll_osc_conn, &md.lsm);
}
/* If this is a stat, get the authoritative file size */
/* We asked for a lock on the directory, and may have been
* granted a lock on the inode. Just in case, fixup the data
* pointer. */
- ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
- inode);
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+ inode, inode->i_ino, inode->i_generation);
+ ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+ inode);
} else {
ENTRY;
}
RETURN(0);
}
-static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it)
+static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
+ struct lookup_intent *it, int flags)
{
struct dentry *save = dentry, *retval;
int rc;
dentry->d_name.name, parent->i_ino, parent->i_generation,
parent, LL_IT2STR(it));
- rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
+ if (d_mountpoint(dentry)) {
+ CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
+ }
+
+ rc = ll_intent_lock(parent, &dentry, it, flags, lookup2_finish);
if (rc < 0) {
CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
GOTO(out, retval = ERR_PTR(rc));
return retval;
}
-/* We depend on "mode" being set with the proper file type/umask by now */
-static struct inode *ll_create_node(struct inode *dir, const char *name,
- int namelen, const void *data, int datalen,
- int mode, __u64 extra,
- struct lookup_intent *it)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
+ struct nameidata *nd)
{
- struct inode *inode;
- struct ptlrpc_request *request = NULL;
- struct mds_body *body;
- time_t time = LTIME_S(CURRENT_TIME);
- struct ll_sb_info *sbi = ll_i2sbi(dir);
- struct ll_read_inode2_cookie lic;
+ struct dentry *de;
ENTRY;
- if (it && it->it_disposition) {
- ll_invalidate_inode_pages(dir);
- request = it->it_data;
- body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
- LASSERT (body != NULL); /* checked already */
- LASSERT_REPSWABBED (request, 1); /* swabbed already */
- } else {
- struct mdc_op_data op_data;
- int gid = current->fsgid;
- int rc;
-
- if (dir->i_mode & S_ISGID) {
- gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- }
-
- ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
- rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
- data, datalen, mode, current->fsuid, gid,
- time, extra, &request);
- if (rc) {
- inode = ERR_PTR(rc);
- GOTO(out, rc);
- }
- body = lustre_swab_repbuf(request, 0, sizeof (*body),
- lustre_swab_mds_body);
- if (body == NULL) {
- CERROR ("Can't unpack mds_body\n");
- GOTO (out, inode = ERR_PTR(-EPROTO));
- }
- }
-
- lic.lic_body = body;
- lic.lic_lsm = NULL;
-
- inode = ll_iget(dir->i_sb, body->ino, &lic);
- if (!inode || is_bad_inode(inode)) {
- /* XXX might need iput() for bad inode */
- int rc = -EIO;
- CERROR("new_inode -fatal: rc %d\n", rc);
- LBUG();
- GOTO(out, rc);
- }
-
- if (!list_empty(&inode->i_dentry)) {
- CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
- body->ino, atomic_read(&inode->i_count),
- inode->i_nlink);
- iput(inode);
- LBUG();
- inode = ERR_PTR(-EIO);
- GOTO(out, -EIO);
- }
-
- if (it && it->it_disposition) {
- /* We asked for a lock on the directory, but were
- * granted a lock on the inode. Since we finally have
- * an inode pointer, stuff it in the lock. */
- ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
- inode);
- }
+ if (nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
+ de = ll_lookup_it(parent, dentry, &nd->it, nd->flags);
+ else
+ de = ll_lookup_it(parent, dentry, NULL, 0);
- EXIT;
- out:
- ptlrpc_req_finished(request);
- return inode;
+ RETURN(de);
}
+#endif
static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
const char *name, int len)
{
struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mds_body *body;
struct lov_mds_md *eadata;
struct lov_stripe_md *lsm = NULL;
- struct lustre_handle lockh;
- struct lookup_intent it = { .it_op = IT_UNLINK };
- struct obdo *oa;
- int err;
+ struct obd_trans_info oti = { 0 };
struct mdc_op_data op_data;
+ struct obdo *oa;
+ int rc;
ENTRY;
ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
-
- err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
- &op_data, &lockh, NULL, 0,
- ldlm_completion_ast, ll_mdc_blocking_ast,
- dir);
- request = (struct ptlrpc_request *)it.it_data;
- if (err < 0)
- GOTO(out, err);
- if (it.it_status)
- GOTO(out, err = it.it_status);
- err = 0;
-
- body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
- LASSERT (body != NULL); /* checked by mdc_enqueue() */
- LASSERT_REPSWABBED (request, 1); /* swabbed by mdc_enqueue() */
+ rc = mdc_unlink(&ll_i2sbi(dir)->ll_mdc_conn, &op_data, &request);
+ if (rc)
+ GOTO(out, rc);
+ /* req is swabbed so this is safe */
+ body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
if (!(body->valid & OBD_MD_FLEASIZE))
- GOTO(out, 0);
+ GOTO(out, rc = 0);
if (body->eadatasize == 0) {
- CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
- GOTO (out, err = -EPROTO);
+ CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
+ GOTO(out, rc = -EPROTO);
}
/* The MDS sent back the EA because we unlinked the last reference
* to this file. Use this EA to unlink the objects on the OST.
- * Note that mdc_enqueue() has already checked there _is_ some EA
- * data, but this data is opaque to both mdc_enqueue() and the MDS.
- * We have to leave it to obd_unpackmd() to check it is complete
- * and sensible. */
- eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
- LASSERT (eadata != NULL);
- LASSERT_REPSWABBED (request, 2);
-
- err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
- body->eadatasize);
- if (err < 0) {
- CERROR("obd_unpackmd: %d\n", err);
- GOTO (out_unlock, err);
+ * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
+ * check it is complete and sensible. */
+ eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
+ LASSERT(eadata != NULL);
+ if (eadata == NULL) {
+ CERROR("Can't unpack MDS EA data\n");
+ GOTO(out, rc = -EPROTO);
}
- LASSERT (err >= sizeof (*lsm));
+
+ rc = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, body->eadatasize);
+ if (rc < 0) {
+ CERROR("obd_unpackmd: %d\n", rc);
+ GOTO(out, rc);
+ }
+ LASSERT(rc >= sizeof(*lsm));
oa = obdo_alloc();
if (oa == NULL)
- GOTO(out_free_memmd, err = -ENOMEM);
+ GOTO(out_free_memmd, rc = -ENOMEM);
oa->o_id = lsm->lsm_object_id;
oa->o_mode = body->mode & S_IFMT;
oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
- err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
+ if (body->valid & OBD_MD_FLCOOKIE) {
+ oa->o_valid |= OBD_MD_FLCOOKIE;
+ oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3,
+ body->eadatasize);
+ }
+
+ rc = obd_destroy(ll_i2obdconn(dir), oa, lsm, &oti);
obdo_free(oa);
- if (err)
+ if (rc)
CERROR("obd destroy objid 0x"LPX64" error %d\n",
- lsm->lsm_object_id, err);
+ lsm->lsm_object_id, rc);
out_free_memmd:
obd_free_memmd(ll_i2obdconn(dir), &lsm);
- out_unlock:
- ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
out:
ptlrpc_req_finished(request);
- return err;
+ return rc;
+}
+
+/*
+ * Instantiate the inode described by the reply of an already-executed
+ * intent.
+ *
+ * We depend on "mode" being set with the proper file type/umask by now.
+ *
+ * The intent must have been executed (it != NULL and it_disposition
+ * non-zero).  The reply saved in it->it_data is unpacked with
+ * mdc_req2lustre_md() and handed to ll_iget().  On success the inode is
+ * also stored as the data of the lock named by it->it_lock_handle.
+ *
+ * name, namelen, data, datalen, mode and extra are not used in this body.
+ *
+ * Returns the inode on success, or an ERR_PTR() on failure; it never
+ * returns NULL, so callers can rely on IS_ERR() alone.
+ * ptlrpc_req_finished() is called on the request on every path.
+ *
+ * NOTE(review): lookup2_finish() frees md.lsm when ll_iget() fails; this
+ * function does not -- confirm whether a stripe md can be present here.
+ */
+static struct inode *ll_create_node(struct inode *dir, const char *name,
+                                    int namelen, const void *data, int datalen,
+                                    int mode, __u64 extra,
+                                    struct lookup_intent *it)
+{
+        struct inode *inode;
+        struct ptlrpc_request *request = NULL;
+        struct ll_sb_info *sbi = ll_i2sbi(dir);
+        struct lustre_md md;
+        int rc;
+        ENTRY;
+
+        LASSERT(it && it->it_disposition);
+
+        ll_invalidate_inode_pages(dir);
+
+        request = it->it_data;
+        rc = mdc_req2lustre_md(request, 1, &sbi->ll_osc_conn, &md);
+        if (rc) {
+                GOTO(out, inode = ERR_PTR(rc));
+        }
+
+        inode = ll_iget(dir->i_sb, md.body->ino, &md);
+        if (!inode || is_bad_inode(inode)) {
+                /* Use the function-level rc (no shadowing local) and hand
+                 * the caller an ERR_PTR: ll_create_it() only tests
+                 * IS_ERR(), so NULL or a bad inode would otherwise be
+                 * passed to d_instantiate() as if creation succeeded. */
+                rc = -EIO;
+                CERROR("new_inode -fatal: rc %d\n", rc);
+                LBUG();
+                /* drop the reference on a bad inode before losing the
+                 * pointer */
+                if (inode != NULL)
+                        iput(inode);
+                GOTO(out, inode = ERR_PTR(rc));
+        }
+        LASSERT(list_empty(&inode->i_dentry));
+
+        CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+               inode, inode->i_ino, inode->i_generation);
+        ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle,
+                           inode);
+
+        EXIT;
+ out:
+        ptlrpc_req_finished(request);
+        return inode;
}
/*
* If the create succeeds, we fill in the inode information
* with d_instantiate().
*/
-static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
+static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it)
{
- struct lookup_intent *it;
struct inode *inode;
+ struct ptlrpc_request *request = it->it_data;
int rc = 0;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- it = dentry->d_it;
+ LL_IT2STR(it));
- rc = ll_it_open_error(IT_OPEN_CREATE, it);
+ rc = ll_it_open_error(DISP_OPEN_CREATE, it);
if (rc) {
- LL_GET_INTENT(dentry, it);
- ptlrpc_req_finished(it->it_data);
+ ptlrpc_req_finished(request);
RETURN(rc);
}
+ mdc_store_inode_generation(request, 2, 1);
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
NULL, 0, mode, 0, it);
-
if (IS_ERR(inode)) {
- LL_GET_INTENT(dentry, it);
RETURN(PTR_ERR(inode));
}
- /* no directory data updates when intents rule */
- if (it && it->it_disposition) {
- d_instantiate(dentry, inode);
- RETURN(0);
- }
-
- rc = ext2_add_nondir(dentry, inode);
- RETURN(rc);
+ d_instantiate(dentry, inode);
+ RETURN(0);
}
-static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
- int rdev)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
{
- LBUG();
- return -ENOSYS;
+ return ll_create_it(dir, dentry, mode, &nd->it);
}
+#endif
-static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
- int rdev)
+static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
{
+ struct inode *dir = nd->dentry->d_inode;
+ const char *name = nd->last.name;
+ int len = nd->last.len;
struct ptlrpc_request *request = NULL;
time_t time = LTIME_S(CURRENT_TIME);
struct ll_sb_info *sbi = ll_i2sbi(dir);
mode &= ~current->fs->umask;
switch (mode & S_IFMT) {
- case 0: case S_IFREG:
+ case 0:
+ case S_IFREG:
mode |= S_IFREG; /* for mode = 0 case, fallthrough */
- case S_IFCHR: case S_IFBLK:
- case S_IFIFO: case S_IFSOCK:
+ case S_IFCHR:
+ case S_IFBLK:
+ case S_IFIFO:
+ case S_IFSOCK:
ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
current->fsuid, current->fsgid, time,
RETURN(err);
}
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- LBUG();
- return -ENOSYS;
-}
-
-static int ll_symlink2(struct inode *dir, const char *name, int len,
- const char *tgt)
+static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
{
+ struct inode *dir = nd->dentry->d_inode;
+ const char *name = nd->last.name;
+ int len = nd->last.len;
struct ptlrpc_request *request = NULL;
time_t time = LTIME_S(CURRENT_TIME);
struct ll_sb_info *sbi = ll_i2sbi(dir);
RETURN(err);
}
-static int ll_link(struct dentry *old_dentry, struct inode * dir,
- struct dentry *dentry)
-{
- LBUG();
- return -ENOSYS;
-}
-
-static int ll_link2(struct inode *src, struct inode *dir,
- const char *name, int len)
+static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
{
+ struct inode *src = srcnd->dentry->d_inode;
+ struct inode *dir = tgtnd->dentry->d_inode;
+ const char *name = tgtnd->last.name;
+ int len = tgtnd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data op_data;
int err;
RETURN(err);
}
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- LBUG();
- return -ENOSYS;
-}
-static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
+static int ll_mkdir_raw(struct nameidata *nd, int mode)
{
+ struct inode *dir = nd->dentry->d_inode;
+ const char *name = nd->last.name;
+ int len = nd->last.len;
struct ptlrpc_request *request = NULL;
time_t time = LTIME_S(CURRENT_TIME);
struct ll_sb_info *sbi = ll_i2sbi(dir);
mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
- current->fsuid, current->fsgid,
- time, 0, &request);
+ current->fsuid, current->fsgid, time, 0, &request);
ptlrpc_req_finished(request);
RETURN(err);
}
-static int ll_rmdir2(struct inode *dir, const char *name, int len)
+static int ll_rmdir_raw(struct nameidata *nd)
{
+ struct inode *dir = nd->dentry->d_inode;
+ const char *name = nd->last.name;
+ int len = nd->last.len;
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
RETURN(rc);
}
-static int ll_unlink2(struct inode *dir, const char *name, int len)
+static int ll_unlink_raw(struct nameidata *nd)
{
+ struct inode *dir = nd->dentry->d_inode;
+ const char *name = nd->last.name;
+ int len = nd->last.len;
int rc;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
RETURN(rc);
}
-static int ll_unlink(struct inode *dir, struct dentry *dentry)
-{
- LBUG();
- return -ENOSYS;
-}
-
-static int ll_rmdir(struct inode *dir, struct dentry *dentry)
-{
- LBUG();
- return -ENOSYS;
-}
-
-static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
- struct inode * new_dir, struct dentry * new_dentry)
-{
- LBUG();
- return -ENOSYS;
-}
-
-static int ll_rename2(struct inode *src, struct inode *tgt,
- const char *oldname, int oldlen,
- const char *newname, int newlen)
+static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
{
+ struct inode *src = oldnd->dentry->d_inode;
+ struct inode *tgt = newnd->dentry->d_inode;
+ const char *oldname = oldnd->last.name;
+ int oldlen = oldnd->last.len;
+ const char *newname = newnd->last.name;
+ int newlen = newnd->last.len;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(src);
struct mdc_op_data op_data;
RETURN(err);
}
-extern int ll_inode_revalidate(struct dentry *dentry);
struct inode_operations ll_dir_inode_operations = {
- create: ll_create,
- lookup2: ll_lookup2,
- link: ll_link, /* LBUG() */
- link2: ll_link2,
- unlink: ll_unlink, /* LBUG() */
- unlink2: ll_unlink2,
- symlink: ll_symlink, /* LBUG() */
- symlink2: ll_symlink2,
- mkdir: ll_mkdir, /* LBUG() */
- mkdir2: ll_mkdir2,
- rmdir: ll_rmdir, /* LBUG() */
- rmdir2: ll_rmdir2,
- mknod: ll_mknod, /* LBUG() */
- mknod2: ll_mknod2,
- rename: ll_rename, /* LBUG() */
- rename2: ll_rename2,
+ link_raw: ll_link_raw,
+ unlink_raw: ll_unlink_raw,
+ symlink_raw: ll_symlink_raw,
+ mkdir_raw: ll_mkdir_raw,
+ rmdir_raw: ll_rmdir_raw,
+ mknod_raw: ll_mknod_raw,
+ rename_raw: ll_rename_raw,
setattr: ll_setattr,
setattr_raw: ll_setattr_raw,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- revalidate: ll_inode_revalidate,
+ create_it: ll_create_it,
+ lookup_it: ll_lookup_it,
+ revalidate_it: ll_inode_revalidate_it,
+#else
+ lookup_it: ll_lookup_nd,
+ create_nd: ll_create_nd,
+ getattr_it: ll_getattr,
#endif
};
#include <linux/version.h>
#include <asm/system.h>
#include <asm/uaccess.h>
-#include "llite_internal.h"
#include <linux/fs.h>
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/writeback.h>
#else
#include <linux/iobuf.h>
#endif
#include <linux/lustre_mds.h>
#include <linux/lustre_lite.h>
-#include <linux/lustre_lib.h>
+#include "llite_internal.h"
#include <linux/lustre_compat25.h>
/*
}
/* SYNCHRONOUS I/O to object storage for an inode */
-static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags)
+static int ll_brw(int cmd, struct inode *inode, struct obdo *oa,
+ struct page *page, int flags)
{
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
else
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_BRW_READ, pg.count);
- rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
- if (rc)
+ rc = obd_brw(cmd, ll_i2obdconn(inode), oa, lsm, 1, &pg, NULL);
+ if (rc != 0 && rc != -EIO)
CERROR("error from obd_brw: rc = %d\n", rc);
RETURN(rc);
struct page *page = first_page;
struct list_head *pos;
struct brw_page *pgs;
+ struct obdo *oa;
unsigned long end_index, extent_end = 0;
struct ptlrpc_request_set *set;
int npgs = 0, rc = 0, max_pages;
} while (page);
- set = ptlrpc_prep_set();
- if (set == NULL) {
+ if ((oa = obdo_alloc()) == NULL) {
+ CERROR("ENOMEM allocing obdo\n");
+ rc = -ENOMEM;
+ } else if ((set = ptlrpc_prep_set()) == NULL) {
CERROR("ENOMEM allocing request set\n");
+ obdo_free(oa);
rc = -ENOMEM;
} else {
- rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode),
+ struct ll_file_data *fd = file->private_data;
+
+ oa->o_id = lli->lli_smd->lsm_object_id;
+ memcpy(obdo_handle(oa), &fd->fd_ost_och.och_fh,
+ sizeof(fd->fd_ost_och.och_fh));
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME);
+
+ rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), oa,
ll_i2info(inode)->lli_smd, npgs, pgs,
set, NULL);
if (rc == 0)
rc = ptlrpc_set_wait(set);
ptlrpc_set_destroy(set);
+ if (rc == 0)
+ obdo_refresh_inode(inode, oa, oa->o_valid);
if (rc && rc != -EIO)
CERROR("error from obd_brw_async: rc = %d\n", rc);
+ obdo_free(oa);
}
while (npgs-- > 0) {
void ll_truncate(struct inode *inode)
{
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct obdo oa = {0};
+ struct obdo oa;
int err;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
inode->i_generation, inode);
+ /* object not yet allocated */
if (!lsm) {
- /* object not yet allocated */
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ CERROR("truncate on inode %lu with no objects\n", inode->i_ino);
EXIT;
return;
}
~0);
oa.o_id = lsm->lsm_object_id;
- oa.o_mode = inode->i_mode;
- oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
+ oa.o_valid = OBD_MD_FLID;
+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME|
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
oa.o_id, inode->i_size);
if (err)
CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino);
else
- obdo_to_inode(inode, &oa, oa.o_valid);
+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME);
EXIT;
return;
{
struct inode *inode = page->mapping->host;
struct ll_inode_info *lli = ll_i2info(inode);
+ struct ll_file_data *fd = file->private_data;
struct lov_stripe_md *lsm = lli->lli_smd;
obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
struct brw_page pg;
+ struct obdo oa;
int rc = 0;
ENTRY;
pg.off = offset;
pg.count = PAGE_SIZE;
pg.flag = 0;
- rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL);
+ rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), NULL, lsm, 1,&pg,NULL);
if (rc)
RETURN(rc);
GOTO(prepare_done, rc = 0);
}
- rc = ll_brw(OBD_BRW_READ, inode, page, 0);
+ oa.o_id = lsm->lsm_object_id;
+ oa.o_mode = inode->i_mode;
+ memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+ sizeof(fd->fd_ost_och.och_fh));
+ oa.o_valid = OBD_MD_FLID |OBD_MD_FLMODE |OBD_MD_FLTYPE |OBD_MD_FLHANDLE;
+
+ rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0);
+ if (rc == 0)
+ obdo_refresh_inode(inode, &oa, oa.o_valid);
EXIT;
prepare_done:
static int ll_writepage(struct page *page)
{
struct inode *inode = page->mapping->host;
+ struct obdo oa;
ENTRY;
CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page,
- PageLaunder(page), inode);
+ PageLaunder(page), inode);
LASSERT(PageLocked(page));
- /* XXX should obd_brw errors trickle up? */
- ll_batch_writepage(inode, page);
- RETURN(0);
+ oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+ oa.o_valid = OBD_MD_FLID;
+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+ RETURN(ll_batch_writepage(inode, &oa, page));
}
/*
int rc = 0;
ENTRY;
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
LASSERT(inode == file->f_dentry->d_inode);
LASSERT(PageLocked(page));
/* This means that we've hit either the local cache limit or the limit
* of the OST's grant. */
if (rc == -EDQUOT) {
- int rc = ll_batch_writepage(inode, page);
+ struct ll_file_data *fd = file->private_data;
+ struct obdo oa;
+ int rc;
+
+ oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id;
+ memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh,
+ sizeof(fd->fd_ost_och.och_fh));
+ oa.o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE;
+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+ rc = ll_batch_writepage(inode, &oa, page);
lock_page(page); /* caller expects to unlock */
RETURN(rc);
}
struct lov_stripe_md *lsm = lli->lli_smd;
struct brw_page *pga;
struct ptlrpc_request_set *set;
+ struct obdo oa;
int length, i, flags, rc = 0;
loff_t offset;
ENTRY;
if (!lsm || !lsm->lsm_object_id)
- RETURN(-ENOMEM);
+ RETURN(-EBADF);
if ((iobuf->offset & (blocksize - 1)) ||
(iobuf->length & (blocksize - 1)))
}
}
+ oa.o_id = lsm->lsm_object_id;
+ oa.o_valid = OBD_MD_FLID;
+ obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
if (rw == WRITE)
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_WRITE, iobuf->length);
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRECT_READ, iobuf->length);
rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
- ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set,
- NULL);
+ ll_i2obdconn(inode), &oa, lsm, iobuf->nr_pages, pga,
+ set, NULL);
if (rc) {
CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
"error from obd_brw_async: rc = %d\n", rc);
#include "llite_internal.h"
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-kmem_cache_t *ll_file_data_slab;
extern struct address_space_operations ll_aops;
extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
-
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi);
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
- struct super_block *sb,
- char *osc, char *mdc);
-
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
- char *value;
- char *retval;
- ENTRY;
-
- CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
- if (strncmp(opt, data, strlen(opt)))
- RETURN(NULL);
- if ((value = strchr(data, '=')) == NULL)
- RETURN(NULL);
-
- value++;
- OBD_ALLOC(retval, strlen(value) + 1);
- if (!retval) {
- CERROR("out of memory!\n");
- RETURN(NULL);
- }
-
- memcpy(retval, value, strlen(value)+1);
- CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
- RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
- ENTRY;
-
- CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
- if (strncmp(opt, data, strlen(opt)))
- RETURN(0);
- else
- RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
- char *this_char;
- ENTRY;
-
- if (!options) {
- EXIT;
- return;
- }
-
- for (this_char = strtok (options, ",");
- this_char != NULL;
- this_char = strtok (NULL, ",")) {
- CDEBUG(D_SUPER, "this_char %s\n", this_char);
- if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
- (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
- (!(*flags & LL_SBI_NOLCK) &&
- ((*flags) = (*flags) |
- ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
- continue;
- }
- EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
static struct super_block *ll_read_super(struct super_block *sb,
void *data, int silent)
{
- struct inode *root = 0;
- struct obd_device *obd;
- struct ll_sb_info *sbi;
- struct obd_export *mdc_export;
- char *osc = NULL;
- char *mdc = NULL;
int err;
- struct ll_fid rootfid;
- struct obd_statfs osfs;
- struct ptlrpc_request *request = NULL;
- struct ptlrpc_connection *mdc_conn;
- struct ll_read_inode2_cookie lic;
- class_uuid_t uuid;
-
ENTRY;
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
- OBD_ALLOC(sbi, sizeof(*sbi));
- if (!sbi)
+ err = ll_fill_super(sb, data, silent);
+ if (err)
RETURN(NULL);
-
- INIT_LIST_HEAD(&sbi->ll_conn_chain);
- INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
- generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
- sb->u.generic_sbp = sbi;
-
- ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
- if (!osc) {
- CERROR("no osc\n");
- GOTO(out_free, sb = NULL);
- }
-
- if (!mdc) {
- CERROR("no mdc\n");
- GOTO(out_free, sb = NULL);
- }
-
- obd = class_name2obd(mdc);
- if (!obd) {
- CERROR("MDC %s: not setup or attached\n", mdc);
- GOTO(out_free, sb = NULL);
- }
-
- err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", mdc, err);
- GOTO(out_free, sb = NULL);
- }
-
- mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
- obd = class_name2obd(osc);
- if (!obd) {
- CERROR("OSC %s: not setup or attached\n", osc);
- GOTO(out_mdc, sb = NULL);
- }
-
- err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", osc, err);
- GOTO(out_mdc, sb = NULL);
- }
-
- err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- GOTO(out_osc, sb = NULL);
- }
- CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
- sbi->ll_rootino = rootfid.id;
-
- memset(&osfs, 0, sizeof(osfs));
- mdc_export = class_conn2export(&sbi->ll_mdc_conn);
- if (mdc_export == NULL) {
- CERROR("null mdc_export\n");
- GOTO(out_osc, sb = NULL);
- }
- err = obd_statfs(mdc_export, &osfs);
- class_export_put(mdc_export);
- sb->s_blocksize = osfs.os_bsize;
- sb->s_blocksize_bits = log2(osfs.os_bsize);
- sb->s_magic = LL_SUPER_MAGIC;
- sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
- sb->s_op = &ll_super_operations;
-
- /* make root inode
- * XXX: move this to after cbd setup? */
- err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
- OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
- if (err) {
- CERROR("mdc_getattr failed for root: rc = %d\n", err);
- GOTO(out_osc, sb = NULL);
- }
-
- /* initialize committed transaction callback daemon */
- spin_lock_init(&sbi->ll_commitcbd_lock);
- init_waitqueue_head(&sbi->ll_commitcbd_waitq);
- init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
- sbi->ll_commitcbd_flags = 0;
- err = ll_commitcbd_setup(sbi);
- if (err) {
- CERROR("failed to start commit callback daemon: rc = %d\n",err);
- ptlrpc_req_finished (request);
- GOTO(out_osc, sb = NULL);
- }
-
- lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0,
- sizeof(*lic.lic_body));
- LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */
- LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */
-
- lic.lic_lsm = NULL;
-
- LASSERT(sbi->ll_rootino != 0);
- root = iget4(sb, sbi->ll_rootino, NULL, &lic);
-
- ptlrpc_req_finished(request);
-
- if (root == NULL || is_bad_inode(root)) {
- /* XXX might need iput() for bad inode */
- CERROR("lustre_lite: bad iget4 for root\n");
- GOTO(out_cbd, sb = NULL);
- }
-
- sb->s_root = d_alloc_root(root);
-
- if (proc_lustre_fs_root) {
- err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
- osc, mdc);
- if (err < 0)
- CERROR("could not register mount in /proc/lustre");
- }
-
-out_dev:
- if (mdc)
- OBD_FREE(mdc, strlen(mdc) + 1);
- if (osc)
- OBD_FREE(osc, strlen(osc) + 1);
-
RETURN(sb);
-
-out_cbd:
- ll_commitcbd_cleanup(sbi);
-out_osc:
- obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
- obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
- lprocfs_unregister_mountpoint(sbi);
- OBD_FREE(sbi, sizeof(*sbi));
-
- goto out_dev;
-} /* ll_read_super */
-
-static void ll_put_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct list_head *tmp, *next;
- struct ll_fid rootfid;
- struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn);
- ENTRY;
-
- CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
- list_del(&sbi->ll_conn_chain);
- ll_commitcbd_cleanup(sbi);
- obd_disconnect(&sbi->ll_osc_conn, 0);
-
- /* NULL request to force sync on the MDS, and get the last_committed
- * value to flush remaining RPCs from the sending queue on client.
- *
- * XXX This should be an mdc_sync() call to sync the whole MDS fs,
- * which we can call for other reasons as well.
- */
- if (!obd->obd_no_recov)
- mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
- lprocfs_unregister_mountpoint(sbi);
- if (sbi->ll_proc_root) {
- lprocfs_remove(sbi->ll_proc_root);
- sbi->ll_proc_root = NULL;
- }
-
- obd_disconnect(&sbi->ll_mdc_conn, 0);
-
- spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
- struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
- shrink_dcache_parent(dentry);
- }
- spin_unlock(&dcache_lock);
-
- OBD_FREE(sbi, sizeof(*sbi));
-
- EXIT;
-} /* ll_put_super */
-
-static void ll_clear_inode(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
- ENTRY;
-
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
- rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode,
- LDLM_FL_NO_CALLBACK, inode);
- if (rc < 0) {
- CERROR("ll_mdc_cancel_unused: %d\n", rc);
- /* XXX FIXME do something dramatic */
- }
-
- if (atomic_read(&inode->i_count) != 0)
- CERROR("clearing in-use inode %lu: count = %d\n",
- inode->i_ino, atomic_read(&inode->i_count));
-
- if (lli->lli_smd) {
- rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd,
- LDLM_FL_WARN, inode);
- if (rc < 0) {
- CERROR("obd_cancel_unused: %d\n", rc);
- /* XXX FIXME do something dramatic */
- }
- obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
- lli->lli_smd = NULL;
- }
-
- if (lli->lli_symlink_name) {
- OBD_FREE(lli->lli_symlink_name,
- strlen(lli->lli_symlink_name) + 1);
- lli->lli_symlink_name = NULL;
- }
-
- EXIT;
-}
-
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
- if (S_ISREG(inode->i_mode)) {
- int err;
- struct obdo *oa;
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
- /* mcreate with no open */
- if (!lsm)
- GOTO(out, 0);
-
- if (lsm->lsm_object_id == 0) {
- CERROR("This really happens\n");
- /* No obdo was ever created */
- GOTO(out, 0);
- }
-
- oa = obdo_alloc();
- if (oa == NULL)
- GOTO(out, -ENOMEM);
-
- oa->o_id = lsm->lsm_object_id;
- obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
-
- err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL);
- obdo_free(oa);
- if (err)
- CDEBUG(D_INODE,
- "inode %lu obd_destroy objid "LPX64" error %d\n",
- inode->i_ino, lsm->lsm_object_id, err);
- }
-out:
- clear_inode(inode);
- EXIT;
-}
-#endif
-
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc)
-{
- unsigned int ia_valid = attr->ia_valid;
- int error = 0;
-
- if ((ia_valid & ATTR_SIZE) && trunc) {
- if (attr->ia_size > ll_file_maxbytes(inode)) {
- error = -EFBIG;
- goto out;
- }
- error = vmtruncate(inode, attr->ia_size);
- if (error)
- goto out;
- } else if (ia_valid & ATTR_SIZE)
- inode->i_size = attr->ia_size;
-
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- inode->i_atime = attr->ia_atime;
- if (ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
- if (ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
- if (ia_valid & ATTR_MODE) {
- inode->i_mode = attr->ia_mode;
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- inode->i_mode &= ~S_ISGID;
- }
-out:
- return error;
-}
-
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int err = 0;
- ENTRY;
-
- /* change incore inode */
- err = ll_attr2inode(inode, attr, do_trunc);
- if (err)
- RETURN(err);
-
- /* Don't send size changes to MDS to avoid "fast EA" problems, and
- * also avoid a pointless RPC (we get file size from OST anyways).
- */
- attr->ia_valid &= ~ATTR_SIZE;
- if (attr->ia_valid) {
- struct mdc_op_data op_data;
-
- ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
- err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
- attr, NULL, 0, &request);
- if (err)
- CERROR("mdc_setattr fails: err = %d\n", err);
-
- ptlrpc_req_finished(request);
- if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct obdo oa;
- int err2;
-
- CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
- inode->i_ino, attr->ia_mtime);
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
- oa.o_mtime = attr->ia_mtime;
- err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
- if (err2) {
- CERROR("obd_setattr fails: rc=%d\n", err);
- if (!err)
- err = err2;
- }
- }
- }
-
- RETURN(err);
-}
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *request = NULL;
- struct mdc_op_data op_data;
- int rc = 0, err;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
-
- if ((attr->ia_valid & ATTR_SIZE)) {
- struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF};
- struct lustre_handle lockh = { 0 };
-
- if (attr->ia_size > ll_file_maxbytes(inode))
- RETURN(-EFBIG);
-
- /* writeback uses inode->i_size to determine how far out
- * its cached pages go. ll_truncate gets a PW lock, canceling
- * our lock, _after_ it has updated i_size. this can confuse
- *
- * If this file doesn't have stripes yet, it is already,
- * by definition, truncated. */
- if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) {
- LASSERT(attr->ia_size == 0);
- GOTO(skip_extent_lock, rc = 0);
- }
-
- /* we really need to get our PW lock before we change
- * inode->i_size. if we don't we can race with other
- * i_size updaters on our node, like ll_file_read. we
- * can also race with i_size propogation to other
- * nodes through dirtying and writeback of final cached
- * pages. this last one is especially bad for racing
- * o_append users on other nodes. */
- rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW,
- &extent, &lockh);
- if (rc != ELDLM_OK) {
- if (rc > 0)
- RETURN(-ENOLCK);
- RETURN(rc);
- }
-
- rc = vmtruncate(inode, attr->ia_size);
- if (rc == 0)
- set_bit(LLI_F_HAVE_SIZE_LOCK,
- &ll_i2info(inode)->lli_flags);
-
- /* unlock now as we don't mind others file lockers racing with
- * the mds updates below? */
- err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
- if (err)
- CERROR("ll_extent_unlock failed: %d\n", err);
- if (rc)
- RETURN(rc);
- }
-
-skip_extent_lock:
- /* Don't send size changes to MDS to avoid "fast EA" problems, and
- * also avoid a pointless RPC (we get file size from OST anyways).
- */
- attr->ia_valid &= ~ATTR_SIZE;
- if (!attr->ia_valid)
- RETURN(0);
-
- ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
- err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
- attr, NULL, 0, &request);
- if (err)
- CERROR("mdc_setattr fails: err = %d\n", err);
-
- ptlrpc_req_finished(request);
-
- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) {
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct obdo oa;
- int err2;
-
- if (lsm == NULL) {
- CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n");
- RETURN(err);
- }
-
- CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
- inode->i_ino, attr->ia_mtime);
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
- oa.o_mtime = attr->ia_mtime;
- err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
- if (err2) {
- CERROR("obd_setattr fails: rc=%d\n", err);
- if (!err)
- err = err2;
- }
- }
- RETURN(err);
-}
-
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
- int rc = inode_change_ok(de->d_inode, attr);
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
- if (rc)
- return rc;
- lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR);
-
- return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn);
- struct obd_export *osc_exp;
- struct obd_statfs osfs;
- int rc;
- ENTRY;
-
- if (mdc_exp == NULL)
- RETURN(-EINVAL);
-
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
- lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
- memset(sfs, 0, sizeof(*sfs));
- rc = obd_statfs(mdc_exp, &osfs);
- statfs_unpack(sfs, &osfs);
- if (rc)
- CERROR("mdc_statfs fails: rc = %d\n", rc);
- else
- CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
- " objects "LPU64"/"LPU64"\n",
- osfs.os_bavail, osfs.os_blocks,
- osfs.os_ffree, osfs.os_files);
-
- /* temporary until mds_statfs returns statfs info for all OSTs */
- if (!rc) {
- osc_exp = class_conn2export(&sbi->ll_osc_conn);
- if (osc_exp == NULL)
- GOTO(out, rc = -EINVAL);
- rc = obd_statfs(osc_exp, &osfs);
- class_export_put(osc_exp);
- if (rc) {
- CERROR("obd_statfs fails: rc = %d\n", rc);
- GOTO(out, rc);
- }
- CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
- " objects "LPU64"/"LPU64"\n",
- osfs.os_bavail, osfs.os_blocks,
- osfs.os_ffree, osfs.os_files);
-
- while (osfs.os_blocks > ~0UL) {
- sfs->f_bsize <<= 1;
-
- osfs.os_blocks >>= 1;
- osfs.os_bfree >>= 1;
- osfs.os_bavail >>= 1;
- }
-
- sfs->f_blocks = osfs.os_blocks;
- sfs->f_bfree = osfs.os_bfree;
- sfs->f_bavail = osfs.os_bavail;
-
- /* If we don't have as many objects free on the OST as inodes
- * on the MDS, we reduce the total number of inodes to
- * compensate, so that the "inodes in use" number is correct.
- */
- if (osfs.os_ffree < (__u64)sfs->f_ffree) {
- sfs->f_files = (sfs->f_files - sfs->f_ffree) +
- osfs.os_ffree;
- sfs->f_ffree = osfs.os_ffree;
- }
- }
-
-out:
- class_export_put(mdc_exp);
- RETURN(rc);
-}
-
-void dump_lsm(int level, struct lov_stripe_md *lsm)
-{
- CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, "
- "stripe_size %#08x, offset %u, stripe_count %u\n",
- lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic,
- lsm->lsm_stripe_size, lsm->lsm_stripe_offset,
- lsm->lsm_stripe_count);
-}
-
-void ll_update_inode(struct inode *inode, struct mds_body *body,
- struct lov_stripe_md *lsm)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
- if (lsm != NULL) {
- if (lli->lli_smd == NULL) {
- lli->lli_maxbytes = lsm->lsm_maxbytes;
- if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
- lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
- lli->lli_smd = lsm;
- } else {
- if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
- CERROR("lsm mismatch for inode %ld\n",
- inode->i_ino);
- CERROR("lli_smd:\n");
- dump_lsm(D_ERROR, lli->lli_smd);
- CERROR("lsm:\n");
- dump_lsm(D_ERROR, lsm);
- LBUG();
- }
- }
- }
-
- if (body->valid & OBD_MD_FLID)
- inode->i_ino = body->ino;
- if (body->valid & OBD_MD_FLATIME)
- LTIME_S(inode->i_atime) = body->atime;
- if (body->valid & OBD_MD_FLMTIME)
- LTIME_S(inode->i_mtime) = body->mtime;
- if (body->valid & OBD_MD_FLCTIME)
- LTIME_S(inode->i_ctime) = body->ctime;
- if (body->valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
- if (body->valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
- if (body->valid & OBD_MD_FLUID)
- inode->i_uid = body->uid;
- if (body->valid & OBD_MD_FLGID)
- inode->i_gid = body->gid;
- if (body->valid & OBD_MD_FLFLAGS)
- inode->i_flags = body->flags;
- if (body->valid & OBD_MD_FLNLINK)
- inode->i_nlink = body->nlink;
- if (body->valid & OBD_MD_FLGENER)
- inode->i_generation = body->generation;
- if (body->valid & OBD_MD_FLRDEV)
- inode->i_rdev = body->rdev;
- if (body->valid & OBD_MD_FLSIZE)
- inode->i_size = body->size;
- if (body->valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->blocks;
-}
-
-static void ll_read_inode2(struct inode *inode, void *opaque)
-{
- struct ll_read_inode2_cookie *lic = opaque;
- struct mds_body *body = lic->lic_body;
- struct ll_inode_info *lli = ll_i2info(inode);
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
-
- sema_init(&lli->lli_open_sem, 1);
- spin_lock_init(&lli->lli_read_extent_lock);
- INIT_LIST_HEAD(&lli->lli_read_extents);
- lli->lli_flags = 0;
- /* We default to 2T-4k until the LSM is created/read, at which point
- * it'll be updated. */
- lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
- LASSERT(!lli->lli_smd);
-
- /* core attributes from the MDS first */
- ll_update_inode(inode, body, lic->lic_lsm);
-
- /* OIDEBUG(inode); */
-
- if (S_ISREG(inode->i_mode)) {
- inode->i_op = &ll_file_inode_operations;
- inode->i_fop = &ll_file_operations;
- inode->i_mapping->a_ops = &ll_aops;
- EXIT;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- inode->i_mapping->a_ops = &ll_dir_aops;
- EXIT;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &ll_fast_symlink_inode_operations;
- EXIT;
- } else {
- inode->i_op = &ll_special_inode_operations;
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
- EXIT;
- }
-}
-
-void ll_umount_begin(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_ioctl_data ioc_data = { 0 };
-
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
- obd = class_conn2obd(&sbi->ll_mdc_conn);
- obd->obd_no_recov = 1;
- obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
- &ioc_data, NULL);
-
- obd = class_conn2obd(&sbi->ll_osc_conn);
- obd->obd_no_recov = 1;
- obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
- &ioc_data, NULL);
-
- /* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just invalidate the requests,
- * schedule, and hope.
- */
- schedule();
-
- EXIT;
}
/* exported operations */
#include <linux/lprocfs_status.h>
#include "llite_internal.h"
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-kmem_cache_t *ll_file_data_slab;
-extern struct address_space_operations ll_aops;
-extern struct address_space_operations ll_dir_aops;
-struct super_operations ll_super_operations;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-/* /proc/lustre/llite root that tracks llite mount points */
-struct proc_dir_entry *proc_lustre_fs_root = NULL;
-/* lproc_llite.c */
-extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
- struct super_block *sb,
- char *osc, char *mdc);
-
-extern int ll_init_inodecache(void);
-extern void ll_destroy_inodecache(void);
-extern int ll_recover(struct recovd_data *, int);
-extern int ll_commitcbd_setup(struct ll_sb_info *);
-extern int ll_commitcbd_cleanup(struct ll_sb_info *);
-int ll_read_inode2(struct inode *inode, void *opaque);
-
-extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc);
-
-static char *ll_read_opt(const char *opt, char *data)
-{
- char *value;
- char *retval;
- ENTRY;
-
- CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
- if (strncmp(opt, data, strlen(opt)))
- RETURN(NULL);
- if ((value = strchr(data, '=')) == NULL)
- RETURN(NULL);
-
- value++;
- OBD_ALLOC(retval, strlen(value) + 1);
- if (!retval) {
- CERROR("out of memory!\n");
- RETURN(NULL);
- }
-
- memcpy(retval, value, strlen(value)+1);
- CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
- RETURN(retval);
-}
-
-static int ll_set_opt(const char *opt, char *data, int fl)
-{
- ENTRY;
-
- CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
- if (strncmp(opt, data, strlen(opt)))
- RETURN(0);
- else
- RETURN(fl);
-}
-
-static void ll_options(char *options, char **ost, char **mds, int *flags)
-{
- char *opt_ptr = options;
- char *this_char;
- ENTRY;
-
- if (!options) {
- EXIT;
- return;
- }
-
- while ((this_char = strsep (&opt_ptr, ",")) != NULL) {
- CDEBUG(D_SUPER, "this_char %s\n", this_char);
- if ((!*ost && (*ost = ll_read_opt("osc", this_char)))||
- (!*mds && (*mds = ll_read_opt("mdc", this_char)))||
- (!(*flags & LL_SBI_NOLCK) &&
- ((*flags) = (*flags) |
- ll_set_opt("nolock", this_char, LL_SBI_NOLCK))))
- continue;
- }
- EXIT;
-}
-
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
-
-static int ll_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct inode *root = 0;
- struct obd_device *obd;
- struct ll_sb_info *sbi;
- char *osc = NULL;
- char *mdc = NULL;
- int err;
- struct ll_fid rootfid;
- struct obd_statfs osfs;
- struct ptlrpc_request *request = NULL;
- struct ptlrpc_connection *mdc_conn;
- struct ll_read_inode2_cookie lic;
- class_uuid_t uuid;
-
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
- OBD_ALLOC(sbi, sizeof(*sbi));
- if (!sbi)
- RETURN(-ENOMEM);
-
- INIT_LIST_HEAD(&sbi->ll_conn_chain);
- INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
- generate_random_uuid(uuid);
- class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
-
- sb->s_fs_info = sbi;
-
- ll_options(data, &osc, &mdc, &sbi->ll_flags);
-
- if (!osc) {
- CERROR("no osc\n");
- GOTO(out_free, sb = NULL);
- }
-
- if (!mdc) {
- CERROR("no mdc\n");
- GOTO(out_free, sb = NULL);
- }
-
- obd = class_name2obd(mdc);
- if (!obd) {
- CERROR("MDC %s: not setup or attached\n", mdc);
- GOTO(out_free, sb = NULL);
- }
-
- err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", mdc, err);
- GOTO(out_free, sb = NULL);
- }
-
- mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
-
- obd = class_name2obd(osc);
- if (!obd) {
- CERROR("OSC %s: not setup or attached\n", osc);
- GOTO(out_mdc, sb = NULL);
- }
-
- err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
- if (err) {
- CERROR("cannot connect to %s: rc = %d\n", osc, err);
- GOTO(out_mdc, sb = NULL);
- }
-
- err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- GOTO(out_osc, sb = NULL);
- }
- CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
- sbi->ll_rootino = rootfid.id;
-
- memset(&osfs, 0, sizeof(osfs));
- err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
- sb->s_blocksize = osfs.os_bsize;
- sb->s_blocksize_bits = log2(osfs.os_bsize);
- sb->s_magic = LL_SUPER_MAGIC;
- sb->s_maxbytes = PAGE_CACHE_MAXBYTES;
-
- sb->s_op = &ll_super_operations;
-
- /* make root inode
- * XXX: move this to after cbd setup? */
- err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
- OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
- if (err) {
- CERROR("mdc_getattr failed for root: rc = %d\n", err);
- GOTO(out_osc, sb = NULL);
- }
-
- /* initialize committed transaction callback daemon */
- spin_lock_init(&sbi->ll_commitcbd_lock);
- init_waitqueue_head(&sbi->ll_commitcbd_waitq);
- init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
- sbi->ll_commitcbd_flags = 0;
- err = ll_commitcbd_setup(sbi);
- if (err) {
- CERROR("failed to start commit callback daemon: rc = %d\n",err);
- ptlrpc_req_finished (request);
- GOTO(out_osc, sb = NULL);
- }
-
- lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body));
- LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */
- LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */
-
- lic.lic_lsm = NULL;
-
- root = iget5_locked(sb, sbi->ll_rootino, NULL,
- ll_read_inode2, &lic);
-
- ptlrpc_req_finished(request);
-
- if (root == NULL || is_bad_inode(root)) {
- /* XXX might need iput() for bad inode */
- CERROR("lustre_lite: bad iget5 for root\n");
- GOTO(out_cbd, sb = NULL);
- }
-
- sb->s_root = d_alloc_root(root);
- root->i_state &= ~(I_LOCK | I_NEW);
- printk("AMRUT 1\n");
- if (proc_lustre_fs_root) {
- err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb,
- osc, mdc);
- if (err < 0)
- CERROR("could not register mount in /proc/lustre");
- }
-
-out_dev:
- if (mdc)
- OBD_FREE(mdc, strlen(mdc) + 1);
- if (osc)
- OBD_FREE(osc, strlen(osc) + 1);
- printk("AMRUT 2\n");
-
- RETURN(0);
-
-out_cbd:
- ll_commitcbd_cleanup(sbi);
-out_osc:
- obd_disconnect(&sbi->ll_osc_conn, 0);
-out_mdc:
- obd_disconnect(&sbi->ll_mdc_conn, 0);
-out_free:
- lprocfs_unregister_mountpoint(sbi);
- OBD_FREE(sbi, sizeof(*sbi));
-
- goto out_dev;
-} /* ll_fill_super */
-
-
-int ll_setattr_raw(struct inode *inode, struct iattr *attr)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct mdc_op_data op_data;
- int err = 0;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
- LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR);
- if ((attr->ia_valid & ATTR_SIZE)) {
- /* writeback uses inode->i_size to determine how far out
- * its cached pages go. ll_truncate gets a PW lock, canceling
- * our lock, _after_ it has updated i_size. this can confuse
- * us into zero extending the file to the newly truncated
- * size, and this has bad implications for a racing o_append.
- * if we're extending our size we need to flush the pages
- * with the correct i_size before vmtruncate stomps on
- * the new i_size. again, this can only find pages to
- * purge if the PW lock that generated them is still held.
- */
- if ( attr->ia_size > inode->i_size ) {
- filemap_fdatasync(inode->i_mapping);
- filemap_fdatawait(inode->i_mapping);
- }
- err = vmtruncate(inode, attr->ia_size);
- if (err)
- RETURN(err);
- }
-
- /* Don't send size changes to MDS to avoid "fast EA" problems, and
- * also avoid a pointless RPC (we get file size from OST anyways).
- */
- attr->ia_valid &= ~ATTR_SIZE;
- if (!attr->ia_valid)
- RETURN(0);
-
- ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
- err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
- attr, NULL, 0, &request);
- if (err)
- CERROR("mdc_setattr fails: err = %d\n", err);
-
- ptlrpc_req_finished(request);
-
- if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct obdo oa;
- int err2;
-
- CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
- inode->i_ino, attr->ia_mtime);
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME;
- oa.o_mtime = LTIME_S(attr->ia_mtime);
- err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
- if (err2) {
- CERROR("obd_setattr fails: rc=%d\n", err);
- if (!err)
- err = err2;
- }
- }
- RETURN(err);
-}
/* Mount entry point for the filesystem type: hands the request to
 * get_sb_nodev() with ll_fill_super as the fill callback and returns
 * whatever get_sb_nodev() returns.  devname is part of the interface but
 * is not referenced in this body; the hunk only const-qualifies it.
 */
struct super_block * ll_get_sb(struct file_system_type *fs_type,
- int flags, char *devname, void * data)
+ int flags, const char *devname, void * data)
{
+ /* calls back in fill super */
return get_sb_nodev(fs_type, flags, data, ll_fill_super);
}
/* (Removed by this patch.)  Unmount teardown for a llite superblock.
 * In order: unlinks the sbi from ll_conn_chain, calls
 * ll_commitcbd_cleanup(), disconnects the OSC connection, issues an
 * mdc_getstatus() on the MDC connection, removes the lprocfs entries,
 * disconnects the MDC connection, calls shrink_dcache_parent() on every
 * entry of ll_orphan_dentry_list under dcache_lock, and frees the sbi.
 * The return values of obd_disconnect() and mdc_getstatus() are discarded.
 */
-static void ll_put_super(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct list_head *tmp, *next;
- struct ll_fid rootfid;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
- list_del(&sbi->ll_conn_chain);
- ll_commitcbd_cleanup(sbi);
- obd_disconnect(&sbi->ll_osc_conn, 0);
-
- /* NULL request to force sync on the MDS, and get the last_committed
- * value to flush remaining RPCs from the pending queue on client.
- *
- * XXX This should be an mdc_sync() call to sync the whole MDS fs,
- * which we can call for other reasons as well.
- */
- mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
-
- lprocfs_unregister_mountpoint(sbi);
- if (sbi->ll_proc_root) {
- lprocfs_remove(sbi->ll_proc_root);
- sbi->ll_proc_root = NULL;
- }
-
- obd_disconnect(&sbi->ll_mdc_conn, 0);
-
/* orphan dentries are linked into the list through their d_hash member */
- spin_lock(&dcache_lock);
- list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){
- struct dentry *dentry = list_entry(tmp, struct dentry, d_hash);
- shrink_dcache_parent(dentry);
- }
- spin_unlock(&dcache_lock);
-
/* sbi must not be referenced after this point */
- OBD_FREE(sbi, sizeof(*sbi));
-
- EXIT;
-} /* ll_put_super */
-
/* (Removed by this patch.)  Releases the Lustre-private state hanging off
 * an inode: logs an error if i_count is still non-zero, frees the
 * in-memory stripe metadata (lli_smd) through obd_free_memmd(), and frees
 * the cached symlink name, whose allocation size is taken to be
 * strlen() + 1.  The lock-cancellation calls are compiled out by the
 * #if 0 below, which leaves `rc` declared but never used.
 */
-static void ll_clear_inode(struct inode *inode)
-{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
-#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?"
-#if 0
- rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
- if (rc < 0) {
- CERROR("ll_mdc_cancel_unused: %d\n", rc);
- /* XXX FIXME do something dramatic */
- }
-
- if (lli->lli_smd) {
- rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
- if (rc < 0) {
- CERROR("obd_cancel_unused: %d\n", rc);
- /* XXX FIXME do something dramatic */
- }
- }
-#endif
-
/* diagnostic only: the inode is torn down regardless of the count */
- if (atomic_read(&inode->i_count) != 0)
- CERROR("clearing in-use inode %lu: count = %d\n",
- inode->i_ino, atomic_read(&inode->i_count));
-
- if (lli->lli_smd) {
- obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
- lli->lli_smd = NULL;
- }
-
- if (lli->lli_symlink_name) {
- OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
- lli->lli_symlink_name = NULL;
- }
-
- EXIT;
-}
-
/* (Removed by this patch; it was already compiled out with #if 0.)
 * For a regular file that has stripe metadata with a non-zero object id,
 * builds an obdo carrying the object id and mode and asks obd_destroy()
 * to remove the object; a destroy failure is only logged at D_SUPER.
 * clear_inode() runs on every path.  The function is void, so the values
 * passed as the second GOTO() argument are not returned to the caller.
 */
-#if 0
-static void ll_delete_inode(struct inode *inode)
-{
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
- if (S_ISREG(inode->i_mode)) {
- int err;
- struct obdo *oa;
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-
- /* mcreate with no open */
- if (!lsm)
- GOTO(out, 0);
-
- if (lsm->lsm_object_id == 0) {
- CERROR("This really happens\n");
- /* No obdo was ever created */
- GOTO(out, 0);
- }
-
- oa = obdo_alloc();
- if (oa == NULL)
- GOTO(out, -ENOMEM);
-
- oa->o_id = lsm->lsm_object_id;
- oa->o_mode = inode->i_mode;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
-
- err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
- obdo_free(oa);
- if (err)
- CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
- lsm->lsm_object_id, err);
- }
-out:
- clear_inode(inode);
- EXIT;
-}
-#endif
-
/* (Removed by this patch.)  Copies the fields selected by attr->ia_valid
 * into the in-core inode.
 *
 * trunc != 0: a size change is checked against ll_file_maxbytes() and
 *             then carried out with vmtruncate().
 * trunc == 0: a size change is stored straight into inode->i_size.
 *
 * Returns 0 on success, -EFBIG when the requested size exceeds the
 * per-file maximum, or the error from vmtruncate().  On either error no
 * other attribute has been applied yet.
 */
-/* like inode_setattr, but doesn't mark the inode dirty */
-static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
-{
- unsigned int ia_valid = attr->ia_valid;
- int error = 0;
-
- if ((ia_valid & ATTR_SIZE) && trunc) {
- if (attr->ia_size > ll_file_maxbytes(inode)) {
- error = -EFBIG;
- goto out;
- }
- error = vmtruncate(inode, attr->ia_size);
- if (error)
- goto out;
- } else if (ia_valid & ATTR_SIZE)
- inode->i_size = attr->ia_size;
-
- if (ia_valid & ATTR_UID)
- inode->i_uid = attr->ia_uid;
- if (ia_valid & ATTR_GID)
- inode->i_gid = attr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- inode->i_atime = attr->ia_atime;
- if (ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
- if (ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
- if (ia_valid & ATTR_MODE) {
- inode->i_mode = attr->ia_mode;
/* drop setgid unless the caller is in the owning group or has
 * CAP_FSETID; i_gid here already reflects any ATTR_GID change above */
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- inode->i_mode &= ~S_ISGID;
- }
-out:
- return error;
-}
-
/* (Removed by this patch.)  Applies attr to the in-core inode through
 * ll_attr2inode() and then propagates it to the servers.
 *
 * ATTR_SIZE is cleared from attr->ia_valid (a change the caller can see)
 * before anything is sent.  If other bits remain, mdc_setattr() sends
 * them to the MDS.  For regular files with ATTR_MTIME_SET, obd_setattr()
 * also pushes the mtime to the OST object.  An mdc_setattr() failure is
 * logged but does not skip the OST update.
 *
 * Returns the ll_attr2inode() error, else the mdc_setattr() error, else
 * the obd_setattr() error, else 0.
 */
-int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
-{
- struct ptlrpc_request *request = NULL;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int err = 0;
-
- ENTRY;
-
- /* change incore inode */
- err = ll_attr2inode(inode, attr, do_trunc);
- if (err)
- RETURN(err);
-
- /* Don't send size changes to MDS to avoid "fast EA" problems, and
- * also avoid a pointless RPC (we get file size from OST anyways).
- */
- attr->ia_valid &= ~ATTR_SIZE;
- if (attr->ia_valid) {
- struct mdc_op_data op_data;
-
- ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
-
- err = mdc_setattr(&sbi->ll_mdc_conn, &op_data,
- attr, NULL, 0, &request);
- if (err)
- CERROR("mdc_setattr fails: err = %d\n", err);
-
- ptlrpc_req_finished(request);
- if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) {
/* NOTE(review): lsm is dereferenced below without a NULL check, while
 * ll_delete_inode in this same file treats a NULL lli_smd as a real case
 * ("mcreate with no open") -- confirm it cannot be NULL here */
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct obdo oa;
- int err2;
-
- CDEBUG(D_ERROR, "setting mtime on OST\n");
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME;
- oa.o_mtime = LTIME_S(attr->ia_mtime);
- err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL);
- if (err2) {
/* NOTE(review): the message prints err (the mdc_setattr result), not the
 * err2 that triggered this branch */
- CERROR("obd_setattr fails: rc=%d\n", err);
- if (!err)
- err = err2;
- }
- }
- }
-
- RETURN(err);
-}
-
/* (Removed by this patch.)  setattr entry point taking a dentry: runs
 * inode_change_ok() on the dentry's inode and returns its error if any;
 * otherwise bumps the LPROC_LL_SETATTR counter and delegates to
 * ll_inode_setattr() with do_trunc = 1.
 */
-int ll_setattr(struct dentry *de, struct iattr *attr)
-{
- int rc = inode_change_ok(de->d_inode, attr);
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name);
- if (rc)
- return rc;
-
- LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR);
- return ll_inode_setattr(de->d_inode, attr, 1);
-}
-
/* (Removed by this patch.)  Fills *sfs for statfs.
 *
 * Step 1: zero *sfs, query the MDC connection with obd_statfs() and
 *         unpack the result into *sfs.
 * Step 2 (only if step 1 succeeded): query the OSC connection and
 *         replace the block counts with the OSC figures; the file counts
 *         are adjusted only when the OSC reports fewer free objects than
 *         sfs->f_ffree.
 *
 * Returns 0 on success or the first obd_statfs() error.
 */
-static int ll_statfs(struct super_block *sb, struct statfs *sfs)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_statfs osfs;
- int rc;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
- LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS);
- memset(sfs, 0, sizeof(*sfs));
- rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
/* NOTE(review): osfs is unpacked before rc is examined; whether osfs holds
 * defined contents after a failed obd_statfs() is not visible here */
- statfs_unpack(sfs, &osfs);
- if (rc)
- CERROR("mdc_statfs fails: rc = %d\n", rc);
- else
- CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64
- " objects "LPU64"/"LPU64"\n",
- osfs.os_bavail, osfs.os_blocks,
- osfs.os_ffree, osfs.os_files);
-
- /* temporary until mds_statfs returns statfs info for all OSTs */
- if (!rc) {
- rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
- if (rc) {
- CERROR("obd_statfs fails: rc = %d\n", rc);
- GOTO(out, rc);
- }
- CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64
- " objects "LPU64"/"LPU64"\n",
- osfs.os_bavail, osfs.os_blocks,
- osfs.os_ffree, osfs.os_files);
-
/* rescale: double the block size and halve the block counts until the
 * total block count fits in an unsigned long */
- while (osfs.os_blocks > ~0UL) {
- sfs->f_bsize <<= 1;
-
- osfs.os_blocks >>= 1;
- osfs.os_bfree >>= 1;
- osfs.os_bavail >>= 1;
- }
- sfs->f_blocks = osfs.os_blocks;
- sfs->f_bfree = osfs.os_bfree;
- sfs->f_bavail = osfs.os_bavail;
/* keep the "used files" figure (f_files - f_ffree) and substitute the
 * smaller OSC free-object count */
- if (osfs.os_ffree < (__u64)sfs->f_ffree) {
- sfs->f_files = (sfs->f_files - sfs->f_ffree) +
- osfs.os_ffree;
- sfs->f_ffree = osfs.os_ffree;
- }
- }
-
-out:
- RETURN(rc);
-}
-
/* (Removed by this patch.)  Refreshes the in-core inode from an MDS reply
 * body.  Only the fields whose flag is set in body->valid are copied.
 *
 * lsm must be non-NULL exactly when body->valid has OBD_MD_FLEASIZE set
 * (asserted).  The first lsm seen is attached to the inode and
 * lli_maxbytes is taken from it, capped at PAGE_CACHE_MAXBYTES.  Any
 * later lsm must compare equal to the attached one over sizeof(*lsm)
 * bytes (asserted).
 */
-void ll_update_inode(struct inode *inode, struct mds_body *body,
- struct lov_stripe_md *lsm)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
-
- LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
- if (lsm != NULL) {
- if (lli->lli_smd == NULL) {
- lli->lli_smd = lsm;
- lli->lli_maxbytes = lsm->lsm_maxbytes;
- if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
- lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
- } else {
- LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm)));
- }
- }
-
- if (body->valid & OBD_MD_FLID)
- inode->i_ino = body->ino;
- if (body->valid & OBD_MD_FLATIME)
- LTIME_S(inode->i_atime) = body->atime;
- if (body->valid & OBD_MD_FLMTIME)
- LTIME_S(inode->i_mtime) = body->mtime;
- if (body->valid & OBD_MD_FLCTIME)
- LTIME_S(inode->i_ctime) = body->ctime;
/* FLMODE replaces only the permission bits, FLTYPE only the S_IFMT bits */
- if (body->valid & OBD_MD_FLMODE)
- inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
- if (body->valid & OBD_MD_FLTYPE)
- inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
- if (body->valid & OBD_MD_FLUID)
- inode->i_uid = body->uid;
- if (body->valid & OBD_MD_FLGID)
- inode->i_gid = body->gid;
- if (body->valid & OBD_MD_FLFLAGS)
- inode->i_flags = body->flags;
- if (body->valid & OBD_MD_FLNLINK)
- inode->i_nlink = body->nlink;
- if (body->valid & OBD_MD_FLGENER)
- inode->i_generation = body->generation;
- if (body->valid & OBD_MD_FLRDEV)
- inode->i_rdev = to_kdev_t(body->rdev);
- if (body->valid & OBD_MD_FLSIZE)
- inode->i_size = body->size;
- if (body->valid & OBD_MD_FLBLOCKS)
- inode->i_blocks = body->blocks;
-}
-
/* (Removed by this patch.)  Initialises a freshly obtained inode from the
 * ll_read_inode2_cookie passed in `opaque`: resets the Lustre-private
 * fields, applies the MDS attributes through ll_update_inode(), and
 * installs the inode/file/address-space operations matching the file
 * type.  Other types go through init_special_inode().  rc is never
 * modified, so the function always returns 0.
 */
-int ll_read_inode2(struct inode *inode, void *opaque)
-{
- struct ll_read_inode2_cookie *lic = opaque;
/* NOTE(review): lic is dereferenced here, so the `lic ? ... : NULL` test
 * further down cannot protect against a NULL cookie */
- struct mds_body *body = lic->lic_body;
- struct ll_inode_info *lli = ll_i2info(inode);
- int rc = 0;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
-
- sema_init(&lli->lli_open_sem, 1);
- /* these are 2.4 only, but putting them here for consistency.. */
- spin_lock_init(&lli->lli_read_extent_lock);
- INIT_LIST_HEAD(&lli->lli_read_extents);
- ll_lldo_init(&lli->lli_dirty);
- lli->lli_flags = 0;
- lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
-
- LASSERT(!lli->lli_smd);
-
- /* core attributes first */
- ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL);
-
- /* OIDEBUG(inode); */
-
- if (S_ISREG(inode->i_mode)) {
- inode->i_op = &ll_file_inode_operations;
- inode->i_fop = &ll_file_operations;
- inode->i_mapping->a_ops = &ll_aops;
- EXIT;
- } else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
- inode->i_mapping->a_ops = &ll_dir_aops;
- EXIT;
- } else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &ll_fast_symlink_inode_operations;
- EXIT;
- } else {
- inode->i_op = &ll_special_inode_operations;
- init_special_inode(inode, inode->i_mode,
- kdev_t_to_nr(inode->i_rdev));
- EXIT;
- }
-
- return rc;
-}
-
-
/* (Removed by this patch.)  umount_begin hook.  For the MDC connection
 * and then the OSC connection it sets obd_no_recov on the underlying obd
 * device and issues IOC_OSC_SET_ACTIVE with a zero-filled
 * obd_ioctl_data.  It then yields once with schedule().  The
 * obd_iocontrol() results are not checked.
 */
-void ll_umount_begin(struct super_block *sb)
-{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
- struct obd_ioctl_data ioc_data = { 0 };
-
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:\n");
-
- obd = class_conn2obd(&sbi->ll_mdc_conn);
- obd->obd_no_recov = 1;
- obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data,
- &ioc_data, NULL);
-
- obd = class_conn2obd(&sbi->ll_osc_conn);
- obd->obd_no_recov = 1;
- obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data,
- &ioc_data, NULL);
-
- /* Really, we'd like to wait until there are no requests outstanding,
- * and then continue. For now, we just invalidate the requests,
- * schedule, and hope.
- */
- schedule();
-
- EXIT;
-}
-
static kmem_cache_t *ll_inode_cachep;
/* alloc_inode hook: allocates a struct ll_inode_info from ll_inode_cachep
 * and returns the VFS inode embedded in it, or NULL when the slab
 * allocation fails.  The allocation counter is bumped before the
 * allocation is attempted, so failed attempts are counted too.
 *
 * This hunk replaces the open-coded field initialisation with
 * inode_init_once() on the embedded inode followed by ll_lli_init(), and
 * switches the counter macro to lprocfs_counter_incr() on ll_stats.
 */
static struct inode *ll_alloc_inode(struct super_block *sb)
{
struct ll_inode_info *lli;
- LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE);
+ lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE);
OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli);
if (lli == NULL)
return NULL;
- memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli);
- sema_init(&lli->lli_open_sem, 1);
- init_MUTEX(&lli->lli_size_valid_sem);
- lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+ inode_init_once(&lli->lli_vfs_inode);
+ ll_lli_init(lli);
return &lli->lli_vfs_inode;
}
/* destroy_inode hook: maps the VFS inode back to its containing
 * ll_inode_info and returns that object to ll_inode_cachep.  This hunk
 * changes the OBD_SLAB_FREE() argument order to (pointer, cache, size)
 * and sizes the free from the pointer rather than the type name.
 */
static void ll_destroy_inode(struct inode *inode)
{
- OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode),
- sizeof(struct ll_inode_info));
+ struct ll_inode_info *ptr = ll_i2info(inode);
+ OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr));
}
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
CERROR("ll_inode_cache: not all structures were freed\n");
}
-
-
/* exported operations */
struct super_operations ll_super_operations =
{
alloc_inode: ll_alloc_inode,
destroy_inode: ll_destroy_inode,
clear_inode: ll_clear_inode,
-// delete_inode: ll_delete_inode,
put_super: ll_put_super,
statfs: ll_statfs,
umount_begin: ll_umount_begin
#include <linux/stat.h>
#include <linux/smp_lock.h>
#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
#define DEBUG_SUBSYSTEM S_LLITE
#include <linux/lustre_lite.h>
+#include "llite_internal.h"
static int ll_readlink_internal(struct inode *inode,
struct ptlrpc_request **request, char **symname)
RETURN(rc);
}
/* follow_link for Lustre symlinks.  This hunk merges the two
 * kernel-version-specific variants into one function with the plain
 * (dentry, nd) signature.
 *
 * Resulting function (context plus '+' lines):
 *  - takes the lookup intent from nd through ll_nd2it();
 *  - if there is an intent, saves it_op/it_mode, releases the intent,
 *    and restores the two fields;
 *  - reads the link target with ll_readlink_internal() while holding
 *    lli_open_sem, and releases the semaphore before rc is examined;
 *  - on success, follows the target with vfs_follow_link() and then
 *    drops the request.
 * Returns the ll_readlink_internal() error or the vfs_follow_link()
 * result.
 */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd,
- struct lookup_intent *it)
+static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
struct ll_inode_info *lli = ll_i2info(inode);
+ struct lookup_intent *it = ll_nd2it(nd);
struct ptlrpc_request *request;
- int op = 0, mode = 0, rc;
+ int rc;
char *symname;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op\n");
if (it != NULL) {
- op = it->it_op;
- mode = it->it_mode;
-
- ll_intent_release(dentry, it);
- }
-
- down(&lli->lli_open_sem);
- rc = ll_readlink_internal(inode, &request, &symname);
- up(&lli->lli_open_sem);
- if (rc)
- GOTO(out, rc);
+ int op = it->it_op;
+ int mode = it->it_mode;
- if (it != NULL) {
+ ll_intent_release(it);
it->it_op = op;
it->it_mode = mode;
}
- rc = vfs_follow_link_it(nd, symname, it);
- ptlrpc_req_finished(request);
- out:
- RETURN(rc);
-}
-#else
-static int ll_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- struct inode *inode = dentry->d_inode;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ptlrpc_request *request;
- int op = 0, mode = 0, rc;
- char *symname;
- ENTRY;
-
- op = nd->it.it_op;
- mode = nd->it.it_mode;
-
- ll_intent_release(dentry, &nd->it);
-
+ CDEBUG(D_VFSTRACE, "VFS Op\n");
down(&lli->lli_open_sem);
-
rc = ll_readlink_internal(inode, &request, &symname);
+ up(&lli->lli_open_sem);
if (rc)
GOTO(out, rc);
- nd->it.it_op = op;
- nd->it.it_mode = mode;
-
rc = vfs_follow_link(nd, symname);
ptlrpc_req_finished(request);
out:
- up(&lli->lli_open_sem);
-
RETURN(rc);
}
-#endif
-extern int ll_inode_revalidate(struct dentry *dentry);
-extern int ll_setattr(struct dentry *de, struct iattr *attr);
/* Inode operations installed on symlink inodes.  This hunk wires
 * ll_follow_link into the follow_link slot (previously follow_link2),
 * uses revalidate_it/ll_inode_revalidate_it on kernels older than 2.5.0,
 * and adds getattr_it/ll_getattr for all other kernels.
 */
struct inode_operations ll_fast_symlink_inode_operations = {
readlink: ll_readlink,
setattr: ll_setattr,
setattr_raw: ll_setattr_raw,
- follow_link2: ll_follow_link,
+ follow_link: ll_follow_link,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- revalidate: ll_inode_revalidate
+ revalidate_it: ll_inode_revalidate_it
+#else
+ getattr_it: ll_getattr
#endif
};
.deps
Makefile
Makefile.in
+.*.cmd
if LIBLUSTRE
lib_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_obd.c lov_pack.c
+liblov_a_SOURCES = lov_obd.c lov_pack.c lov_internal.h
else
MODULE = lov
modulefs_DATA = lov.o
EXTRA_PROGRAMS = lov
-lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c
+lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c lov_internal.h
endif
include $(top_srcdir)/Rules
#include <linux/seq_file.h>
#include <linux/lprocfs_status.h>
+#include "lov_internal.h"
+
+static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+ int stripeno, obd_off *obd_off);
+
struct lov_file_handles {
struct portals_handle lfh_handle;
atomic_t lfh_refcount;
struct lov_file_handles *lfh = lfhp;
atomic_inc(&lfh->lfh_refcount);
- CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh,
+ CDEBUG(D_MALLOC, "GETting lfh %p : new refcount %d\n", lfh,
atomic_read(&lfh->lfh_refcount));
}
static void lov_lfh_put(struct lov_file_handles *lfh)
{
- CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh,
+ CDEBUG(D_MALLOC, "PUTting lfh %p : new refcount %d\n", lfh,
atomic_read(&lfh->lfh_refcount) - 1);
LASSERT(atomic_read(&lfh->lfh_refcount) > 0 &&
atomic_read(&lfh->lfh_refcount) < 0x5a5a);
struct proc_dir_entry *entry;
int rc;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(lov, &lvars);
rc = lprocfs_obd_attach(dev, lvars.obd_vars);
- if (rc)
+ if (rc)
return rc;
entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry);
- if (entry == NULL)
+ if (entry == NULL)
RETURN(-ENOMEM);
- entry->proc_fops = &ll_proc_target_fops;
+ entry->proc_fops = &lov_proc_target_fops;
entry->data = dev;
-
+
return rc;
-
}
int lov_detach(struct obd_device *dev)
if (rc)
RETURN(rc);
+ exp = class_conn2export(conn);
+ spin_lock_init(&exp->exp_lov_data.led_lock);
+ INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+
/* We don't want to actually do the underlying connections more than
* once, so keep track. */
lov->refcount++;
- if (lov->refcount > 1)
+ if (lov->refcount > 1) {
+ class_export_put(exp);
RETURN(0);
-
- exp = class_conn2export(conn);
- spin_lock_init(&exp->exp_lov_data.led_lock);
- INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
+ }
/* retrieve LOV metadata from MDS */
rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid);
* array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are
* terminated), but I still need to verify it makes overall
* sense */
- mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc));
- LASSERT (mdesc != NULL);
- LASSERT_REPSWABBED (req, 0);
+ mdesc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*mdesc));
+ LASSERT(mdesc != NULL);
+ LASSERT_REPSWABBED(req, 0);
*desc = *mdesc;
* demands on memory here. */
lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count;
OBD_ALLOC(lov->tgts, lov->bufsize);
- if (!lov->tgts) {
+ if (lov->tgts == NULL) {
CERROR("Out of memory\n");
GOTO(out_req, rc = -ENOMEM);
}
uuids = lustre_msg_buf(req->rq_repmsg, 1,
sizeof(*uuids) * desc->ld_tgt_count);
- LASSERT (uuids != NULL);
- LASSERT_REPSWABBED (req, 1);
+ LASSERT(uuids != NULL);
+ LASSERT_REPSWABBED(req, 1);
for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) {
struct obd_uuid *uuid = &tgts->uuid;
}
mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL);
- ptlrpc_req_finished (req);
+ mdc->cl_max_mds_cookiesize = desc->ld_tgt_count *
+ sizeof(struct llog_cookie);
+ ptlrpc_req_finished(req);
class_export_put(exp);
RETURN (0);
RETURN (rc);
}
/* Disconnects a LOV export.  Only the patch hunks of this function are
 * shown; parts of the body are elided between them.
 *
 * Visible changes: the second parameter is renamed from `failover` to
 * `flags` and is forwarded unchanged to obd_disconnect() for each
 * target, and the out_local label moves from just before
 * class_disconnect() to just before the export lookup, so a jump to
 * out_local now also executes the export lookup and the statements that
 * follow it.
 */
-static int lov_disconnect(struct lustre_handle *conn, int failover)
+static int lov_disconnect(struct lustre_handle *conn, int flags)
{
struct obd_device *obd = class_conn2obd(conn);
struct lov_obd *lov = &obd->u.lov;
class_conn2obd(&lov->tgts[i].conn);
osc_obd->obd_no_recov = 1;
}
- rc = obd_disconnect(&lov->tgts[i].conn, failover);
+ rc = obd_disconnect(&lov->tgts[i].conn, flags);
if (rc) {
if (lov->tgts[i].active) {
CERROR("Target %s disconnect error %d\n",
lov->bufsize = 0;
lov->tgts = NULL;
+ out_local:
exp = class_conn2export(conn);
if (exp == NULL) {
CERROR("export handle "LPU64" invalid! If you can reproduce, "
spin_unlock(&exp->exp_lov_data.led_lock);
class_export_put(exp);
- out_local:
rc = class_disconnect(conn, 0);
RETURN(rc);
}
/* Folds the attributes of one stripe object (src) into the aggregate
 * obdo (tgt).  *set records whether tgt already holds data from an
 * earlier stripe.  Only the patch hunks are shown; part of the
 * "*set != 0" branch is elided.
 *
 * Visible changes: `valid` is first narrowed to the fields src actually
 * carries (src->o_valid).  On the first merge the whole obdo is copied
 * with memcpy() instead of obdo_cpy_md(), and the object id is then
 * reset to the LOV object id from lsm.
 */
static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid,
struct lov_stripe_md *lsm, int stripeno, int *set)
{
+ valid &= src->o_valid;
+
if (*set) {
if (valid & OBD_MD_FLSIZE) {
/* this handles sparse files properly */
if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
tgt->o_mtime = src->o_mtime;
} else {
- obdo_cpy_md(tgt, src, valid);
+ memcpy(tgt, src, sizeof(*tgt));
+ tgt->o_id = lsm->lsm_object_id;
if (valid & OBD_MD_FLSIZE)
tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
*set = 1;
}
}
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
/* Creates the per-OST sub-objects for one LOV object.  Only the patch
 * hunks are shown: parts of the body are elided between them, and the
 * lines after `return rc;` belong to the out_cleanup path, whose label
 * is not visible.
 *
 * Visible changes made by the patch:
 *  - the caller's obdo (src_oa) is no longer updated stripe by stripe;
 *    results are merged into a scratch ret_oa, which is copied back over
 *    src_oa at out_done;
 *  - when src_oa carries a size, the stripe count for a new lsm is
 *    derived from that size, and each sub-object is created with a
 *    per-stripe size obtained from lov_stripe_offset();
 *  - when src_oa has OBD_MD_FLCOOKIE, llog cookies are allocated on oti.
 *    oti->oti_logcookies is advanced as a cursor per created object,
 *    while `cookies` keeps the base pointer, which is restored before
 *    returning.
 */
/* the LOV expects oa->o_id to be set to the LOV object id */
-static int lov_create(struct lustre_handle *conn, struct obdo *oa,
+static int lov_create(struct lustre_handle *conn, struct obdo *src_oa,
struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
struct lov_stripe_md *lsm;
- struct lov_oinfo *loi;
- struct obdo *tmp;
+ struct lov_oinfo *loi = NULL;
+ struct obdo *tmp_oa, *ret_oa;
+ struct llog_cookie *cookies = NULL;
unsigned ost_count, ost_idx;
- int set = 0, obj_alloc = 0;
- int rc = 0, i;
+ int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i;
ENTRY;
LASSERT(ea);
if (!export)
- GOTO(out_exp, rc = -EINVAL);
+ RETURN(-EINVAL);
lov = &export->exp_obd->u.lov;
if (!lov->desc.ld_active_tgt_count)
GOTO(out_exp, rc = -EIO);
- tmp = obdo_alloc();
- if (!tmp)
+ ret_oa = obdo_alloc();
+ if (!ret_oa)
GOTO(out_exp, rc = -ENOMEM);
+ tmp_oa = obdo_alloc();
+ if (!tmp_oa)
+ GOTO(out_oa, rc = -ENOMEM);
+
lsm = *ea;
if (!lsm) {
- rc = obd_alloc_memmd(conn, &lsm);
+ int stripes;
+ ost_count = lov_get_stripecnt(lov, 0);
+
+ /* If the MDS file was truncated up to some size, stripe over
+ * enough OSTs to allow the file to be created at that size.
+ */
+ if (src_oa->o_valid & OBD_MD_FLSIZE) {
/* NOTE(review): stripes is a plain int that receives a shifted 64-bit
 * size and is then passed to do_div(); confirm do_div() accepts a
 * dividend of this type and that the value cannot truncate */
+ stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
+ do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
+
/* NOTE(review): ret_oa and tmp_oa are already allocated at this point,
 * and the out_exp label sits below both obdo_free() calls, so this jump
 * appears to skip freeing them -- out_tmp looks like the intended target */
+ if (stripes > lov->desc.ld_active_tgt_count)
+ GOTO(out_exp, rc = -EFBIG);
+ if (stripes < ost_count)
+ stripes = ost_count;
+ } else
+ stripes = ost_count;
+
+ rc = lov_alloc_memmd(&lsm, stripes);
if (rc < 0)
GOTO(out_tmp, rc);
rc = 0;
- lsm->lsm_magic = LOV_MAGIC;
}
ost_count = lov->desc.ld_tgt_count;
- LASSERT(oa->o_valid & OBD_MD_FLID);
- lsm->lsm_object_id = oa->o_id;
+ LASSERT(src_oa->o_valid & OBD_MD_FLID);
+ lsm->lsm_object_id = src_oa->o_id;
if (!lsm->lsm_stripe_size)
lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
get_random_bytes(&ost_idx, 2);
ost_idx %= ost_count;
- } else
+ } else {
ost_idx = lsm->lsm_stripe_offset;
+ }
CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
+ /* XXX LOV STACKING: need to figure out how many real OSCs */
+ if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+ oti_alloc_cookies(oti, lsm->lsm_stripe_count);
+ if (!oti->oti_logcookies)
+ GOTO(out_cleanup, rc = -ENOMEM);
+ cookies = oti->oti_logcookies;
+ }
+
loi = lsm->lsm_oinfo;
for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
struct lov_stripe_md obj_md;
}
/* create data objects with "parent" OA */
- memcpy(tmp, oa, sizeof(*tmp));
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+
+ /* XXX When we start creating objects on demand, we need to
+ * make sure that we always create the object on the
+ * stripe which holds the existing file size.
+ */
+ if (src_oa->o_valid & OBD_MD_FLSIZE) {
+ if (lov_stripe_offset(lsm, src_oa->o_size, i,
+ &tmp_oa->o_size) < 0 &&
+ tmp_oa->o_size)
+ tmp_oa->o_size--;
+
+ CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+ i, tmp_oa->o_size, src_oa->o_size);
+ }
+
/* XXX: LOV STACKING: use real "obj_mdp" sub-data */
- err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti);
+ err = obd_create(&lov->tgts[ost_idx].conn, tmp_oa,&obj_mdp,oti);
if (err) {
if (lov->tgts[ost_idx].active) {
CERROR("error creating objid "LPX64" sub-object"
- " on OST idx %d/%d: rc = %d\n", oa->o_id,
- ost_idx, lsm->lsm_stripe_count, err);
+ " on OST idx %d/%d: rc = %d\n",
+ src_oa->o_id, ost_idx,
+ lsm->lsm_stripe_count, err);
if (err > 0) {
CERROR("obd_create returned invalid "
"err %d\n", err);
rc = err;
continue;
}
- loi->loi_id = tmp->o_id;
+ loi->loi_id = tmp_oa->o_id;
loi->loi_ost_idx = ost_idx;
CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
lsm->lsm_object_id, loi->loi_id, ost_idx);
if (set == 0)
lsm->lsm_stripe_offset = ost_idx;
- lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set);
- ot_init(&loi->loi_dirty_ot_inline);
+ lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+ obj_alloc, &set);
loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+ ot_init(loi->loi_dirty_ot);
/* advance the cookie cursor to the slot for the next sub-object */
+ if (cookies)
+ ++oti->oti_logcookies;
+ if (tmp_oa->o_valid & OBD_MD_FLCOOKIE)
+ ++cookie_sent;
++obj_alloc;
++loi;
GOTO(out_done, rc = 0);
}
+ /* If we were passed specific striping params, then a failure to
+ * meet those requirements is an error, since we can't reallocate
+ * that memory (it might be part of a larger array or something).
+ *
+ * We can only get here if lsm_stripe_count was originally > 1.
+ */
if (*ea != NULL) {
CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
} else {
struct lov_stripe_md *lsm_new;
/* XXX LOV STACKING call into osc for sizes */
- unsigned size = lov_stripe_md_size(obj_alloc);
+ unsigned oldsize, newsize;
+
+ if (oti && cookies && cookie_sent) {
+ oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
+ newsize = obj_alloc * sizeof(*cookies);
+
+ oti_alloc_cookies(oti, obj_alloc);
+ if (oti->oti_logcookies) {
+ memcpy(oti->oti_logcookies, cookies, newsize);
+ OBD_FREE(cookies, oldsize);
+ cookies = oti->oti_logcookies;
+ } else {
+ CWARN("'leaking' %d bytes\n", oldsize-newsize);
+ }
+ }
/* NOTE(review): the message reads "old %u new %u" but obj_alloc (the new
 * count) is passed first and lsm_stripe_count (the old count) second */
CERROR("reallocating LSM for objid "LPX64": old %u new %u\n",
lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count);
- OBD_ALLOC(lsm_new, size);
- if (!lsm_new)
- GOTO(out_cleanup, rc = -ENOMEM);
- memcpy(lsm_new, lsm, size);
- lsm_new->lsm_stripe_count = obj_alloc;
-
- /* XXX LOV STACKING call into osc for sizes */
- OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
- lsm = lsm_new;
-
+ oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
+ newsize = lov_stripe_md_size(obj_alloc);
+ OBD_ALLOC(lsm_new, newsize);
+ if (lsm_new != NULL) {
+ memcpy(lsm_new, lsm, newsize);
+ lsm_new->lsm_stripe_count = obj_alloc;
/* NOTE(review): the old lsm is freed with newsize, whereas the removed
 * code freed it with the size computed from the old stripe count
 * (oldsize here) -- this looks like it should be oldsize */
+ OBD_FREE(lsm, newsize);
+ lsm = lsm_new;
+ } else {
+ CWARN("'leaking' %d bytes\n", oldsize - newsize);
+ }
rc = 0;
}
out_done:
*ea = lsm;
+ if (src_oa->o_valid & OBD_MD_FLSIZE &&
+ ret_oa->o_size != src_oa->o_size) {
+ CERROR("original size "LPU64" isn't new object size "LPU64"\n",
+ src_oa->o_size, ret_oa->o_size);
+ LBUG();
+ }
/* hand the merged attributes back to the caller under the LOV object id */
+ ret_oa->o_id = src_oa->o_id;
+ memcpy(src_oa, ret_oa, sizeof(*src_oa));
out_tmp:
- obdo_free(tmp);
+ obdo_free(tmp_oa);
+ out_oa:
+ obdo_free(ret_oa);
/* restore the base cookie pointer that the per-object cursor moved away
 * from; drop the cookies entirely if no sub-object reported one */
+ if (oti && cookies) {
+ oti->oti_logcookies = cookies;
+ if (!cookie_sent) {
+ oti_free_cookies(oti);
+ src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
+ } else {
+ src_oa->o_valid |= OBD_MD_FLCOOKIE;
+ }
+ }
out_exp:
class_export_put(export);
return rc;
--loi;
/* destroy already created objects here */
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
- err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL,
- NULL);
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+ tmp_oa->o_id = loi->loi_id;
+
/* step the cookie cursor back in lockstep with loi and cancel that cookie */
+ if (oti && cookie_sent) {
+ err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn,
+ NULL, 1, --oti->oti_logcookies,
+ OBD_LLOG_FL_SENDNOW);
+ if (err)
+ CERROR("Failed to cancel objid "LPX64" subobj "
+ LPX64" cookie on OST idx %d: rc = %d\n",
+ src_oa->o_id, loi->loi_id,
+ loi->loi_ost_idx, err);
+ }
+
+ err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+ NULL, oti);
if (err)
- CERROR("Failed to uncreate objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx,
- err);
+ CERROR("Failed to uncreate objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n", src_oa->o_id,
+ loi->loi_id, loi->loi_ost_idx, err);
}
if (*ea == NULL)
obd_free_memmd(conn, &lsm);
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
- FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp,
- NULL, NULL);
+ NULL, oti);
if (err && lov->tgts[loi->loi_ost_idx].active) {
CERROR("error: destroying objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
- FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
return rc;
}
-static int lov_getattr_interpret(struct ptlrpc_request_set *rqset,
- struct lov_getattr_async_args *aa, int rc)
+static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data,
+ int rc)
{
+ struct lov_getattr_async_args *aa = data;
struct lov_stripe_md *lsm = aa->aa_lsm;
struct obdo *oa = aa->aa_oa;
- struct obdo *obdos = aa->aa_stripe_oas;
+ struct obdo *obdos = aa->aa_obdos;
struct lov_oinfo *loi;
int i;
int set = 0;
if (rc == 0) {
/* NB all stripe requests succeeded to get here */
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++,loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+ i++, loi++) {
if (obdos[i].o_valid == 0) /* inactive stripe */
continue;
memcpy(&obdos[i], oa, sizeof(obdos[i]));
obdos[i].o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i,
- FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(&obdos[i]), &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
else
obdos[i].o_valid &= ~OBD_MD_FLHANDLE;
aa = (struct lov_getattr_async_args *)&rqset->set_args;
aa->aa_lsm = lsm;
aa->aa_oa = oa;
- aa->aa_stripe_oas = obdos;
+ aa->aa_obdos = obdos;
GOTO (out, rc = 0);
out_obdos:
RETURN (rc);
}
/* Applies a setattr to every stripe object of a LOV object.  Only the
 * patch hunks are shown; parts of the body, including the declarations
 * of rc, i, set and lfh, are elided.
 *
 * Visible changes made by the patch:
 *  - the accepted o_valid mask grows from id/type/mtime to also allow
 *    mode, atime and ctime (asserted);
 *  - each stripe gets a full memcpy() of src_oa instead of
 *    obdo_cpy_md();
 *  - the attributes returned by each successful obd_setattr() are merged
 *    into a scratch ret_oa, which is copied back over src_oa at the end.
 *
 * Returns -ENODEV for a bad export, -ENOMEM when an obdo cannot be
 * allocated, the first error from an active target, or -EIO when no
 * stripe succeeded and no error was recorded.
 */
-static int lov_setattr(struct lustre_handle *conn, struct obdo *oa,
+static int lov_setattr(struct lustre_handle *conn, struct obdo *src_oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
- struct obdo *tmp;
+ struct obdo *tmp_oa, *ret_oa;
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
struct lov_oinfo *loi;
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
- /* size changes should go through punch and not setattr */
- LASSERT(!(oa->o_valid & OBD_MD_FLSIZE));
-
- /* for now, we only expect mtime updates here */
- LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME)));
-
- tmp = obdo_alloc();
- if (!tmp)
+ /* for now, we only expect time updates here */
+ LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLMODE|
+ OBD_MD_FLATIME | OBD_MD_FLMTIME |
+ OBD_MD_FLCTIME)));
+ ret_oa = obdo_alloc();
+ if (!ret_oa)
GOTO(out, rc = -ENOMEM);
/* NOTE(review): the lfh lookup below is removed, yet lfh is still tested
 * later in this function; its declaration and initial value are not
 * visible in these hunks -- confirm it starts out NULL */
- if (oa->o_valid & OBD_MD_FLHANDLE)
- lfh = lov_handle2lfh(obdo_handle(oa));
+ tmp_oa = obdo_alloc();
+ if (!tmp_oa)
+ GOTO(out_oa, rc = -ENOMEM);
lov = &export->exp_obd->u.lov;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
continue;
}
- obdo_cpy_md(tmp, oa, oa->o_valid);
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
if (lfh)
- memcpy(obdo_handle(tmp), lfh->lfh_och + i,
- FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
else
- tmp->o_valid &= ~OBD_MD_FLHANDLE;
+ tmp_oa->o_valid &= ~OBD_MD_FLHANDLE;
- tmp->o_id = loi->loi_id;
+ tmp_oa->o_id = loi->loi_id;
- err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+ err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
NULL, NULL);
if (err) {
if (lov->tgts[loi->loi_ost_idx].active) {
CERROR("error: setattr objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx,
- err);
+ src_oa->o_id, loi->loi_id,
+ loi->loi_ost_idx, err);
if (!rc)
rc = err;
}
- } else
- set = 1;
+ continue;
+ }
+
+ lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
}
- obdo_free(tmp);
if (!set && !rc)
rc = -EIO;
if (lfh != NULL)
lov_lfh_put(lfh);
- GOTO(out, rc);
- out:
+
/* NOTE(review): src_oa is overwritten from ret_oa even when no stripe
 * succeeded (set == 0) and ret_oa was never filled by lov_merge_attrs();
 * confirm obdo_alloc() zero-fills or that callers ignore src_oa on error */
+ ret_oa->o_id = src_oa->o_id;
+ memcpy(src_oa, ret_oa, sizeof(*src_oa));
+ GOTO(out_tmp, rc);
+out_tmp:
+ obdo_free(tmp_oa);
+out_oa:
+ obdo_free(ret_oa);
+out:
class_export_put(export);
return rc;
}
-static int lov_open(struct lustre_handle *conn, struct obdo *oa,
+static int lov_open(struct lustre_handle *conn, struct obdo *src_oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti,
struct obd_client_handle *och)
{
- struct obdo *tmp; /* on the heap here, on the stack in lov_close? */
+ struct obdo *tmp_oa, *ret_oa;
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
struct lov_oinfo *loi;
if (!export || !export->exp_obd)
GOTO(out_exp, rc = -ENODEV);
- tmp = obdo_alloc();
- if (!tmp)
+ ret_oa = obdo_alloc();
+ if (!ret_oa)
GOTO(out_exp, rc = -ENOMEM);
+ tmp_oa = obdo_alloc();
+ if (!tmp_oa)
+ GOTO(out_oa, rc = -ENOMEM);
+
lfh = lov_lfh_new();
if (lfh == NULL)
GOTO(out_tmp, rc = -ENOMEM);
- OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och);
+ OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof(*och));
if (!lfh->lfh_och)
GOTO(out_lfh, rc = -ENOMEM);
lov = &export->exp_obd->u.lov;
- oa->o_size = 0;
- oa->o_blocks = 0;
+ src_oa->o_size = 0;
+ src_oa->o_blocks = 0;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
if (lov->tgts[loi->loi_ost_idx].active == 0) {
CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
}
/* create data objects with "parent" OA */
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+ tmp_oa->o_id = loi->loi_id;
- rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp,
- NULL, NULL, lfh->lfh_och + i);
+ rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
+ NULL, NULL, &lfh->lfh_och[i]);
if (rc) {
if (!lov->tgts[loi->loi_ost_idx].active) {
rc = 0;
}
CERROR("error: open objid "LPX64" subobj "LPX64
" on OST idx %d: rc = %d\n",
- oa->o_id, lsm->lsm_oinfo[i].loi_id,
+ src_oa->o_id, lsm->lsm_oinfo[i].loi_id,
loi->loi_ost_idx, rc);
goto out_handles;
}
- lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
+ lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
}
lfh->lfh_count = lsm->lsm_stripe_count;
och->och_fh.cookie = lfh->lfh_handle.h_cookie;
- obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie;
- oa->o_valid |= OBD_MD_FLHANDLE;
+ obdo_handle(ret_oa)->cookie = lfh->lfh_handle.h_cookie;
+ ret_oa->o_valid |= OBD_MD_FLHANDLE;
+ ret_oa->o_id = src_oa->o_id;
+ memcpy(src_oa, ret_oa, sizeof(*src_oa));
- /* llfh refcount transfers to list */
+ /* lfh refcount transfers to list */
spin_lock(&export->exp_lov_data.led_lock);
list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
spin_unlock(&export->exp_lov_data.led_lock);
GOTO(out_tmp, rc);
out_tmp:
- obdo_free(tmp);
+ obdo_free(tmp_oa);
+ out_oa:
+ obdo_free(ret_oa);
out_exp:
class_export_put(export);
return rc;
if (lov->tgts[loi->loi_ost_idx].active == 0)
continue;
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
- memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE);
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+ tmp_oa->o_id = loi->loi_id;
+ memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i], FD_OSTDATA_SIZE);
- err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp,
+ err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa,
NULL, NULL);
if (err && lov->tgts[loi->loi_ost_idx].active) {
CERROR("error: closing objid "LPX64" subobj "LPX64
" on OST idx %d after open error: rc=%d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+ src_oa->o_id, loi->loi_id, loi->loi_ost_idx,err);
}
}
if (oa->o_valid & OBD_MD_FLHANDLE)
lfh = lov_handle2lfh(obdo_handle(oa));
+ if (!lfh)
+ LBUG();
lov = &export->exp_obd->u.lov;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
+ memcpy(obdo_handle(&tmp), &lfh->lfh_och[i],
FD_OSTDATA_SIZE);
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE);
lov_lfh_destroy(lfh);
+ LASSERT(atomic_read(&lfh->lfh_refcount) == 1);
lov_lfh_put(lfh); /* balance handle2lfh above */
- }
+ } else
+ LBUG();
GOTO(out, rc);
out:
class_export_put(export);
return rc;
}
-#ifndef log2
-#define log2(n) ffz(~(n))
-#endif
-
/* we have an offset in file backed by an lov and want to find out where
* that offset lands in our given stripe of the file. for the easy
* case where the offset is within the stripe, we just have to scale the
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
if (lfh)
- memcpy(obdo_handle(&tmp), lfh->lfh_och + i,
- FD_OSTDATA_SIZE);
+ memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
else
tmp.o_valid &= ~OBD_MD_FLHANDLE;
return 0;
}
-static int lov_brw(int cmd, struct lustre_handle *conn,
+static int lov_brw(int cmd, struct lustre_handle *conn, struct obdo *src_oa,
struct lov_stripe_md *lsm, obd_count oa_bufs,
struct brw_page *pga, struct obd_trans_info *oti)
{
int ost_idx;
} *stripeinfo, *si, *si_last;
struct obd_export *export = class_conn2export(conn);
+ struct obdo *ret_oa = NULL, *tmp_oa = NULL;
+ struct lov_file_handles *lfh = NULL;
struct lov_obd *lov;
struct brw_page *ioarr;
struct lov_oinfo *loi;
- int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
+ int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0;
ENTRY;
if (lsm_bad_magic(lsm))
if (!ioarr)
GOTO(out_where, rc = -ENOMEM);
+ if (src_oa) {
+ ret_oa = obdo_alloc();
+ if (!ret_oa)
+ GOTO(out_ioarr, rc = -ENOMEM);
+
+ tmp_oa = obdo_alloc();
+ if (!tmp_oa)
+ GOTO(out_oa, rc = -ENOMEM);
+
+ if (src_oa->o_valid & OBD_MD_FLHANDLE)
+ lfh = lov_handle2lfh(obdo_handle(src_oa));
+ else
+ src_oa->o_valid &= ~OBD_MD_FLHANDLE;
+ }
+
for (i = 0; i < oa_bufs; i++) {
where[i] = lov_stripe_number(lsm, pga[i].off);
stripeinfo[where[i]].bufct++;
if (lov->tgts[si->ost_idx].active == 0) {
CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
- GOTO(out_ioarr, rc = -EIO);
+ GOTO(out_oa, rc = -EIO);
}
if (si->bufct) {
LASSERT(shift < oa_bufs);
- rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn,
+ if (src_oa) {
+ memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
+ if (lfh)
+ memcpy(obdo_handle(tmp_oa),
+ &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
+ }
+
+ tmp_oa->o_id = si->lsm.lsm_object_id;
+ rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, tmp_oa,
&si->lsm, si->bufct, &ioarr[shift],
oti);
if (rc)
GOTO(out_ioarr, rc);
+
+ lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
+ i, &set);
}
}
- GOTO(out_ioarr, rc);
+
+ ret_oa->o_id = src_oa->o_id;
+ memcpy(src_oa, ret_oa, sizeof(*src_oa));
+
+ GOTO(out_oa, rc);
+ out_oa:
+ if (tmp_oa)
+ obdo_free(tmp_oa);
+ if (ret_oa)
+ obdo_free(ret_oa);
out_ioarr:
OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
out_where:
OBD_FREE(where, sizeof(*where) * oa_bufs);
+ if (lfh)
+ lov_lfh_put(lfh);
out_sinfo:
OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
out_exp:
return rc;
}
-static int lov_brw_interpret (struct ptlrpc_request_set *set,
- struct lov_brw_async_args *aa, int rc)
+/* Request-set completion callback installed by lov_brw_async().
+ *
+ * rqset: the completed request set (unused here).
+ * aa:    arguments stashed in the set by lov_brw_async().
+ * rc:    aggregate result of the per-stripe requests.
+ *
+ * When the caller supplied an obdo and every stripe request succeeded, the
+ * per-stripe attributes in aa->aa_obdos are merged back into aa->aa_oa.
+ * Returns rc, or -EIO if rc was 0 but no stripe contributed valid attributes.
+ * The obdos and ioarr arrays allocated by lov_brw_async() are freed here.
+ */
+static int lov_brw_interpret(struct ptlrpc_request_set *rqset,
+                             struct lov_brw_async_args *aa, int rc)
{
- obd_count oa_bufs = aa->aa_oa_bufs;
- struct brw_page *ioarr = aa->aa_ioarr;
+        struct lov_stripe_md *lsm = aa->aa_lsm;
+        obd_count oa_bufs = aa->aa_oa_bufs;
+        struct obdo *oa = aa->aa_oa;
+        struct obdo *obdos = aa->aa_obdos;
+        struct brw_page *ioarr = aa->aa_ioarr;
+        struct lov_oinfo *loi;
+        int i, set = 0;
ENTRY;
- OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs);
- RETURN (rc);
+        /* lov_brw_async() allocates the per-stripe obdos only when the
+         * caller passed an obdo, so both may legitimately be NULL; in that
+         * case there is nothing to merge and no attrs are expected. */
+        if (oa != NULL) {
+                if (rc == 0) {
+                        /* NB all stripe requests succeeded to get here */
+
+                        for (i = 0, loi = lsm->lsm_oinfo;
+                             i < lsm->lsm_stripe_count; i++, loi++) {
+                                if (obdos[i].o_valid == 0) /* inactive stripe */
+                                        continue;
+
+                                lov_merge_attrs(oa, &obdos[i],
+                                                obdos[i].o_valid, lsm, i,
+                                                &set);
+                        }
+
+                        if (!set) {
+                                CERROR("No stripes had valid attrs\n");
+                                rc = -EIO;
+                        }
+                }
+                /* hand the caller back the LOV object id, not a stripe's */
+                oa->o_id = lsm->lsm_object_id;
+        }
+
+        if (obdos != NULL)
+                OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
+        OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+        RETURN(rc);
}
-static int lov_brw_async(int cmd, struct lustre_handle *conn,
+static int lov_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm, obd_count oa_bufs,
struct brw_page *pga, struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
} *stripeinfo, *si, *si_last;
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
+ struct lov_file_handles *lfh = NULL;
struct brw_page *ioarr;
+ struct obdo *obdos = NULL;
struct lov_oinfo *loi;
struct lov_brw_async_args *aa;
int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
if (!where)
GOTO(out_sinfo, rc = -ENOMEM);
+ if (oa) {
+ OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count);
+ if (!obdos)
+ GOTO(out_where, rc = -ENOMEM);
+
+ if (oa->o_valid & OBD_MD_FLHANDLE)
+ lfh = lov_handle2lfh(obdo_handle(oa));
+ else
+ oa->o_valid &= ~OBD_MD_FLHANDLE;
+ }
+
OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
if (!ioarr)
- GOTO(out_where, rc = -ENOMEM);
+ GOTO(out_obdos, rc = -ENOMEM);
for (i = 0; i < oa_bufs; i++) {
where[i] = lov_stripe_number(lsm, pga[i].off);
si->index = si_last->index + si_last->bufct;
si->lsm.lsm_object_id = loi->loi_id;
si->ost_idx = loi->loi_ost_idx;
+
+ if (oa) {
+ memcpy(&obdos[i], oa, sizeof(*obdos));
+ obdos[i].o_id = si->lsm.lsm_object_id;
+ if (lfh)
+ memcpy(obdo_handle(&obdos[i]),
+ &lfh->lfh_och[i].och_fh,
+ sizeof(lfh->lfh_och[i].och_fh));
+ }
}
for (i = 0; i < oa_bufs; i++) {
}
LASSERT(shift < oa_bufs);
+
rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn,
- &si->lsm, si->bufct, &ioarr[shift],
- set, oti);
+ &obdos[i], &si->lsm, si->bufct,
+ &ioarr[shift], set, oti);
if (rc)
GOTO(out_ioarr, rc);
}
- LASSERT (rc == 0);
- LASSERT (set->set_interpret == NULL);
- set->set_interpret = lov_brw_interpret;
- LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args));
+ LASSERT(rc == 0);
+ LASSERT(set->set_interpret == NULL);
+ set->set_interpret = (set_interpreter_func)lov_brw_interpret;
+ LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args));
aa = (struct lov_brw_async_args *)&set->set_args;
- aa->aa_oa_bufs = oa_bufs;
+ aa->aa_lsm = lsm;
+ aa->aa_obdos = obdos;
+ aa->aa_oa = oa;
aa->aa_ioarr = ioarr;
+ aa->aa_oa_bufs = oa_bufs;
+
+ /* Don't free ioarr or obdos - that's done in lov_brw_interpret */
GOTO(out_where, rc);
+
out_ioarr:
OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+ out_obdos:
+ OBD_FREE(obdos, stripe_count * sizeof(*obdos));
out_where:
OBD_FREE(where, sizeof(*where) * oa_bufs);
+ if (lfh)
+ lov_lfh_put(lfh);
out_sinfo:
OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
out_exp:
(tot) += (add); \
} while(0)
-static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
+static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
- struct obd_export *tgt_export;
- struct lov_obd *lov;
+ struct lov_obd *lov = &obd->u.lov;
struct obd_statfs lov_sfs;
int set = 0;
int rc = 0;
int i;
ENTRY;
- if (!export || !export->exp_obd)
- RETURN(-ENODEV);
-
- lov = &export->exp_obd->u.lov;
/* We only get block data from the OBD */
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
continue;
}
- tgt_export = class_conn2export(&lov->tgts[i].conn);
- if (!tgt_export) {
- CDEBUG(D_HA, "lov idx %d NULL export\n", i);
- continue;
- }
-
- err = obd_statfs(tgt_export, &lov_sfs);
- class_export_put(tgt_export);
+ err = obd_statfs(class_conn2obd(&lov->tgts[i].conn), &lov_sfs,
+ max_age);
if (err) {
if (lov->tgts[i].active) {
CERROR("error: statfs OSC %s on OST idx %d: "
}
continue;
}
+
if (!set) {
memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
set = 1;
LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree);
}
}
+
if (set) {
__u32 expected_stripes = lov->desc.ld_default_stripe_count ?
lov->desc.ld_default_stripe_count :
do_div(osfs->os_ffree, expected_stripes);
} else if (!rc)
rc = -EIO;
+
RETURN(rc);
}
RETURN(-EINVAL);
}
-static int lov_mark_page_dirty(struct lustre_handle *conn,
+/* Forward a set_info request to every target of this LOV.
+ *
+ * Only keys that begin with "mds_conn" are accepted; any other key yields
+ * -EINVAL.  A connection handle that does not resolve to a device yields
+ * -ENODEV.  Every target is attempted even after a failure; the first
+ * non-zero result from obd_set_info() is returned, 0 if all succeeded.
+ */
+static int lov_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct obd_device *obddev = class_conn2obd(conn);
+        struct lov_obd *lov;
+        int i, rc = 0;
+        ENTRY;
+
+        if (keylen < strlen("mds_conn") ||
+            memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        /* checked after the key so that the -EINVAL cases are unchanged */
+        if (obddev == NULL)
+                RETURN(-ENODEV);
+
+        lov = &obddev->u.lov;
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                int er;
+
+                er = obd_set_info(&lov->tgts[i].conn, keylen, key, vallen, val);
+                if (!rc)
+                        rc = er;
+        }
+        RETURN(rc);
+}
+
+static int lov_mark_page_dirty(struct lustre_handle *conn,
struct lov_stripe_md *lsm, unsigned long offset)
{
struct lov_obd *lov = &class_conn2obd(conn)->u.lov;
RETURN(-ENOMEM);
stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
- lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
+ lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
&off);
off >>= PAGE_CACHE_SHIFT;
loi = &lsm->lsm_oinfo[stripe];
- CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
+ CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
(unsigned long)off, stripe);
submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
RETURN(rc);
}
-static int lov_clear_dirty_pages(struct lustre_handle *conn,
+static int lov_clear_dirty_pages(struct lustre_handle *conn,
struct lov_stripe_md *lsm, unsigned long start,
unsigned long end, unsigned long *cleared)
obd_start >>= PAGE_CACHE_SHIFT;
obd_end >>= PAGE_CACHE_SHIFT;
- CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
- start, end, (unsigned long)obd_start,
+ CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
+ start, end, (unsigned long)obd_start,
(unsigned long)obd_end, loi->loi_ost_idx);
submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
- rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
+ rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
submd, obd_start, obd_end,
&osc_cleared);
if (rc)
*offset = 0;
lov = &export->exp_obd->u.lov;
rc = -ENOENT;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
skip = (lsm->lsm_stripe_count - 1) * count;
submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
- err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
+ err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
submd, &tmp);
if (err == -ENOENT)
continue;
GOTO(out_exp, rc = err);
rc = 0;
- if (tmp != ~0)
+ if (tmp != ~0)
tmp += (tmp/count * skip) + (i * count);
if (tmp > *offset)
*offset = tmp;
RETURN(rc);
}
+/* For LOV catalogs, we "nest" catalogs from the parent catalog.  What this
+ * means is that the parent catalog has a bunch of log cookies that are
+ * pointing at one catalog for each OSC.  The OSC catalogs in turn hold
+ * cookies for actual log files.
+ *
+ * Creates one log per target under cathandle and stores it in the target's
+ * ltd_cathandle.  A target whose log cannot be created keeps the ERR_PTR
+ * value and is skipped.  Returns 0 and sets lov->lo_catalog_loaded on
+ * success; if llog_init_catalog() fails, the logs created for earlier
+ * targets are deleted and closed and that error is returned.
+ */
+static int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle)
+{
+        int i, rc;
+
+        ENTRY;
+        for (i = 0; i < lov->desc.ld_tgt_count; i++) {
+                lov->tgts[i].ltd_cathandle = llog_new_log(cathandle,
+                                                          &lov->tgts[i].uuid);
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                rc = llog_init_catalog(cathandle, &lov->tgts[i].uuid);
+                if (rc)
+                        GOTO(err_logs, rc);
+        }
+        lov->lo_catalog_loaded = 1;
+        RETURN(0);
+err_logs:
+        /* NOTE(review): the log created for the failing index itself is not
+         * released here (unchanged from before) - confirm whether
+         * llog_init_catalog() cleans up after itself on failure. */
+        while (i-- > 0) {
+                /* skipped targets hold an ERR_PTR, not a real handle */
+                if (IS_ERR(lov->tgts[i].ltd_cathandle))
+                        continue;
+                llog_delete_log(cathandle, lov->tgts[i].ltd_cathandle);
+                llog_close_log(cathandle, lov->tgts[i].ltd_cathandle);
+        }
+        RETURN(rc);
+}
+
+/* Add log records for each OSC that this object is striped over, and return
+ * cookies for each one.  We _would_ have nice abstraction here, except that
+ * we need to keep cookies in stripe order, even if some are NULL, so that
+ * the right cookies are passed back to the right OSTs at the client side.
+ * Unset cookies should be all-zero (which will never occur naturally).
+ *
+ * logcookies must have room for at least lsm->lsm_stripe_count entries.
+ * Returns the sum of the (non-negative) obd_log_add() results, or the first
+ * negative error from catalog setup or from a target's obd_log_add().
+ */
+static int lov_log_add(struct lustre_handle *conn,
+                       struct llog_handle *cathandle,
+                       struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_oinfo *loi;
+        int i, rc = 0;
+        ENTRY;
+
+        LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count);
+
+        if (unlikely(!lov->lo_catalog_loaded)) {
+                /* on failure the per-target handles have been closed again,
+                 * so they must not be used below */
+                rc = lov_get_catalogs(lov, cathandle);
+                if (rc)
+                        RETURN(rc);
+        }
+
+        for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+                int added;
+
+                added = obd_log_add(&lov->tgts[loi->loi_ost_idx].conn,
+                                    lov->tgts[loi->loi_ost_idx].ltd_cathandle,
+                                    rec, NULL, logcookies + rc,
+                                    numcookies - rc);
+                /* rc doubles as the offset into logcookies; never fold a
+                 * negative errno into it */
+                if (added < 0)
+                        RETURN(added);
+                rc += added;
+        }
+
+        RETURN(rc);
+}
+
+/* Cancel one log cookie on each stripe's target.
+ *
+ * count must equal lsm->lsm_stripe_count and cookies[] is indexed by stripe.
+ * Every stripe is attempted; the first error reported by a target that is
+ * still marked active is returned, errors from inactive targets are ignored.
+ * Returns -ENODEV if conn does not resolve to an export with a device.
+ */
+static int lov_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_obd *lov;
+        int stripe;
+        int rc = 0;
+        ENTRY;
+
+        LASSERT(lsm != NULL);
+        if (export == NULL || export->exp_obd == NULL)
+                GOTO(out, rc = -ENODEV);
+
+        LASSERT(count == lsm->lsm_stripe_count);
+
+        lov = &export->exp_obd->u.lov;
+        for (stripe = 0; stripe < count; stripe++) {
+                struct lov_oinfo *loi = &lsm->lsm_oinfo[stripe];
+                struct lov_tgt_desc *tgt = &lov->tgts[loi->loi_ost_idx];
+                int err;
+
+                err = obd_log_cancel(&tgt->conn, NULL, 1, &cookies[stripe],
+                                     flags);
+                if (!err || !tgt->active)
+                        continue;
+
+                CERROR("error: objid "LPX64" subobj "LPX64
+                       " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
+                       loi->loi_id, loi->loi_ost_idx, err);
+                if (!rc)
+                        rc = err;
+        }
+        GOTO(out, rc);
+ out:
+        class_export_put(export);
+        return rc;
+}
+
struct obd_ops lov_obd_ops = {
o_owner: THIS_MODULE,
o_attach: lov_attach,
o_cancel_unused: lov_cancel_unused,
o_iocontrol: lov_iocontrol,
o_get_info: lov_get_info,
- .o_mark_page_dirty = lov_mark_page_dirty,
- .o_clear_dirty_pages = lov_clear_dirty_pages,
- .o_last_dirty_offset = lov_last_dirty_offset,
+ o_set_info: lov_set_info,
+ o_log_add: lov_log_add,
+ o_log_cancel: lov_log_cancel,
+ o_mark_page_dirty: lov_mark_page_dirty,
+ o_clear_dirty_pages: lov_clear_dirty_pages,
+ o_last_dirty_offset: lov_last_dirty_offset,
};
int __init lov_init(void)
struct lprocfs_static_vars lvars;
int rc;
- printk(KERN_INFO "Lustre Logical Object Volume driver; "
- "info@clusterfs.com\n");
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(lov, &lvars);
rc = class_register_type(&lov_obd_ops, lvars.module_vars,
OBD_LOV_DEVICENAME);
RETURN(rc);
}
-static void __exit lov_exit(void)
+static void /*__exit*/ lov_exit(void)
{
class_unregister_type(OBD_LOV_DEVICENAME);
}
#include <linux/obd_class.h>
#include <linux/obd_support.h>
+#include "lov_internal.h"
+
void lov_dump_lmm(int level, struct lov_mds_md *lmm)
{
struct lov_object_id *loi;
for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) {
/* XXX call down to osc_packmd() to do the packing */
LASSERT (loi->loi_id);
- lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
+ lmm->lmm_objects[loi->loi_ost_idx].l_object_id =
cpu_to_le64 (loi->loi_id);
}
RETURN(lmm_size);
}
-static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
+int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
{
if (!stripe_count)
stripe_count = lov->desc.ld_default_stripe_count;
return stripe_count;
}
+/* Sanity-check an on-disk (little-endian) lov_mds_md of lmm_bytes bytes.
+ *
+ * On success returns 0 with *ost_count, *stripe_count and *ost_offset set to
+ * the CPU-endian values from the header.  On any inconsistency logs an error
+ * and returns -EINVAL; every failure except the initial "too small for a
+ * header" case also dumps the lmm at D_WARNING.  The output arguments may
+ * already have been written when -EINVAL is returned.
+ */
+static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes,
+                          int *ost_count, int *stripe_count, int *ost_offset)
+{
+        /* not even a full header: nothing sensible to dump */
+        if (lmm_bytes < sizeof(*lmm)) {
+                CERROR("lov_mds_md too small: %d, need at least %d\n",
+                       lmm_bytes, (int)sizeof(*lmm));
+                return -EINVAL;
+        }
+
+        if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC) {
+                CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
+                       le32_to_cpu(lmm->lmm_magic), LOV_MAGIC);
+                goto dump_invalid;
+        }
+
+        *ost_count = le16_to_cpu(lmm->lmm_ost_count);
+        *stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
+        *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset);
+
+        if (*ost_count == 0 || *stripe_count == 0) {
+                CERROR("zero OST count %d or stripe count %d\n",
+                       *ost_count, *stripe_count);
+                goto dump_invalid;
+        }
+
+        if (lmm_bytes < lov_mds_md_size(*ost_count)) {
+                CERROR("lov_mds_md too small: %d, need %d\n",
+                       lmm_bytes, lov_mds_md_size(*ost_count));
+                goto dump_invalid;
+        }
+
+        if (*ost_offset > *ost_count) {
+                CERROR("starting OST offset %d > number of OSTs %d\n",
+                       *ost_offset, *ost_count);
+                goto dump_invalid;
+        }
+
+        if (*stripe_count > *ost_count) {
+                CERROR("stripe count %d > number of OSTs %d\n",
+                       *stripe_count, *ost_count);
+                goto dump_invalid;
+        }
+
+        if (lmm->lmm_object_id == 0) {
+                CERROR("zero object id\n");
+                goto dump_invalid;
+        }
+
+        return 0;
+
+dump_invalid:
+        lov_dump_lmm(D_WARNING, lmm);
+        return -EINVAL;
+}
+
+/* Allocate an in-memory stripe descriptor for stripe_count stripes.
+ *
+ * Stores the new descriptor in *lsmp (NULL if the allocation failed), fills
+ * in the magic, stripe count and maxbytes, and points each stripe's dirty
+ * offset tree at its inline instance.  Returns the allocated size in bytes,
+ * or -ENOMEM.
+ */
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count)
+{
+        struct lov_stripe_md *lsm;
+        int lsm_size = lov_stripe_md_size(stripe_count);
+        int idx;
+
+        OBD_ALLOC(lsm, lsm_size);
+        *lsmp = lsm;
+        if (lsm == NULL)
+                return -ENOMEM;
+
+        lsm->lsm_magic = LOV_MAGIC;
+        lsm->lsm_stripe_count = stripe_count;
+        lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+
+        for (idx = 0; idx < stripe_count; idx++) {
+                struct lov_oinfo *loi = &lsm->lsm_oinfo[idx];
+
+                loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                ot_init(loi->loi_dirty_ot);
+        }
+        return lsm_size;
+}
+
+/* Free the stripe descriptor *lsmp (sized from its own lsm_stripe_count) and
+ * reset *lsmp to NULL.  *lsmp must point at a valid descriptor. */
+void lov_free_memmd(struct lov_stripe_md **lsmp)
+{
+        struct lov_stripe_md *lsm = *lsmp;
+
+        OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
+        *lsmp = NULL;
+}
+
/* Unpack LOV object metadata from disk storage. It is packed in LE byte
* order and is opaque to the networking layer.
*/
struct lov_obd *lov = &obd->u.lov;
struct lov_stripe_md *lsm;
struct lov_oinfo *loi;
- int ost_count = 0;
- int ost_offset = 0;
+ int ost_count;
+ int ost_offset;
int stripe_count;
int lsm_size;
int i;
ENTRY;
+ /* If passed an MDS struct use values from there, otherwise defaults */
if (lmm) {
- if (lmm_bytes < sizeof (*lmm)) {
- CERROR("lov_mds_md too small: %d, need %d\n",
- lmm_bytes, (int)sizeof(*lmm));
- RETURN(-EINVAL);
- }
- if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) {
- CERROR("bad disk LOV MAGIC: %#08x != %#08x\n",
- le32_to_cpu (lmm->lmm_magic), LOV_MAGIC);
- RETURN(-EINVAL);
- }
-
- ost_count = le16_to_cpu (lmm->lmm_ost_count);
- stripe_count = le16_to_cpu (lmm->lmm_stripe_count);
-
- if (ost_count == 0 || stripe_count == 0) {
- CERROR ("zero ost %d or stripe %d count\n",
- ost_count, stripe_count);
- RETURN (-EINVAL);
- }
-
- if (lmm_bytes < lov_mds_md_size (ost_count)) {
- CERROR ("lov_mds_md too small: %d, need %d\n",
- lmm_bytes, lov_mds_md_size (ost_count));
- RETURN (-EINVAL);
- }
- } else
+ i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count,
+ &ost_offset);
+ if (i)
+ RETURN(i);
+ } else {
+ ost_count = 0;
stripe_count = lov_get_stripecnt(lov, 0);
+ ost_offset = 0;
+ }
- /* XXX LOV STACKING call into osc for sizes */
- lsm_size = lov_stripe_md_size(stripe_count);
-
+ /* If we aren't passed an lsmp struct, we just want the size */
if (!lsmp)
- RETURN(lsm_size);
+ /* XXX LOV STACKING call into osc for sizes */
+ RETURN(lov_stripe_md_size(stripe_count));
+ /* If we are passed an allocated struct but nothing to unpack, free */
if (*lsmp && !lmm) {
- stripe_count = (*lsmp)->lsm_stripe_count;
- OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count));
- *lsmp = NULL;
+ lov_free_memmd(lsmp);
RETURN(0);
}
- if (!*lsmp) {
- OBD_ALLOC(*lsmp, lsm_size);
- if (!*lsmp)
- RETURN(-ENOMEM);
- }
-
- lsm = *lsmp;
- lsm->lsm_magic = LOV_MAGIC;
- lsm->lsm_stripe_count = stripe_count;
- lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+ lsm_size = lov_alloc_memmd(lsmp, stripe_count);
+ if (lsm_size < 0)
+ RETURN(lsm_size);
+ /* If we are passed a pointer but nothing to unpack, we only alloc */
if (!lmm)
RETURN(lsm_size);
- lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
- lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size);
- ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset);
-
- LMM_ASSERT(lsm->lsm_object_id);
- LMM_ASSERT(ost_count);
+ lsm = *lsmp;
+ lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id);
+ lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size);
+ lsm->lsm_stripe_offset = ost_offset;
for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
ost_offset %= ost_count;
if (!lmm->lmm_objects[ost_offset].l_object_id)
continue;
- LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
/* XXX LOV STACKING call down to osc_unpackmd() */
loi->loi_id =
- le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
+ le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id);
loi->loi_ost_idx = ost_offset;
- loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
- ot_init(loi->loi_dirty_ot);
loi++;
}
- LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
- LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
+
+ if (loi - lsm->lsm_oinfo != stripe_count) {
+ CERROR("missing objects in lmm struct\n");
+ lov_dump_lmm(D_WARNING, lmm);
+ lov_free_memmd(lsmp);
+ RETURN(-EINVAL);
+ }
+
RETURN(lsm_size);
}
struct obd_device *obd = class_conn2obd(conn);
struct lov_obd *lov = &obd->u.lov;
struct lov_mds_md lmm;
- struct lov_stripe_md *lsm;
int stripe_count;
int rc;
ENTRY;
/* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */
if (lmm.lmm_magic != LOV_MAGIC) {
- CERROR("bad userland LOV MAGIC: %#08x != %#08x\n",
+ CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n",
lmm.lmm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
}
#endif
if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
- CERROR("stripe size %u not multiple of %lu\n",
+ CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n",
lmm.lmm_stripe_size, PAGE_SIZE);
RETURN(-EINVAL);
}
stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) {
- CERROR("stripe width %ux%u > %lu on 32-bit system\n",
+ CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n",
lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
RETURN(-EINVAL);
}
- /* XXX LOV STACKING call into osc for sizes */
- OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
- if (!lsm)
- RETURN(-ENOMEM);
+ rc = lov_alloc_memmd(lsmp, stripe_count);
- lsm->lsm_magic = LOV_MAGIC;
- lsm->lsm_stripe_count = stripe_count;
- lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
- lsm->lsm_stripe_size = lmm.lmm_stripe_size;
- lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count;
+ if (rc < 0)
+ RETURN(rc);
- *lsmp = lsm;
+ (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset;
+ (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size;
- RETURN(rc);
+ RETURN(0);
}
/* Retrieve object striping information.
#include <linux/seq_file.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
#else
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
-
-int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_stripesize(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device *dev = (struct obd_device *)data;
struct lov_desc *desc;
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
}
-int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device *dev = (struct obd_device *)data;
struct lov_desc *desc;
return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset);
}
-int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_stripetype(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device* dev = (struct obd_device*)data;
struct lov_desc *desc;
return snprintf(page, count, "%u\n", desc->ld_pattern);
}
-int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_stripecount(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device *dev = (struct obd_device *)data;
struct lov_desc *desc;
return snprintf(page, count, "%u\n", desc->ld_default_stripe_count);
}
-int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_numobd(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device *dev = (struct obd_device*)data;
struct lov_desc *desc;
}
-int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
- void *data)
+static int lov_rd_activeobd(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device* dev = (struct obd_device*)data;
struct lov_desc *desc;
return snprintf(page, count, "%u\n", desc->ld_active_tgt_count);
}
-int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data)
+static int lov_rd_mdc(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
struct obd_device *dev = (struct obd_device*) data;
struct lov_obd *lov;
return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid);
}
-static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos)
+static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
{
struct obd_device *dev = p->private;
struct lov_obd *lov = &dev->u.lov;
return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
}
-static void ll_tgt_seq_stop(struct seq_file *p, void *v)
-{
+static void lov_tgt_seq_stop(struct seq_file *p, void *v)
+{
}
-static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
+static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos)
{
struct obd_device *dev = p->private;
struct lov_obd *lov = &dev->u.lov;
return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]);
}
-static int ll_tgt_seq_show(struct seq_file *p, void *v)
+static int lov_tgt_seq_show(struct seq_file *p, void *v)
{
struct lov_tgt_desc *tgt = v;
struct obd_device *dev = p->private;
tgt->active ? "" : "IN");
}
-struct seq_operations ll_tgt_sops = {
- .start = ll_tgt_seq_start,
- .stop = ll_tgt_seq_stop,
- .next = ll_tgt_seq_next,
- .show = ll_tgt_seq_show,
+struct seq_operations lov_tgt_sops = {
+ .start = lov_tgt_seq_start,
+ .stop = lov_tgt_seq_stop,
+ .next = lov_tgt_seq_next,
+ .show = lov_tgt_seq_show,
};
-static int ll_target_seq_open(struct inode *inode, struct file *file)
+static int lov_target_seq_open(struct inode *inode, struct file *file)
{
struct proc_dir_entry *dp = inode->u.generic_ip;
struct seq_file *seq;
- int rc = seq_open(file, &ll_tgt_sops);
+ int rc = seq_open(file, &lov_tgt_sops);
if (rc)
return rc;
return 0;
}
+
struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "stripesize", rd_stripesize, 0, 0 },
- { "stripeoffset", rd_stripeoffset, 0, 0 },
- { "stripecount", rd_stripecount, 0, 0 },
- { "stripetype", rd_stripetype, 0, 0 },
- { "numobd", rd_numobd, 0, 0 },
- { "activeobd", rd_activeobd, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal", rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "target_mdc", rd_mdc, 0, 0 },
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "stripesize", lov_rd_stripesize, 0, 0 },
+ { "stripeoffset", lov_rd_stripeoffset, 0, 0 },
+ { "stripecount", lov_rd_stripecount, 0, 0 },
+ { "stripetype", lov_rd_stripetype, 0, 0 },
+ { "numobd", lov_rd_numobd, 0, 0 },
+ { "activeobd", lov_rd_activeobd, 0, 0 },
+ { "filestotal", lprocfs_rd_filestotal, 0, 0 },
+ { "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+ { "blocksize", lprocfs_rd_blksize, 0, 0 },
+ { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
+ { "target_mdc", lov_rd_mdc, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
-struct file_operations ll_proc_target_fops = {
- .open = ll_target_seq_open,
+struct file_operations lov_proc_target_fops = {
+ .open = lov_target_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(lov, lprocfs_module_vars, lprocfs_obd_vars)
Makefile.in
.deps
TAGS
+.*.cmd
#define DEBUG_SUBSYSTEM S_CLASS
#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
+#include <linux/vfs.h>
#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
+static struct lprocfs_vars lprocfs_obd_vars[] = {
{ "uuid", lprocfs_rd_uuid, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal", rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
+ { "blocksize", lprocfs_rd_blksize, 0, 0 },
+ { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
+ { "filestotal", lprocfs_rd_filestotal, 0, 0 },
+ { "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
{ "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
{ "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
+static struct lprocfs_vars lprocfs_module_vars[] = {
{ "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(mdc, lprocfs_module_vars, lprocfs_obd_vars)
-void mds_pack_req_body(struct ptlrpc_request *);
-void mds_pack_rep_body(struct ptlrpc_request *);
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_pack_req_body(struct ptlrpc_request *);
+void mdc_pack_rep_body(struct ptlrpc_request *);
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
obd_id ino, int type);
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
int flags, struct mdc_op_data *data);
-void mds_setattr_pack(struct ptlrpc_request *req,
+void mdc_setattr_pack(struct ptlrpc_request *req,
struct mdc_op_data *data,
- struct iattr *iattr, void *ea, int ealen);
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+ struct iattr *iattr, void *ea, int ealen,
+ void *ea2, int ea2len);
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
const void *data, int datalen);
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
__u32 flags, const void *data, int datalen);
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data);
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data);
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data,
const char *old, int oldlen, const char *new, int newlen);
#include <linux/lustre_mds.h>
#include <linux/lustre_lite.h>
-void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
+void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size,
obd_id ino, int type, __u64 xid)
{
struct mds_body *b;
b->nlink = size; /* !! */
}
-static void mds_pack_body(struct mds_body *b)
+static void mdc_pack_body(struct mds_body *b)
{
LASSERT (b != NULL);
b->capability = current->cap_effective;
}
-void mds_pack_req_body(struct ptlrpc_request *req)
+void mdc_pack_req_body(struct ptlrpc_request *req)
{
struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b));
- mds_pack_body(b);
+ mdc_pack_body(b);
}
/* packing of MDS records */
-void mds_create_pack(struct ptlrpc_request *req, int offset,
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
const void *data, int datalen)
memcpy (tmp, data, datalen);
}
}
+
/* packing of MDS records */
-void mds_open_pack(struct ptlrpc_request *req, int offset,
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
__u32 flags, const void *data, int datalen)
rec->cr_fsuid = current->fsuid;
rec->cr_fsgid = current->fsgid;
rec->cr_cap = current->cap_effective;
- ll_ino2fid(&rec->cr_fid, op_data->ino1,
- op_data->gen1, op_data->typ1);
+ if (op_data != NULL)
+ ll_ino2fid(&rec->cr_fid, op_data->ino1,
+ op_data->gen1, op_data->typ1);
memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
rec->cr_mode = mode;
rec->cr_flags = flags;
else
rec->cr_suppgid = -1;
- tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
- LOGL0(op_data->name, op_data->namelen, tmp);
+ if (op_data->name) {
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+ op_data->namelen + 1);
+ LOGL0(op_data->name, op_data->namelen, tmp);
+ }
if (data) {
tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
memcpy (tmp, data, datalen);
}
}
-void mds_setattr_pack(struct ptlrpc_request *req,
+
+void mdc_setattr_pack(struct ptlrpc_request *req,
struct mdc_op_data *data,
- struct iattr *iattr, void *ea, int ealen)
+ struct iattr *iattr, void *ea, int ealen,
+ void *ea2, int ea2len)
{
struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0,
sizeof (*rec));
rec->sa_suppgid = -1;
}
- if (ealen != 0)
- memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+ if (ealen == 0)
+ return;
+
+ memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen);
+
+ if (ea2len == 0)
+ return;
+
+ memcpy(lustre_msg_buf(req->rq_reqmsg, 2, ea2len), ea2, ea2len);
}
-void mds_unlink_pack(struct ptlrpc_request *req, int offset,
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data)
{
struct mds_rec_unlink *rec;
LOGL0(data->name, data->namelen, tmp);
}
-void mds_link_pack(struct ptlrpc_request *req, int offset,
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data)
{
struct mds_rec_link *rec;
LOGL0(data->name, data->namelen, tmp);
}
-void mds_rename_pack(struct ptlrpc_request *req, int offset,
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *data,
const char *old, int oldlen, const char *new, int newlen)
{
}
}
-void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
+void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset,
int flags, struct mdc_op_data *data)
{
struct mds_body *b;
#include "mdc_internal.h"
/* mdc_setattr does its own semaphore handling */
-static int mdc_reint(struct ptlrpc_request *request, int level)
+static int mdc_reint(struct ptlrpc_request *request,
+ struct mdc_rpc_lock *rpc_lock, int level)
{
int rc;
- __u32 *opcodeptr;
+
- opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr));
request->rq_level = level;
- if (!(*opcodeptr == REINT_SETATTR))
- mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+ mdc_get_rpc_lock(rpc_lock, NULL);
rc = ptlrpc_queue_wait(request);
- if (!(*opcodeptr == REINT_SETATTR))
- mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
-
+ mdc_put_rpc_lock(rpc_lock, NULL);
if (rc)
CDEBUG(D_INFO, "error in handling %d\n", rc);
+ else if (!lustre_swab_repbuf(request, 0, sizeof(struct mds_body),
+ lustre_swab_mds_body)) {
+ CERROR ("Can't unpack mds_body\n");
+ rc = -EPROTO;
+ }
return rc;
}
* If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a
* magic open-path setattr that should take the setattr semaphore and
* go to the setattr portal. */
-int mdc_setattr(struct lustre_handle *conn,
- struct mdc_op_data *data,
- struct iattr *iattr, void *ea, int ealen,
+int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data,
+ struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct mds_rec_setattr *rec;
struct mdc_rpc_lock *rpc_lock;
- int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen};
+ int rc, bufcount = 1, size[3] = {sizeof(*rec), ealen, ea2len};
ENTRY;
LASSERT(iattr != NULL);
- if (ealen > 0)
+ if (ealen > 0) {
bufcount = 2;
+ if (ea2len > 0)
+ bufcount = 3;
+ }
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
size, NULL);
- if (!req)
+ if (req == NULL)
RETURN(-ENOMEM);
if (iattr->ia_valid & ATTR_FROM_OPEN) {
req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249
rpc_lock = &mdc_setattr_lock;
- } else
+ } else {
rpc_lock = &mdc_rpc_lock;
+ }
- mds_setattr_pack(req, data, iattr, ea, ealen);
+ if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+ iattr->ia_mtime, iattr->ia_ctime);
+ mdc_setattr_pack(req, data, iattr, ea, ealen, ea2, ea2len);
size[0] = sizeof(struct mds_body);
req->rq_replen = lustre_msg_size(1, size);
- mdc_get_rpc_lock(rpc_lock, NULL);
- rc = mdc_reint(req, LUSTRE_CONN_FULL);
- mdc_put_rpc_lock(rpc_lock, NULL);
-
+ rc = mdc_reint(req, rpc_lock, LUSTRE_CONN_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
RETURN(rc);
}
-int mdc_create(struct lustre_handle *conn,
- struct mdc_op_data *op_data,
- const void *data, int datalen,
- int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev,
- struct ptlrpc_request **request)
+int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data,
+ const void *data, int datalen, int mode, __u32 uid, __u32 gid,
+ __u64 time, __u64 rdev, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
- int rc, size[3] = {sizeof(struct mds_rec_create),
- op_data->namelen + 1, 0};
+ int rc, size[3] = {sizeof(struct mds_rec_create), op_data->namelen + 1};
int level, bufcount = 2;
-// ENTRY;
+ ENTRY;
if (data && datalen) {
size[bufcount] = datalen;
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount,
size, NULL);
- if (!req)
- return -ENOMEM;
-// RETURN(-ENOMEM);
+ if (req == NULL)
+ RETURN(-ENOMEM);
- /* mds_create_pack fills msg->bufs[1] with name
+ /* mdc_create_pack fills msg->bufs[1] with name
* and msg->bufs[2] with tgt, for symlinks or lov MD data */
- mds_create_pack(req, 0, op_data,
- mode, rdev, uid, gid, time,
+ mdc_create_pack(req, 0, op_data, mode, rdev, uid, gid, time,
data, datalen);
size[0] = sizeof(struct mds_body);
level = LUSTRE_CONN_FULL;
resend:
- rc = mdc_reint(req, level);
+ rc = mdc_reint(req, &mdc_rpc_lock, level);
/* Resend if we were told to. */
if (rc == -ERESTARTSYS) {
level = LUSTRE_CONN_RECOVER;
mdc_store_inode_generation(req, 0, 0);
*request = req;
- return rc;
-// RETURN(rc);
+ RETURN(rc);
}
-int mdc_unlink(struct lustre_handle *conn,
- struct mdc_op_data *data,
+int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data,
struct ptlrpc_request **request)
{
struct obd_device *obddev = class_conn2obd(conn);
ENTRY;
LASSERT(req == NULL);
-
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
NULL);
- if (!req)
+ if (req == NULL)
RETURN(-ENOMEM);
*request = req;
size[0] = sizeof(struct mds_body);
size[1] = obddev->u.cli.cl_max_mds_easize;
- req->rq_replen = lustre_msg_size(2, size);
+ size[2] = obddev->u.cli.cl_max_mds_cookiesize;
+ req->rq_replen = lustre_msg_size(3, size);
- mds_unlink_pack(req, 0, data);
+ mdc_unlink_pack(req, 0, data);
- rc = mdc_reint(req, LUSTRE_CONN_FULL);
+ rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
if (rc == -ERESTARTSYS)
rc = 0;
RETURN(rc);
}
-int mdc_link(struct lustre_handle *conn,
- struct mdc_op_data *data,
+int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size,
NULL);
- if (!req)
+ if (req == NULL)
RETURN(-ENOMEM);
- mds_link_pack(req, 0, data);
+ mdc_link_pack(req, 0, data);
size[0] = sizeof(struct mds_body);
req->rq_replen = lustre_msg_size(1, size);
- rc = mdc_reint(req, LUSTRE_CONN_FULL);
+ rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
RETURN(rc);
}
-int mdc_rename(struct lustre_handle *conn,
- struct mdc_op_data *data,
- const char *old, int oldlen,
- const char *new, int newlen,
+int mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data,
+ const char *old, int oldlen, const char *new, int newlen,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size,
NULL);
- if (!req)
+ if (req == NULL)
RETURN(-ENOMEM);
- mds_rename_pack(req, 0, data, old, oldlen, new, newlen);
+ mdc_rename_pack(req, 0, data, old, oldlen, new, newlen);
size[0] = sizeof(struct mds_body);
req->rq_replen = lustre_msg_size(1, size);
- rc = mdc_reint(req, LUSTRE_CONN_FULL);
+ rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL);
*request = req;
if (rc == -ERESTARTSYS)
rc = 0;
EXPORT_SYMBOL(mdc_rpc_lock);
/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */
static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
int level, int msg_flags)
{
req->rq_level = level;
req->rq_replen = lustre_msg_size(1, &size);
- mds_pack_req_body(req);
+ mdc_pack_req_body(req);
req->rq_reqmsg->flags |= msg_flags;
rc = ptlrpc_queue_wait(req);
return rc;
}
-/* should become mdc_getinfo() */
+/* This should be mdc_get_info("rootfid") */
int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
{
return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
0);
}
+/* should call mdc_get_info("lovdesc") and mdc_get_info("lovtgts") */
int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
struct ptlrpc_request **request)
{
memcpy(&body->fid1, fid, sizeof(*fid));
body->valid = valid;
body->eadatasize = ea_size;
- mds_pack_req_body(req);
+ mdc_pack_req_body(req);
rc = mdc_getattr_common (conn, ea_size, req);
if (rc != 0) {
memcpy(&body->fid1, fid, sizeof(*fid));
body->valid = valid;
body->eadatasize = ea_size;
- mds_pack_req_body(req);
+ mdc_pack_req_body(req);
LASSERT (strnlen (filename, namelen) == namelen - 1);
memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen);
int repoff)
{
struct mds_rec_create *rec =
- lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec));
+ lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec));
struct mds_body *body =
- lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body));
+ lustre_msg_buf(req->rq_repmsg, repoff, sizeof(*body));
LASSERT (rec != NULL);
LASSERT (body != NULL);
rec->cr_replayfid.generation, rec->cr_replayfid.id);
}
+/* Fill in a lustre_md from the reply message of a completed request.
+ *
+ * md is zeroed first.  md->body is then pointed at reply buffer 'offset',
+ * which must exist and must already have been swabbed (both are asserted).
+ * If the body has OBD_MD_FLEASIZE set, reply buffer 'offset + 1' is taken
+ * to hold body->eadatasize bytes of lov_mds_md and is unpacked into md->lsm
+ * with obd_unpackmd() on obd_import.
+ *
+ * Returns 0 on success, -EPROTO if OBD_MD_FLEASIZE is set but eadatasize is
+ * zero, or the negative error returned by obd_unpackmd(). */
+int mdc_req2lustre_md(struct ptlrpc_request *req, int offset,
+                      struct lustre_handle *obd_import,
+                      struct lustre_md *md)
+{
+        int rc;
+        ENTRY;
+
+        LASSERT(md);
+        memset(md, 0, sizeof(*md));
+
+        md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
+        LASSERT (md->body != NULL);
+        LASSERT_REPSWABBED (req, offset);
+
+        if (md->body->valid & OBD_MD_FLEASIZE) {
+                int lmmsize;
+                struct lov_mds_md *lmm;
+
+                LASSERT(S_ISREG(md->body->mode));
+
+                if (md->body->eadatasize == 0) {
+                        CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
+                        RETURN(-EPROTO);
+                }
+                lmmsize = md->body->eadatasize;
+                lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize);
+                LASSERT (lmm != NULL);
+                LASSERT_REPSWABBED (req, offset + 1);
+
+                rc = obd_unpackmd(obd_import, &md->lsm, lmm, lmmsize);
+                if (rc < 0) {
+                        /* The EA comes from the reply message; a failure to
+                         * unpack it is reported to the caller rather than
+                         * taking the whole node down with LBUG(). */
+                        CERROR ("Error %d unpacking eadata\n", rc);
+                        RETURN(rc);
+                }
+                /* A non-negative return is expected to be the size of the
+                 * unpacked stripe MD, so it must cover at least the header. */
+                LASSERT (rc >= sizeof (*md->lsm));
+        }
+        RETURN(0);
+}
+
+
/* We always reserve enough space in the reply packet for a stripe MD, because
- * we don't know in advance the file type.
- *
- * XXX we could get that from ext2_dir_entry_2 file_type
- */
+ * we don't know in advance the file type. */
int mdc_enqueue(struct lustre_handle *conn,
int lock_type,
struct lookup_intent *it,
{ .name = {data->ino1, data->gen1} };
int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
int rc, flags = LDLM_FL_HAS_INTENT;
- int repsize[3] = {sizeof(struct ldlm_reply),
+ int repsize[4] = {sizeof(struct ldlm_reply),
sizeof(struct mds_body),
- obddev->u.cli.cl_max_mds_easize};
+ obddev->u.cli.cl_max_mds_easize,
+ obddev->u.cli.cl_max_mds_cookiesize};
struct ldlm_reply *dlm_rep;
struct ldlm_intent *lit;
struct ldlm_request *lockreq;
lit->opc = (__u64)it->it_op;
/* pack the intended request */
- mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
+ mdc_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
current->fsgid, LTIME_S(CURRENT_TIME),
it->it_flags, tgt, tgtlen);
/* get ready for the reply */
lit->opc = (__u64)it->it_op;
/* pack the intended request */
- mds_unlink_pack(req, 2, data);
+ mdc_unlink_pack(req, 2, data);
/* get ready for the reply */
- reply_buffers = 3;
- req->rq_replen = lustre_msg_size(3, repsize);
+ reply_buffers = 4;
+ req->rq_replen = lustre_msg_size(4, repsize);
} else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
size[2] = sizeof(struct mds_body);
lit->opc = (__u64)it->it_op;
/* pack the intended request */
- mds_getattr_pack(req, valid, 2, it->it_flags, data);
+ mdc_getattr_pack(req, valid, 2, it->it_flags, data);
/* get ready for the reply */
reply_buffers = 3;
req->rq_replen = lustre_msg_size(3, repsize);
}
dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
- LASSERT (dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
- LASSERT_REPSWABBED (req, 0); /* swabbed by ldlm_cli_enqueue() */
+ LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
+ LASSERT_REPSWABBED(req, 0); /* swabbed by ldlm_cli_enqueue() */
it->it_disposition = (int) dlm_rep->lock_policy_res1;
it->it_status = (int) dlm_rep->lock_policy_res2;
it->it_data = req;
/* We know what to expect, so we do any byte flipping required here */
- LASSERT (reply_buffers == 3 || reply_buffers == 1);
- if (reply_buffers == 3) {
+ LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+ if (reply_buffers >= 3) {
struct mds_body *body;
body = lustre_swab_repbuf (req, 1, sizeof (*body),
/* The eadata is opaque; just check that it is
* there. Eventually, obd_unpackmd() will check
* the contents */
- eadata = lustre_swab_repbuf (req, 2, body->eadatasize,
- NULL);
+ eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+ NULL);
if (eadata == NULL) {
CERROR ("Missing/short eadata\n");
RETURN (-EPROTO);
struct list_head *tmp;
struct mds_body *body;
- body = lustre_swab_repbuf (req, 1, sizeof (*body),
- lustre_swab_mds_body);
+ body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body);
LASSERT (body != NULL);
memcpy(&old, file_fh, sizeof(old));
{
struct ptlrpc_request *req = och->och_req;
struct mds_rec_create *rec =
- lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec));
+ lustre_msg_buf(req->rq_reqmsg, 2, sizeof(*rec));
struct mds_body *body =
- lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+ lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
- LASSERT (rec != NULL);
+ LASSERT(rec != NULL);
/* outgoing messages always in my byte order */
- LASSERT (body != NULL);
+ LASSERT(body != NULL);
/* incoming message in my byte order (it's been swabbed) */
- LASSERT_REPSWABBED (req, 1);
+ LASSERT_REPSWABBED(req, 1);
memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
req->rq_replay_cb = mdc_replay_open;
if (rc != 0)
GOTO(out, rc);
- mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
+ mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type);
req->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(req);
case OBD_IOC_CLIENT_RECOVER:
RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1));
case IOC_OSC_SET_ACTIVE:
- if (data->ioc_offset) {
- CERROR("%s: can't reactivate MDC\n",
- obddev->obd_uuid.uuid);
- RETURN(-ENOTTY);
- }
- RETURN(ptlrpc_set_import_active(imp, 0));
+ RETURN(ptlrpc_set_import_active(imp, data->ioc_offset));
default:
CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
RETURN(-ENOTTY);
}
}
-static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
struct ptlrpc_request *req;
struct obd_statfs *msfs;
int rc, size = sizeof(*msfs);
ENTRY;
- req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0,
- NULL, NULL);
+ /* We could possibly pass max_age in the request (as an absolute
+ * timestamp or a "seconds.usec ago") so the target can avoid doing
+ * extra calls into the filesystem if that isn't necessary (e.g.
+ * during mount that would help a bit). Having relative timestamps
+ * is not so great if request processing is slow, while absolute
+ * timestamps are not ideal because they need time synchronization. */
+ req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL);
if (!req)
RETURN(-ENOMEM);
if (rc)
GOTO(out, rc);
- msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs),
- lustre_swab_obd_statfs);
+ msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),lustre_swab_obd_statfs);
if (msfs == NULL) {
- CERROR ("Can't unpack obd_statfs\n");
- GOTO (out, rc = -EPROTO);
+ CERROR("Can't unpack obd_statfs\n");
+ GOTO(out, rc = -EPROTO);
}
- memcpy (osfs, msfs, sizeof (*msfs));
+ memcpy(osfs, msfs, sizeof (*msfs));
EXIT;
out:
ptlrpc_req_finished(req);
return rc;
}
+/* Send an MDS_PIN request for the inode identified by (ino, gen, type).
+ *
+ * 'flag' is passed through in the request body.  The RPC is issued under
+ * mdc_rpc_lock.  On success the file handle from the reply body is copied
+ * into handle->och_fh, the request is stored in handle->och_req (it is
+ * released later by mdc_unpin()) and handle->och_magic is set to
+ * OBD_CLIENT_HANDLE_MAGIC.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, the
+ * error from ptlrpc_queue_wait() if the RPC fails, or -EPROTO if the reply
+ * body cannot be unpacked.  On every failure path the request is released
+ * and 'handle' is left untouched. */
+static int mdc_pin(struct lustre_handle *conn, obd_id ino, __u32 gen, int type,
+                   struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size = sizeof(*body);
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_PIN, 1, &size, NULL);
+        if (req == NULL)
+                RETURN(-ENOMEM);
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        ll_ino2fid(&body->fid1, ino, gen, type);
+        body->flags = flag;
+
+        req->rq_replen = lustre_msg_size(1, &size);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc) {
+                CERROR("pin failed: %d\n", rc);
+                ptlrpc_req_finished(req);
+                RETURN(rc);
+        }
+
+        body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
+        if (body == NULL) {
+                /* rc is 0 at this point; returning it would report success
+                 * to the caller even though 'handle' was never filled in. */
+                CERROR("Can't unpack mds_body\n");
+                ptlrpc_req_finished(req);
+                RETURN(-EPROTO);
+        }
+
+        memcpy(&handle->och_fh, &body->handle, sizeof(body->handle));
+        handle->och_req = req; /* will be dropped by unpin */
+        handle->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        RETURN(rc);
+}
+
+/* Undo a previous mdc_pin() on 'handle'.
+ *
+ * A handle whose och_magic is not OBD_CLIENT_HANDLE_MAGIC is ignored and 0
+ * is returned.  Otherwise an MDS_CLOSE request carrying the saved file
+ * handle and 'flag' is sent under mdc_rpc_lock, and then both that request
+ * and the request saved in handle->och_req are released, whether or not the
+ * RPC succeeded.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated (in
+ * which case handle->och_req is not released), or the error from
+ * ptlrpc_queue_wait(). */
+static int mdc_unpin(struct lustre_handle *conn,
+                     struct obd_client_handle *handle, int flag)
+{
+        struct ptlrpc_request *close_req;
+        struct mds_body *reqbody;
+        int bodysize = sizeof(*reqbody);
+        int rc;
+        ENTRY;
+
+        /* Only handles marked with the client-handle magic carry a pin. */
+        if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC)
+                RETURN(0);
+
+        close_req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1,
+                                    &bodysize, NULL);
+        if (!close_req)
+                RETURN(-ENOMEM);
+
+        reqbody = lustre_msg_buf(close_req->rq_reqmsg, 0, sizeof(*reqbody));
+        reqbody->flags = flag;
+        memcpy(&reqbody->handle, &handle->och_fh, sizeof(reqbody->handle));
+
+        /* No reply buffers are requested for this RPC. */
+        close_req->rq_replen = lustre_msg_size(0, NULL);
+
+        mdc_get_rpc_lock(&mdc_rpc_lock, NULL);
+        rc = ptlrpc_queue_wait(close_req);
+        mdc_put_rpc_lock(&mdc_rpc_lock, NULL);
+        if (rc)
+                CERROR("unpin failed: %d\n", rc);
+
+        /* Drop this request, then the one kept in the handle since the pin. */
+        ptlrpc_req_finished(close_req);
+        ptlrpc_req_finished(handle->och_req);
+        RETURN(rc);
+}
+
static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(mdc, &lvars);
return lprocfs_obd_attach(dev, lvars.obd_vars);
}
o_connect: client_import_connect,
o_disconnect: client_import_disconnect,
o_iocontrol: mdc_iocontrol,
- o_statfs: mdc_statfs
+ o_statfs: mdc_statfs,
+ o_pin: mdc_pin,
+ o_unpin: mdc_unpin,
};
int __init mdc_init(void)
struct lprocfs_static_vars lvars;
mdc_init_rpc_lock(&mdc_rpc_lock);
mdc_init_rpc_lock(&mdc_setattr_lock);
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(mdc, &lvars);
return class_register_type(&mdc_obd_ops, lvars.module_vars,
LUSTRE_MDC_NAME);
}
-static void __exit mdc_exit(void)
+static void /*__exit*/ mdc_exit(void)
{
class_unregister_type(LUSTRE_MDC_NAME);
}
MODULE_DESCRIPTION("Lustre Metadata Client");
MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(mdc_req2lustre_md);
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getlovinfo);
EXPORT_SYMBOL(mdc_enqueue);
Makefile.in
.deps
TAGS
+.*.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include fs/lustre/portals/Kernelenv
+include $(src)/../portals/Kernelenv
obj-y += mds.o
-
-mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o
+mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_open.o \
+ mds_lib.o
+
#include <linux/lustre_mds.h>
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+
#include "mds_internal.h"
-extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count,
- struct obd_uuid *uuidarray);
-extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
- struct ptlrpc_request *req, int rc, int disp);
-static int mds_cleanup(struct obd_device * obddev, int force, int failover);
-
-inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
-{
- return &req->rq_export->exp_obd->u.mds;
-}
+static int mds_cleanup(struct obd_device *obd, int flags);
static int mds_bulk_timeout(void *data)
{
snprintf(fid_name, sizeof(fid_name), "0x%lx", ino);
+ CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
+ ino, generation, mds->mds_sb);
+
/* under ext3 this is neither supposed to return bad inodes
nor NULL inodes. */
result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name));
if (!inode)
RETURN(ERR_PTR(-ENOENT));
- CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n",
- inode->i_ino, inode->i_generation, inode->i_sb);
-
if (generation && inode->i_generation != generation) {
/* we didn't find the right inode.. */
CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n",
mds_mfd_put(mfd);
}
-/* Call with med->med_open_lock held, please. */
-static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
+/* Close a "file descriptor" and possibly unlink an orphan from the
+ * PENDING directory.
+ *
+ * If we are being called from mds_disconnect() because the client has
+ * disappeared, then req == NULL and we do not update last_rcvd because
+ * there is nothing that could be recovered by the client at this stage
+ * (it will not even _have_ an entry in last_rcvd anymore).
+ */
+static int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd,
+ struct mds_file_data *mfd)
{
- struct dentry *de = NULL;
-
-#ifdef CONFIG_SMP
- LASSERT(spin_is_locked(&med->med_open_lock));
-#endif
- list_del(&mfd->mfd_list);
+ struct dentry *dparent = mfd->mfd_dentry->d_parent;
+ struct inode *child_inode = mfd->mfd_dentry->d_inode;
+ char fidname[LL_FID_NAMELEN];
+ int last_orphan, fidlen, rc = 0;
+ ENTRY;
- if (mfd->mfd_dentry->d_parent) {
- LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count));
- de = dget(mfd->mfd_dentry->d_parent);
+ if (dparent) {
+ LASSERT(atomic_read(&dparent->d_count) > 0);
+ dparent = dget(dparent);
}
- /* this is the actual "close" */
- l_dput(mfd->mfd_dentry);
+ fidlen = ll_fid2str(fidname, child_inode->i_ino,
+ child_inode->i_generation);
- if (de)
- l_dput(de);
+ last_orphan = mds_open_orphan_dec_test(child_inode) &&
+ mds_inode_is_orphan(child_inode);
+ /* this is the actual "close" */
+ l_dput(mfd->mfd_dentry);
mds_mfd_destroy(mfd);
- RETURN(0);
-}
-static int mds_disconnect(struct lustre_handle *conn, int failover)
-{
- struct obd_export *export = class_conn2export(conn);
- int rc;
- unsigned long flags;
- ENTRY;
+ if (dparent)
+ l_dput(dparent);
- ldlm_cancel_locks_for_export(export);
+ if (last_orphan) {
+ struct mds_obd *mds = &obd->u.mds;
+ struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+ struct dentry *pending_child = NULL;
+ void *handle;
- spin_lock_irqsave(&export->exp_lock, flags);
- export->exp_failover = failover;
- spin_unlock_irqrestore(&export->exp_lock, flags);
+ CDEBUG(D_ERROR, "destroying orphan object %s\n", fidname);
- rc = class_disconnect(conn, failover);
- class_export_put(export);
+ /* Sadly, there is no easy way to save pending_child from
+ * mds_reint_unlink() into mfd, so we need to re-lookup,
+ * but normally it will still be in the dcache.
+ */
+ down(&pending_dir->i_sem);
+ pending_child = lookup_one_len(fidname, mds->mds_pending_dir,
+ fidlen);
+ if (IS_ERR(pending_child))
+ GOTO(out_lock, rc = PTR_ERR(pending_child));
+ LASSERT(pending_child->d_inode != NULL);
+
+ handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK, NULL);
+ if (IS_ERR(handle))
+ GOTO(out_dput, rc = PTR_ERR(handle));
+ rc = vfs_unlink(pending_dir, pending_child);
+ if (rc)
+ CERROR("error unlinking orphan %s: rc %d\n",fidname,rc);
+
+ if (req) {
+ rc = mds_finish_transno(mds, pending_dir, handle, req,
+ rc, 0);
+ } else {
+ int err = fsfilt_commit(obd, pending_dir, handle, 0);
+ if (err) {
+ CERROR("error committing orphan unlink: %d\n",
+ err);
+ if (!rc)
+ rc = err;
+ }
+ }
+ out_dput:
+ dput(pending_child);
+ out_lock:
+ up(&pending_dir->i_sem);
+ }
RETURN(rc);
}
-static void mds_destroy_export(struct obd_export *export)
+static int mds_disconnect(struct lustre_handle *conn, int flags)
{
+ struct obd_export *export = class_conn2export(conn);
struct mds_export_data *med = &export->exp_mds_data;
- struct list_head *tmp, *n;
+ struct obd_device *obd = export->exp_obd;
+ struct obd_run_ctxt saved;
int rc;
-
ENTRY;
- LASSERT(!strcmp(export->exp_obd->obd_type->typ_name,
- LUSTRE_MDS_NAME));
- /*
- * Close any open files.
- */
+ push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+ /* Close any open files (which may also cause orphan unlinking). */
spin_lock(&med->med_open_lock);
- list_for_each_safe(tmp, n, &med->med_open_head) {
+ while (!list_empty(&med->med_open_head)) {
+ struct list_head *tmp = med->med_open_head.next;
struct mds_file_data *mfd =
list_entry(tmp, struct mds_file_data, mfd_list);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ /* bug 1579: fix force-closing for 2.5 */
struct dentry *dentry = mfd->mfd_dentry;
+
+ list_del(&mfd->mfd_list);
+ spin_unlock(&med->med_open_lock);
+
CERROR("force closing client file handle for %*s (%s:%lu)\n",
dentry->d_name.len, dentry->d_name.name,
kdevname(dentry->d_inode->i_sb->s_dev),
dentry->d_inode->i_ino);
+ rc = mds_mfd_close(NULL, obd, mfd);
#endif
- rc = mds_close_mfd(mfd, med);
if (rc)
CDEBUG(D_INODE, "Error closing file: %d\n", rc);
+ spin_lock(&med->med_open_lock);
}
spin_unlock(&med->med_open_lock);
+ pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+ ldlm_cancel_locks_for_export(export);
if (export->exp_outstanding_reply) {
struct ptlrpc_request *req = export->exp_outstanding_reply;
unsigned long flags;
export->exp_outstanding_reply = NULL;
}
- if (!export->exp_failover)
+ if (!(flags & OBD_OPT_FAILOVER))
mds_client_free(export);
- EXIT;
+
+ rc = class_disconnect(conn, flags);
+ class_export_put(export);
+
+ RETURN(rc);
}
/*
{
lock_kernel();
lock_super(sb);
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
+#else
+ if (sb->s_dirt && sb->s_op) {
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, 1);
+ else if (sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ }
+#endif
unlock_super(sb);
unlock_kernel();
}
static int mds_getstatus(struct ptlrpc_request *req)
{
+ struct obd_device *obd = req->rq_export->exp_obd;
struct mds_obd *mds = mds_req2mds(req);
struct mds_body *body;
int rc, size = sizeof(*body);
* requests if they have any. This would be fsync_super() if it
* was exported.
*/
- mds_fsync_super(mds->mds_sb);
+ fsfilt_sync(obd, mds->mds_sb);
body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1));
memcpy(desc, &mds->mds_lov_desc, sizeof (*desc));
tgt_count = mds->mds_lov_desc.ld_tgt_count;
- uuid0 = lustre_msg_buf (req->rq_repmsg, 1,
- tgt_count * sizeof (*uuid0));
+ uuid0 = lustre_msg_buf(req->rq_repmsg, 1, tgt_count * sizeof (*uuid0));
if (uuid0 == NULL) {
CERROR("too many targets, enlarge client buffers\n");
req->rq_status = -ENOSPC;
req->rq_status = rc;
RETURN(0);
}
+ memcpy(&mds->mds_osc_uuid, &mds->mds_lov_desc.ld_uuid,
+ sizeof(mds->mds_osc_uuid));
RETURN(0);
}
rc = fsfilt_get_md(obd, inode, lmm, lmm_size);
if (rc < 0) {
- CERROR ("Error %d reading eadata for ino %lu\n",
- rc, inode->i_ino);
+ CERROR("Error %d reading eadata for ino %lu\n",
+ rc, inode->i_ino);
} else if (rc > 0) {
body->valid |= OBD_MD_FLEASIZE;
body->eadatasize = rc;
if (inode == NULL)
RETURN(-ENOENT);
- body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body));
- LASSERT (body != NULL); /* caller prepped reply */
+ body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body));
+ LASSERT(body != NULL); /* caller prepped reply */
mds_pack_inode2fid(&body->fid1, inode);
mds_pack_inode2body(body, inode);
- if (S_ISREG(inode->i_mode) &&
- (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
- rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1,
- body, inode);
+ if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) {
+ rc = mds_pack_md(obd, req->rq_repmsg, reply_off+1, body, inode);
+
+ /* If we have LOV EA data, the OST holds size, atime, mtime */
+ if (!(body->valid & OBD_MD_FLEASIZE))
+ body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME);
} else if (S_ISLNK(inode->i_mode) &&
(reqbody->valid & OBD_MD_LINKNAME) != 0) {
- char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0);
+ char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1,0);
int len;
LASSERT (symname != NULL); /* caller prepped reply */
rc = 0;
}
}
+
RETURN(rc);
}
ENTRY;
body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
- LASSERT (body != NULL); /* checked by caller */
- LASSERT_REQSWABBED (req, offset); /* swabbed by caller */
+ LASSERT(body != NULL); /* checked by caller */
+ LASSERT_REQSWABBED(req, offset); /* swabbed by caller */
- if (S_ISREG(inode->i_mode) &&
- (body->valid & OBD_MD_FLEASIZE) != 0) {
+ if (S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) {
int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
rc, inode->i_ino);
size[bufcount] = 0;
CERROR("MD size %d larger than maximum possible %u\n",
rc, mds->mds_max_mdsize);
- } else
+ } else {
size[bufcount] = rc;
+ }
bufcount++;
- } else if (S_ISLNK (inode->i_mode) &&
- (body->valid & OBD_MD_LINKNAME) != 0) {
+ } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) {
if (inode->i_size + 1 != body->eadatasize)
- CERROR ("symlink size: %Lu, reply space: %d\n",
- inode->i_size + 1, body->eadatasize);
+ CERROR("symlink size: %Lu, reply space: %d\n",
+ inode->i_size + 1, body->eadatasize);
size[bufcount] = MIN(inode->i_size + 1, body->eadatasize);
bufcount++;
CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n",
rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
&req->rq_repmsg);
if (rc) {
- CERROR("out of memoryK\n");
- req->rq_status = rc;
- GOTO(out, rc);
+ CERROR("out of memory\n");
+ GOTO(out, req->rq_status = rc);
}
EXIT;
static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req,
struct lustre_handle *client_lockh)
{
+ struct mds_export_data *med = &req->rq_export->exp_mds_data;
+ struct mds_client_data *mcd = med->med_mcd;
struct obd_device *obd = req->rq_export->exp_obd;
struct mds_obd *mds = mds_req2mds(req);
struct dentry *parent, *child;
int namelen, rc = 0;
char *name;
- if (req->rq_export->exp_outstanding_reply)
- mds_steal_ack_locks(req->rq_export, req);
+ req->rq_transno = mcd->mcd_last_transno;
+ req->rq_status = mcd->mcd_last_result;
+
+ LASSERT (req->rq_export->exp_outstanding_reply);
+
+ mds_steal_ack_locks(req->rq_export, req);
+
+ if (req->rq_status)
+ return;
body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
LASSERT (body != NULL); /* checked by caller */
uc.ouc_cap = body->capability;
uc.ouc_suppgid1 = body->suppgid;
uc.ouc_suppgid2 = -1;
+
push_ctxt(&saved, &mds->mds_ctxt, &uc);
parent = mds_fid2dentry(mds, &body->fid1, NULL);
LASSERT(!IS_ERR(parent));
}
rc = mds_getattr_internal(obd, child, req, body, offset);
- req->rq_status = rc;
+ /* XXX need to handle error here */
+ LASSERT(!rc);
l_dput(child);
l_dput(parent);
}
{
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
+ struct ldlm_reply *rep = NULL;
struct obd_run_ctxt saved;
struct mds_body *body;
struct dentry *de = NULL, *dchild = NULL;
struct ldlm_res_id child_res_id = { .name = {0} };
struct lustre_handle parent_lockh;
int namesize;
- int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent;
+ int flags = 0, rc = 0, cleanup_phase = 0;
char *name;
ENTRY;
/* Swab now, before anyone looks inside the request */
- body = lustre_swab_reqbuf (req, offset, sizeof (*body),
- lustre_swab_mds_body);
+ body = lustre_swab_reqbuf(req, offset, sizeof(*body),
+ lustre_swab_mds_body);
if (body == NULL) {
- CERROR ("Can't swab mds_body\n");
- GOTO (cleanup, rc = -EFAULT);
+ CERROR("Can't swab mds_body\n");
+ GOTO(cleanup, rc = -EFAULT);
}
- LASSERT_REQSWAB (req, offset + 1);
- name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0);
+ LASSERT_REQSWAB(req, offset + 1);
+ name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0);
if (name == NULL) {
- CERROR ("Can't unpack name\n");
- GOTO (cleanup, rc = -EFAULT);
+ CERROR("Can't unpack name\n");
+ GOTO(cleanup, rc = -EFAULT);
}
namesize = req->rq_reqmsg->buflens[offset + 1];
- req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT;
- if (child_lockh->cookie) {
- LASSERT(req_was_resent);
- reconstruct_getattr_name(offset, req, child_lockh);
- RETURN(0);
- } else if (req_was_resent) {
- DEBUG_REQ(D_HA, req, "no reply for RESENT req");
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
+ struct obd_export *exp = req->rq_export;
+ if (exp->exp_outstanding_reply &&
+ exp->exp_outstanding_reply->rq_xid == req->rq_xid) {
+ reconstruct_getattr_name(offset, req, child_lockh);
+ RETURN(0);
+ }
+ DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",
+ exp->exp_outstanding_reply ?
+ exp->exp_outstanding_reply->rq_xid : (u64)0);
}
LASSERT (offset == 0 || offset == 2);
- /* if requests were at offset 2, replies go back at 1 */
- if (offset)
+ /* if requests were at offset 2, the getattr reply goes back at 1 */
+ if (offset) {
+ rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
offset = 1;
+ }
uc.ouc_fsuid = body->fsuid;
uc.ouc_fsgid = body->fsgid;
uc.ouc_suppgid2 = -1;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
/* Step 1: Lookup/lock parent */
+ intent_set_disposition(rep, DISP_LOOKUP_EXECD);
de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR,
&parent_lockh);
if (IS_ERR(de))
cleanup_phase = 2; /* child dentry */
if (dchild->d_inode == NULL) {
+ intent_set_disposition(rep, DISP_LOOKUP_NEG);
GOTO(cleanup, rc = -ENOENT);
+ } else {
+ intent_set_disposition(rep, DISP_LOOKUP_POS);
}
/* Step 3: Lock child */
return rc;
}
+
+/*
+ * statfs method for the MDS obd device: fill @osfs by asking the backing
+ * filesystem (through fsfilt_statfs()) about the MDS superblock.
+ *
+ * @max_age is accepted for the method signature but is not consulted here.
+ * Returns whatever fsfilt_statfs() returns.
+ */
+static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                          unsigned long max_age)
+{
+        struct mds_obd *mds = &obd->u.mds;
+
+        return fsfilt_statfs(obd, mds->mds_sb, osfs);
+}
+
static int mds_statfs(struct ptlrpc_request *req)
{
struct obd_device *obd = req->rq_export->exp_obd;
- struct obd_statfs *osfs;
- int rc, size = sizeof(*osfs);
+ int rc, size = sizeof(struct obd_statfs);
ENTRY;
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
GOTO(out, rc);
}
- osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
- rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs);
+ /* We call this so that we can cache a bit - HZ jiffies (1s) worth */
+ rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ);
if (rc) {
- CERROR("mds: statfs failed: rc %d\n", rc);
+ CERROR("mds_obd_statfs failed: rc %d\n", rc);
GOTO(out, rc);
}
static int mds_close(struct ptlrpc_request *req)
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
+ struct obd_device *obd = req->rq_export->exp_obd;
struct mds_body *body;
struct mds_file_data *mfd;
+ struct obd_run_ctxt saved;
int rc;
ENTRY;
RETURN(-ESTALE);
}
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
+ if (rc) {
+ CERROR("lustre_pack_msg: rc = %d\n", rc);
+ req->rq_status = rc;
+ }
+
spin_lock(&med->med_open_lock);
- req->rq_status = mds_close_mfd(mfd, med);
+ list_del(&mfd->mfd_list);
spin_unlock(&med->med_open_lock);
+ push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+ req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd);
+ pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
req->rq_status = -ENOMEM;
RETURN(-ENOMEM);
}
- rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
- if (rc) {
- CERROR("mds: lustre_pack_msg: rc = %d\n", rc);
- req->rq_status = rc;
- }
-
mds_mfd_put(mfd);
RETURN(0);
}
GOTO (out, rc = -EFAULT);
/* body->size is actually the offset -eeb */
- if ((body->size & (PAGE_SIZE - 1)) != 0) {
+ if ((body->size & ~PAGE_MASK) != 0) {
CERROR ("offset "LPU64"not on a page boundary\n", body->size);
GOTO (out, rc = -EFAULT);
}
break;
case MDS_REINT: {
- __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp));
+ __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp));
__u32 opc;
- int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize};
+ int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize,
+ mds->mds_max_cookiesize};
int bufcount;
/* NB only peek inside req now; mds_reint() will swab it */
}
opc = *opcp;
if (lustre_msg_swabbed (req->rq_reqmsg))
- __swab32s (&opc);
+ __swab32s(&opc);
DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc,
- (opc < sizeof (reint_names) / sizeof (reint_names[0]) ||
- reint_names[opc] == NULL) ? reint_names[opc] : "unknown opcode");
+ (opc < sizeof(reint_names) / sizeof(reint_names[0]) ||
+ reint_names[opc] == NULL) ? reint_names[opc] :
+ "unknown opcode");
OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0);
if (opc == REINT_UNLINK)
+ bufcount = 3;
+ else if (opc == REINT_OPEN)
bufcount = 2;
else
bufcount = 1;
rc = mds_close(req);
break;
+ case MDS_PIN:
+ DEBUG_REQ(D_INODE, req, "pin");
+ OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0);
+ rc = mds_pin(req);
+ break;
+
case OBD_PING:
DEBUG_REQ(D_INODE, req, "ping");
rc = target_handle_ping(req);
break;
+ case OBD_LOG_CANCEL:
+ CDEBUG(D_INODE, "log cancel\n");
+ OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+ rc = -ENOTSUPP; /* la la la */
+ break;
+
case LDLM_ENQUEUE:
DEBUG_REQ(D_INODE, req, "enqueue");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
struct obd_device *obd = list_entry(mds, struct obd_device,
u.mds);
req->rq_repmsg->last_xid =
- le64_to_cpu (med->med_mcd->mcd_last_xid);
+ le64_to_cpu(med->med_mcd->mcd_last_xid);
if (!obd->obd_no_transno) {
req->rq_repmsg->last_committed =
*
* Also assumes for mds_last_transno that we are not modifying it (no locking).
*/
-int mds_update_server_data(struct mds_obd *mds)
+int mds_update_server_data(struct obd_device *obd)
{
+ struct mds_obd *mds = &obd->u.mds;
struct mds_server_data *msd = mds->mds_server_data;
struct file *filp = mds->mds_rcvd_filp;
struct obd_run_ctxt saved;
msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno);
msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
- CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n",
- (unsigned long long)mds->mds_mount_count,
- (unsigned long long)mds->mds_last_transno);
- rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off);
+ CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n",
+ mds->mds_mount_count, mds->mds_last_transno);
+ rc = fsfilt_write_record(obd, filp, (char *)msd, sizeof(*msd), &off);
if (rc != sizeof(*msd)) {
CERROR("error writing MDS server data: rc = %d\n", rc);
if (rc > 0)
rc = -EIO;
GOTO(out, rc);
}
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
-#else
rc = file_fsync(filp, filp->f_dentry, 1);
-#endif
if (rc)
CERROR("error flushing MDS server data: rc = %d\n", rc);
}
/* mount the file system (secretly) */
-static int mds_setup(struct obd_device *obddev, obd_count len, void *buf)
+static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
{
struct obd_ioctl_data* data = buf;
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
struct vfsmount *mnt;
int rc = 0;
unsigned long page;
if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
RETURN(rc = -EINVAL);
- obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
- if (IS_ERR(obddev->obd_fsops))
- RETURN(rc = PTR_ERR(obddev->obd_fsops));
+ if (data->ioc_inlbuf4)
+ obd_str2uuid(&mds->mds_osc_uuid, data->ioc_inlbuf4);
+
+ obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
+ if (IS_ERR(obd->obd_fsops))
+ RETURN(rc = PTR_ERR(obd->obd_fsops));
if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) {
spin_lock_init(&mds->mds_transno_lock);
mds->mds_max_mdsize = sizeof(struct lov_mds_md);
- rc = mds_fs_setup(obddev, mnt);
+ mds->mds_max_cookiesize = sizeof(struct llog_cookie);
+ rc = mds_fs_setup(obd, mnt);
if (rc) {
CERROR("MDS filesystem method init failed: rc = %d\n", rc);
GOTO(err_put, rc);
}
- obddev->obd_namespace =
- ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER);
- if (obddev->obd_namespace == NULL) {
- mds_cleanup(obddev, 0, 0);
- GOTO(err_fs, rc = -ENOMEM);
+#ifdef ENABLE_ORPHANS
+ rc = llog_start_commit_thread();
+ if (rc < 0)
+ GOTO(err_fs, rc);
+#endif
+
+#ifdef ENABLE_ORPHANS
+ mds->mds_catalog = mds_get_catalog(obd);
+ if (IS_ERR(mds->mds_catalog))
+ GOTO(err_fs, rc = PTR_ERR(mds->mds_catalog));
+#endif
+
+ obd->obd_namespace = ldlm_namespace_new("mds_server",
+ LDLM_NAMESPACE_SERVER);
+ if (obd->obd_namespace == NULL) {
+ mds_cleanup(obd, 0);
+ GOTO(err_log, rc = -ENOMEM);
}
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
- "mds_ldlm_client", &obddev->obd_ldlm_client);
+ "mds_ldlm_client", &obd->obd_ldlm_client);
mds->mds_has_lov_desc = 0;
+ obd->obd_replayable = 1;
RETURN(0);
+err_log:
+#ifdef ENABLE_ORPHANS
+ mds_put_catalog(mds->mds_catalog);
+ /* No extra cleanup needed for llog_start_commit_thread() */
err_fs:
- mds_fs_cleanup(obddev, 0);
+#endif
+ mds_fs_cleanup(obd, 0);
err_put:
unlock_kernel();
mntput(mds->mds_vfsmnt);
mds->mds_sb = 0;
lock_kernel();
err_ops:
- fsfilt_put_ops(obddev->obd_fsops);
+ fsfilt_put_ops(obd->obd_fsops);
return rc;
}
-static int mds_cleanup(struct obd_device *obddev, int force, int failover)
+static int mds_cleanup(struct obd_device *obd, int flags)
{
- struct super_block *sb;
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
ENTRY;
- sb = mds->mds_sb;
- if (!mds->mds_sb)
+ if (mds->mds_sb == NULL)
RETURN(0);
- mds_update_server_data(mds);
- mds_fs_cleanup(obddev, failover);
+#ifdef ENABLE_ORPHANS
+ mds_put_catalog(mds->mds_catalog);
+#endif
+ if (mds->mds_osc_obd)
+ obd_disconnect(&mds->mds_osc_conn, flags);
+ mds_update_server_data(obd);
+ mds_fs_cleanup(obd, flags);
unlock_kernel();
/* 2 seems normal on mds, (may_umount() also expects 2
fwiw), but we only see 1 at this point in obdfilter. */
- if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){
- CERROR("%s: mount point busy, mnt_count: %d\n",
- obddev->obd_name,
- atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count));
- }
+ if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
+ CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
+ atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
mntput(mds->mds_vfsmnt);
mds->mds_sb = 0;
- ldlm_namespace_free(obddev->obd_namespace);
+ ldlm_namespace_free(obd->obd_namespace);
- if (obddev->obd_recovering)
- target_cancel_recovery_timer(obddev);
+ if (obd->obd_recovering)
+ target_cancel_recovery_timer(obd);
lock_kernel();
#ifdef CONFIG_DEV_RDONLY
dev_clear_rdonly(2);
#endif
- fsfilt_put_ops(obddev->obd_fsops);
+ fsfilt_put_ops(obd->obd_fsops);
RETURN(0);
}
remote_hdl.cookie);
}
+/*
+ * Test disposition bits in an intent reply.
+ *
+ * Returns the bits of @flag that are set in rep->lock_policy_res1, or 0
+ * when @rep is NULL (no reply buffer means no dispositions recorded).
+ */
+int intent_disposition(struct ldlm_reply *rep, int flag)
+{
+        return rep != NULL ? (rep->lock_policy_res1 & flag) : 0;
+}
+
+/*
+ * Record disposition bits in an intent reply by OR-ing @flag into
+ * rep->lock_policy_res1.  A NULL @rep is silently ignored.
+ */
+void intent_set_disposition(struct ldlm_reply *rep, int flag)
+{
+        if (rep != NULL)
+                rep->lock_policy_res1 |= flag;
+}
+
static int ldlm_intent_policy(struct ldlm_namespace *ns,
struct ldlm_lock **lockp, void *req_cookie,
ldlm_mode_t mode, int flags, void *data)
{
struct ptlrpc_request *req = req_cookie;
struct ldlm_lock *lock = *lockp;
- int rc = 0;
ENTRY;
if (!req_cookie)
/* an intent needs to be considered */
struct ldlm_intent *it;
struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
- struct mds_body *mds_body;
struct ldlm_reply *rep;
- struct lustre_handle lockh = { 0 };
+ struct lustre_handle lockh;
struct ldlm_lock *new_lock;
- int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply),
- sizeof(struct mds_body),
- mds->mds_max_mdsize};
+ int offset = 2, repsize[4] = {sizeof(struct ldlm_reply),
+ sizeof(struct mds_body),
+ mds->mds_max_mdsize,
+ mds->mds_max_cookiesize};
- it = lustre_swab_reqbuf (req, 1, sizeof (*it),
- lustre_swab_ldlm_intent);
+ it = lustre_swab_reqbuf(req, 1, sizeof (*it),
+ lustre_swab_ldlm_intent);
if (it == NULL) {
CERROR ("Intent missing\n");
- rc = req->rq_status = -EFAULT;
- RETURN (rc);
+ req->rq_status = -EFAULT;
+ RETURN(req->rq_status);
}
LDLM_DEBUG(lock, "intent policy, opc: %s",
ldlm_it2str(it->opc));
- rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen,
- &req->rq_repmsg);
- if (rc) {
- rc = req->rq_status = -ENOMEM;
- RETURN(rc);
- }
+ req->rq_status = lustre_pack_msg(it->opc == IT_UNLINK ? 4 : 3,
+ repsize, NULL, &req->rq_replen,
+ &req->rq_repmsg);
+ if (req->rq_status)
+ RETURN(req->rq_status);
rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
- rep->lock_policy_res1 = IT_INTENT_EXEC;
+ intent_set_disposition(rep, DISP_IT_EXECD);
fixup_handle_for_resent_req(req, lock, &lockh);
switch ((long)it->opc) {
case IT_OPEN:
case IT_CREAT|IT_OPEN:
- rc = mds_reint(req, offset, &lockh);
- /* We return a dentry to the client if IT_OPEN_POS is
- * set, or if we make it to the OPEN portion of the
- * programme (which implies that we created) */
- if (!(rep->lock_policy_res1 & IT_OPEN_POS ||
- rep->lock_policy_res1 & IT_OPEN_OPEN)) {
- rep->lock_policy_res2 = rc;
+ /* XXX swab here to assert that an mds_open reint
+ * packet is following */
+ rep->lock_policy_res2 = mds_reint(req, offset, &lockh);
+ /* We abort the lock if the lookup was negative and
+ * we did not make it to the OPEN portion */
+ if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+ !intent_disposition(rep, DISP_OPEN_OPEN))
RETURN(ELDLM_LOCK_ABORTED);
- }
- break;
- case IT_UNLINK:
- rc = mds_reint(req, offset, &lockh);
- /* Don't return a lock if the unlink failed, or if we're
- * not sending back an EA */
- if (rc) {
- rep->lock_policy_res2 = rc;
- RETURN(ELDLM_LOCK_ABORTED);
- }
- if (req->rq_status != 0) {
- rep->lock_policy_res2 = req->rq_status;
- RETURN(ELDLM_LOCK_ABORTED);
- }
- mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body));
- if (!(mds_body->valid & OBD_MD_FLEASIZE)) {
- rep->lock_policy_res2 = rc;
- RETURN(ELDLM_LOCK_ABORTED);
- }
break;
case IT_GETATTR:
case IT_LOOKUP:
case IT_READDIR:
- rc = mds_getattr_name(offset, req, &lockh);
+ rep->lock_policy_res2 = mds_getattr_name(offset, req,
+ &lockh);
/* FIXME: we need to sit down and decide on who should
* set req->rq_status, who should return negative and
- * positive return values, and what they all mean. */
- if (rc) {
- rep->lock_policy_res2 = rc;
+ * positive return values, and what they all mean.
+ * - replay: returns 0 & req->status is old status
+ * - otherwise: returns req->status */
+ if (!intent_disposition(rep, DISP_LOOKUP_POS) ||
+ rep->lock_policy_res2)
RETURN(ELDLM_LOCK_ABORTED);
- }
if (req->rq_status != 0) {
rep->lock_policy_res2 = req->rq_status;
RETURN(ELDLM_LOCK_ABORTED);
}
/* By this point, whatever function we called above must have
- * filled in 'lockh' or returned an error. We want to give the
- * new lock to the client instead of whatever lock it was about
- * to get. */
+ * either filled in 'lockh', been an intent replay, or returned
+ * an error. We want to allow replayed RPCs to not get a lock,
+ * since we would just drop it below anyways because lock replay
+ * is done separately by the client afterwards. For regular
+ * RPCs we want to give the new lock to the client instead of
+ * whatever lock it was about to get.
+ */
new_lock = ldlm_handle2lock(&lockh);
+ if (flags & LDLM_FL_INTENT_ONLY && !new_lock)
+ RETURN(ELDLM_LOCK_ABORTED);
+
LASSERT(new_lock != NULL);
/* If we've already given this lock to a client once, then we
RETURN(ELDLM_LOCK_REPLACED);
} else {
int size = sizeof(struct ldlm_reply);
- rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen,
- &req->rq_repmsg);
- if (rc) {
+ if (lustre_pack_msg(1, &size, NULL, &req->rq_replen,
+ &req->rq_repmsg)) {
LBUG();
RETURN(-ENOMEM);
}
}
- RETURN(rc);
+ RETURN(0);
}
int mds_attach(struct obd_device *dev, obd_count len, void *data)
}
-static int mdt_cleanup(struct obd_device *obddev, int force, int failover)
+static int mdt_cleanup(struct obd_device *obddev, int flags)
{
struct mds_obd *mds = &obddev->u.mds;
ENTRY;
/* use obd ops to offer management infrastructure */
static struct obd_ops mds_obd_ops = {
- o_owner: THIS_MODULE,
- o_attach: mds_attach,
- o_detach: mds_detach,
- o_connect: mds_connect,
- o_disconnect: mds_disconnect,
- o_setup: mds_setup,
- o_cleanup: mds_cleanup,
- o_iocontrol: mds_iocontrol,
- o_destroy_export: mds_destroy_export
+ o_owner: THIS_MODULE,
+ o_attach: mds_attach,
+ o_detach: mds_detach,
+ o_connect: mds_connect,
+ o_disconnect: mds_disconnect,
+ o_setup: mds_setup,
+ o_cleanup: mds_cleanup,
+ o_statfs: mds_obd_statfs,
+ o_iocontrol: mds_iocontrol
};
static struct obd_ops mdt_obd_ops = {
return 0;
}
-static void __exit mds_exit(void)
+static void /*__exit*/ mds_exit(void)
{
ldlm_unregister_intent();
class_unregister_type(LUSTRE_MDS_NAME);
#else
-static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs)
-{
- struct obd_device* dev = (struct obd_device*) data;
- struct mds_obd *mds;
-
- LASSERT(dev != NULL);
- mds = &dev->u.mds;
- return vfs_statfs(mds->mds_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_mds_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_mds_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device *obd = (struct obd_device *)data;
-
- LASSERT(obd != NULL);
- LASSERT(obd->obd_fsops != NULL);
- LASSERT(obd->obd_fsops->fs_type != NULL);
- return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
-}
-
-int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct obd_device* obd = (struct obd_device *)data;
LASSERT(obd != NULL);
LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname);
*eof = 1;
- return snprintf(page, count, "%s\n",
- obd->u.mds.mds_vfsmnt->mnt_devname);
+
+ return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
}
struct lprocfs_vars lprocfs_mds_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal",rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "fstype", rd_fstype, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
- { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 },
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", lprocfs_rd_blksize, 0, 0 },
+ { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
+ { "fstype", lprocfs_rd_fstype, 0, 0 },
+ { "filestotal", lprocfs_rd_filestotal, 0, 0 },
+ { "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+ { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 },
{ 0 }
};
struct lprocfs_vars lprocfs_mds_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
{ 0 }
};
struct lprocfs_vars lprocfs_mdt_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#include <linux/obd_support.h>
#include <linux/lustre_lib.h>
#include <linux/lustre_fsfilt.h>
+#include <portals/list.h>
+
+#include "mds_internal.h"
/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
#define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
* we know its offset.
*/
int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
- struct mds_export_data *med, int cl_off)
+ struct mds_export_data *med, int cl_idx)
{
unsigned long *bitmap = mds->mds_client_bitmap;
- int new_client = (cl_off == -1);
+ int new_client = (cl_idx == -1);
LASSERT(bitmap != NULL);
if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
RETURN(0);
- /* the bitmap operations can handle cl_off > sizeof(long) * 8, so
+ /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
* there's no need for extra complication here
*/
if (new_client) {
- cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
+ cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
repeat:
- if (cl_off >= MDS_MAX_CLIENTS) {
+ if (cl_idx >= MDS_MAX_CLIENTS) {
CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
return -ENOMEM;
}
- if (test_and_set_bit(cl_off, bitmap)) {
+ if (test_and_set_bit(cl_idx, bitmap)) {
CERROR("MDS client %d: found bit is set in bitmap\n",
- cl_off);
- cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
- cl_off);
+ cl_idx);
+ cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
+ cl_idx);
goto repeat;
}
} else {
- if (test_and_set_bit(cl_off, bitmap)) {
+ if (test_and_set_bit(cl_idx, bitmap)) {
CERROR("MDS client %d: bit already set in bitmap!!\n",
- cl_off);
+ cl_idx);
LBUG();
}
}
- CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n",
- cl_off, med->med_mcd->mcd_uuid);
+ CDEBUG(D_INFO, "client at index %d with UUID '%s' added\n",
+ cl_idx, med->med_mcd->mcd_uuid);
- med->med_off = cl_off;
+ med->med_idx = cl_idx;
+ med->med_off = MDS_LR_CLIENT_START + (cl_idx * MDS_LR_CLIENT_SIZE);
if (new_client) {
struct obd_run_ctxt saved;
- loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE);
+ loff_t off = med->med_off;
ssize_t written;
void *handle;
* could use any of them, or maybe an FSFILT_OP_NONE is best?
*/
handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
- FSFILT_OP_SETATTR);
+ FSFILT_OP_SETATTR, NULL);
if (IS_ERR(handle)) {
written = PTR_ERR(handle);
CERROR("unable to start transaction: rc %d\n",
(int)written);
} else {
- written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd,
- sizeof(*med->med_mcd), &off);
+ written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+ (char *)med->med_mcd,
+ sizeof(*med->med_mcd),
+ &off);
fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode,
handle, 0);
}
RETURN(written);
RETURN(-EIO);
}
- CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n",
- MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE),
+ CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
+ med->med_idx, med->med_off,
(unsigned int)sizeof(*med->med_mcd));
}
return 0;
{
struct mds_export_data *med = &exp->exp_mds_data;
struct mds_obd *mds = &exp->exp_obd->u.mds;
+ struct obd_device *obd = exp->exp_obd;
struct mds_client_data zero_mcd;
struct obd_run_ctxt saved;
int written;
unsigned long *bitmap = mds->mds_client_bitmap;
- loff_t off;
LASSERT(bitmap);
if (!med->med_mcd)
if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID"))
GOTO(free_and_out, 0);
- off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE);
-
- CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
- med->med_off, off, med->med_mcd->mcd_uuid);
+ CDEBUG(D_INFO, "freeing client at index %u (%lld)with UUID '%s'\n",
+ med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
- if (!test_and_clear_bit(med->med_off, bitmap)) {
+ if (!test_and_clear_bit(med->med_idx, bitmap)) {
CERROR("MDS client %u: bit already clear in bitmap!!\n",
- med->med_off);
+ med->med_idx);
LBUG();
}
memset(&zero_mcd, 0, sizeof zero_mcd);
push_ctxt(&saved, &mds->mds_ctxt, NULL);
- written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd,
- sizeof(zero_mcd), &off);
+ written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+ (char *)&zero_mcd, sizeof(zero_mcd),
+ &med->med_off);
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
if (written != sizeof(zero_mcd)) {
- CERROR("error zeroing out client %s off %d in %s: %d\n",
- med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD,
+ CERROR("error zeroing out client %s index %d in %s: %d\n",
+ med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD,
written);
} else {
CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
- med->med_mcd->mcd_uuid, med->med_off);
+ med->med_mcd->mcd_uuid, med->med_idx);
}
free_and_out:
return 0;
}
-static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f)
+static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
{
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
struct mds_server_data *msd;
struct mds_client_data *mcd = NULL;
loff_t off = 0;
- int cl_off;
- unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
+ int cl_idx;
+ unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
__u64 last_transno = 0;
- __u64 last_mount;
+ __u64 mount_count;
int rc = 0;
- LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE);
- LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT);
+ LASSERT(sizeof(struct mds_client_data) == MDS_LR_CLIENT_SIZE);
+ LASSERT(sizeof(struct mds_server_data) <= MDS_LR_SERVER_SIZE);
OBD_ALLOC(msd, sizeof(*msd));
if (!msd)
RETURN(-ENOMEM);
}
- rc = lustre_fread(f, (char *)msd, sizeof(*msd), &off);
-
mds->mds_server_data = msd;
- if (rc == 0) {
- CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name,
- LAST_RCVD);
+
+ if (last_rcvd_size == 0) {
+ CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+ memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
+ msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+ msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+ msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
RETURN(0);
}
+ rc = fsfilt_read_record(obd, file, (char *)msd, sizeof(*msd), &off);
+
if (rc != sizeof(*msd)) {
- CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
+ CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD,rc);
if (rc > 0)
rc = -EIO;
GOTO(err_msd, rc);
}
+ if (!msd->msd_server_size)
+ msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
+ if (!msd->msd_client_start)
+ msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
+ if (!msd->msd_client_size)
+ msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
+
+ if (msd->msd_feature_incompat) {
+ CERROR("unsupported incompat feature %x\n",
+ le32_to_cpu(msd->msd_feature_incompat));
+ GOTO(err_msd, rc = -EINVAL);
+ }
+ if (msd->msd_feature_rocompat) {
+ CERROR("unsupported read-only feature %x\n",
+ le32_to_cpu(msd->msd_feature_rocompat));
+ /* Do something like remount filesystem read-only */
+ GOTO(err_msd, rc = -EINVAL);
+ }
- CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
- last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE);
-
- /*
- * When we do a clean MDS shutdown, we save the last_transno into
- * the header.
- */
last_transno = le64_to_cpu(msd->msd_last_transno);
mds->mds_last_transno = last_transno;
- CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n",
- last_transno);
-
- last_mount = le64_to_cpu(msd->msd_mount_count);
- mds->mds_mount_count = last_mount;
- CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount);
- /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
- for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) {
+ mount_count = le64_to_cpu(msd->msd_mount_count);
+ mds->mds_mount_count = mount_count;
+
+ CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
+ obd->obd_name, last_transno);
+ CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
+ obd->obd_name, mount_count);
+ CDEBUG(D_INODE, "%s: server data size: %u\n",
+ obd->obd_name, le32_to_cpu(msd->msd_server_size));
+ CDEBUG(D_INODE, "%s: per-client data start: %u\n",
+ obd->obd_name, le32_to_cpu(msd->msd_client_start));
+ CDEBUG(D_INODE, "%s: per-client data size: %u\n",
+ obd->obd_name, le32_to_cpu(msd->msd_client_size));
+ CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
+ obd->obd_name, last_rcvd_size);
+ CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
+ (last_rcvd_size - MDS_LR_CLIENT_START) / MDS_LR_CLIENT_SIZE);
+
+ /* When we do a clean MDS shutdown, we save the last_transno into
+ * the header. If we find clients with higher last_transno values
+ * then those clients may need recovery done. */
+ for (cl_idx = 0; off < last_rcvd_size; cl_idx++) {
+ __u64 last_transno;
int mount_age;
if (!mcd) {
GOTO(err_msd, rc = -ENOMEM);
}
- rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off);
+ /* Don't assume off is incremented properly, in case
+ * sizeof(*mcd) isn't the same as msd->msd_client_size.
+ */
+ off = le32_to_cpu(msd->msd_client_start) +
+ cl_idx * le16_to_cpu(msd->msd_client_size);
+ rc = fsfilt_read_record(obd, file, (char *)mcd,
+ sizeof(*mcd), &off);
if (rc != sizeof(*mcd)) {
CERROR("error reading MDS %s offset %d: rc = %d\n",
- LAST_RCVD, cl_off, rc);
+ LAST_RCVD, cl_idx, rc);
if (rc > 0) /* XXX fatal error or just abort reading? */
rc = -EIO;
break;
if (mcd->mcd_uuid[0] == '\0') {
CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
- cl_off);
+ cl_idx);
continue;
}
/* These exports are cleaned up by mds_disconnect(), so they
* need to be set up like real exports as mds_connect() does.
*/
- mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count);
+ mount_age = mount_count - le64_to_cpu(mcd->mcd_mount_count);
if (mount_age < MDS_MOUNT_RECOV) {
- struct obd_export *exp = class_new_export(obddev);
+ struct obd_export *exp = class_new_export(obd);
struct mds_export_data *med;
+ CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64
+ "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64
+ "\n", mcd->mcd_uuid, cl_idx,
+ last_transno, le64_to_cpu(msd->msd_last_transno),
+ le64_to_cpu(mcd->mcd_mount_count), mount_count);
if (!exp) {
rc = -ENOMEM;
sizeof exp->exp_client_uuid.uuid);
med = &exp->exp_mds_data;
med->med_mcd = mcd;
- mds_client_add(obddev, mds, med, cl_off);
+ mds_client_add(obd, mds, med, cl_idx);
/* create helper if export init gets more complex */
INIT_LIST_HEAD(&med->med_open_head);
spin_lock_init(&med->med_open_lock);
mcd = NULL;
- obddev->obd_recoverable_clients++;
+ obd->obd_recoverable_clients++;
class_export_put(exp);
} else {
CDEBUG(D_INFO, "discarded client %d, UUID '%s', count "
- LPU64"\n", cl_off, mcd->mcd_uuid,
+ LPU64"\n", cl_idx, mcd->mcd_uuid,
le64_to_cpu(mcd->mcd_mount_count));
}
- CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n",
- cl_off, (unsigned long long)last_transno);
+ CDEBUG(D_OTHER, "client at offset %d has last_transno = "
+ LPU64"\n", cl_idx, last_transno);
if (last_transno > mds->mds_last_transno)
mds->mds_last_transno = last_transno;
}
- obddev->obd_last_committed = mds->mds_last_transno;
- if (obddev->obd_recoverable_clients) {
+ obd->obd_last_committed = mds->mds_last_transno;
+ if (obd->obd_recoverable_clients) {
CERROR("RECOVERY: %d recoverable clients, last_transno "
LPU64"\n",
- obddev->obd_recoverable_clients, mds->mds_last_transno);
- obddev->obd_next_recovery_transno = obddev->obd_last_committed
+ obd->obd_recoverable_clients, mds->mds_last_transno);
+ obd->obd_next_recovery_transno = obd->obd_last_committed
+ 1;
- obddev->obd_recovering = 1;
+ obd->obd_recovering = 1;
}
if (mcd)
return rc;
}
-static int mds_fs_prep(struct obd_device *obddev)
+static int mds_fs_prep(struct obd_device *obd)
{
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
struct obd_run_ctxt saved;
struct dentry *dentry;
- struct file *f;
+ struct file *file;
int rc;
push_ctxt(&saved, &mds->mds_ctxt, NULL);
}
mds->mds_fid_de = dentry;
- f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
- if (IS_ERR(f)) {
- rc = PTR_ERR(f);
+ dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create PENDING directory: rc = %d\n", rc);
+ GOTO(err_fid, rc);
+ }
+ mds->mds_pending_dir = dentry;
+
+ dentry = simple_mkdir(current->fs->pwd, "LOGS", 0700);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create LOGS directory: rc = %d\n", rc);
+ GOTO(err_pending, rc);
+ }
+ mds->mds_logs_dir = dentry;
+
+ file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
+ if (IS_ERR(file)) {
+ rc = PTR_ERR(file);
CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
- GOTO(err_pop, rc = PTR_ERR(f));
+
+ GOTO(err_logs, rc = PTR_ERR(file));
}
- if (!S_ISREG(f->f_dentry->d_inode->i_mode)) {
+ if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
- f->f_dentry->d_inode->i_mode);
+ file->f_dentry->d_inode->i_mode);
GOTO(err_filp, rc = -ENOENT);
}
- rc = fsfilt_journal_data(obddev, f);
+ rc = fsfilt_journal_data(obd, file);
if (rc) {
CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc);
GOTO(err_filp, rc);
}
- rc = mds_read_last_rcvd(obddev, f);
+ rc = mds_read_last_rcvd(obd, file);
if (rc) {
CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
GOTO(err_client, rc);
}
- mds->mds_rcvd_filp = f;
+ mds->mds_rcvd_filp = file;
+#ifdef I_SKIP_PDFLUSH
+ /*
+ * we need this to protect from deadlock
+ * pdflush vs. lustre_fwrite()
+ */
+ file->f_dentry->d_inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
err_pop:
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
return rc;
err_client:
- class_disconnect_exports(obddev, 0);
+ class_disconnect_exports(obd, 0);
err_filp:
- if (filp_close(f, 0))
+ if (filp_close(file, 0))
CERROR("can't close %s after error\n", LAST_RCVD);
+err_logs:
+ dput(mds->mds_logs_dir);
+err_pending:
+ dput(mds->mds_pending_dir);
+err_fid:
+ dput(mds->mds_fid_de);
goto err_pop;
}
-int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt)
+int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
{
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
ENTRY;
mds->mds_vfsmnt = mnt;
mds->mds_ctxt.pwdmnt = mnt;
mds->mds_ctxt.pwd = mnt->mnt_root;
mds->mds_ctxt.fs = get_ds();
- RETURN(mds_fs_prep(obddev));
+ RETURN(mds_fs_prep(obd));
}
-int mds_fs_cleanup(struct obd_device *obddev, int failover)
+int mds_fs_cleanup(struct obd_device *obd, int flags)
{
- struct mds_obd *mds = &obddev->u.mds;
+ struct mds_obd *mds = &obd->u.mds;
struct obd_run_ctxt saved;
int rc = 0;
- if (failover)
+ if (flags & OBD_OPT_FAILOVER)
CERROR("%s: shutting down for failover; client state will"
- " be preserved.\n", obddev->obd_name);
+ " be preserved.\n", obd->obd_name);
- class_disconnect_exports(obddev, failover); /* this cleans up client
- info too */
+ class_disconnect_exports(obd, flags); /* cleans up client info too */
mds_server_free_data(mds);
push_ctxt(&saved, &mds->mds_ctxt, NULL);
rc = filp_close(mds->mds_rcvd_filp, 0);
mds->mds_rcvd_filp = NULL;
if (rc)
- CERROR("last_rcvd file won't close, rc=%d\n", rc);
+ CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
+ }
+ if (mds->mds_logs_dir) {
+ l_dput(mds->mds_logs_dir);
+ mds->mds_logs_dir = NULL;
+ }
+ if (mds->mds_pending_dir) {
+ l_dput(mds->mds_pending_dir);
+ mds->mds_pending_dir = NULL;
}
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
shrink_dcache_parent(mds->mds_fid_de);
return rc;
}
+
+/* Close one llog file; this is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * If the log is not a catalog and holds no records, it is also unlinked
+ * from the LOGS directory.  The LOGS directory i_sem is held from before
+ * llog_delete_log() until after the unlink.
+ *
+ * Returns the filp_close() result, or the vfs_unlink() error if the close
+ * succeeded but the unlink failed.
+ */
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle)
+{
+        struct llog_object_hdr *llh = loghandle->lgh_hdr;
+        struct mds_obd *mds = &cathandle->lgh_obd->u.mds;
+        struct dentry *dchild = NULL;
+        int rc;
+        ENTRY;
+
+        /* If we are going to delete this log, grab a ref before we close
+         * it so we don't have to immediately do another lookup.
+         */
+        if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){
+                CDEBUG(D_INODE, "deleting log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+                down(&mds->mds_logs_dir->d_inode->i_sem);
+                dchild = dget(loghandle->lgh_file->f_dentry);
+                llog_delete_log(cathandle, loghandle);
+        } else {
+                CDEBUG(D_INODE, "closing log file "LPX64":%x\n",
+                       loghandle->lgh_cookie.lgc_lgl.lgl_oid,
+                       loghandle->lgh_cookie.lgc_lgl.lgl_ogen);
+        }
+
+        rc = filp_close(loghandle->lgh_file, 0);
+
+        llog_free_handle(loghandle); /* also removes loghandle from list */
+
+        /* loghandle must not be touched below this point; mds was derived
+         * from cathandle before the free. */
+        if (dchild) {
+                int err = vfs_unlink(mds->mds_logs_dir->d_inode, dchild);
+                if (err) {
+                        /* "%.*s" bounds the name by its length; "%*s" would
+                         * only set a minimum field width. */
+                        CERROR("error unlinking empty log %.*s: rc %d\n",
+                               (int)dchild->d_name.len, dchild->d_name.name,
+                               err);
+                        if (!rc)
+                                rc = err;
+                }
+                l_dput(dchild);
+                up(&mds->mds_logs_dir->d_inode->i_sem);
+        }
+        RETURN(rc);
+}
+
+/* llog callback: open the existing log file named by @logcookie (object id
+ * plus generation).  Assumes caller has already pushed us into the kernel
+ * context.
+ *
+ * Returns a freshly allocated handle with its callbacks filled in, or an
+ * ERR_PTR() if allocation, lookup or open fails.
+ */
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        struct ll_fid fid = { .id = logcookie->lgc_lgl.lgl_oid,
+                              .generation = logcookie->lgc_lgl.lgl_ogen,
+                              .f_type = S_IFREG };
+        struct llog_handle *loghandle;
+        struct dentry *dchild;
+        struct file *logfile;
+        int rc;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+        /* The fid lookup is done under the LOGS directory semaphore. */
+        down(&mds->mds_logs_dir->d_inode->i_sem);
+        dchild = mds_fid2dentry(mds, &fid, NULL);
+        up(&mds->mds_logs_dir->d_inode->i_sem);
+
+        if (IS_ERR(dchild)) {
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+
+        if (!dchild->d_inode) {
+                rc = -ENOENT;
+                CERROR("nonexistent log file "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out_put, rc);
+        }
+
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error,
+         * so the failure path below must not drop dchild again. */
+        mntget(mds->mds_vfsmnt);
+        logfile = dentry_open(dchild, mds->mds_vfsmnt, O_RDWR | O_LARGEFILE);
+        loghandle->lgh_file = logfile;
+        if (IS_ERR(logfile)) {
+                rc = PTR_ERR(logfile);
+                CERROR("error opening logfile "LPX64":%x: rc %d\n",
+                       fid.id, fid.generation, rc);
+                GOTO(out, rc);
+        }
+
+        memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie));
+        loghandle->lgh_obd = obd;
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+
+        RETURN(loghandle);
+
+out_put:
+        l_dput(dchild);
+out:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* llog callback: create a new log file under LOGS/ and return a handle.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * When the MDS has no catalog yet, the file created (or reopened) is
+ * "LOGS/catalog".  Otherwise a plain log named "LOGS/log<seconds>.<index>"
+ * is created with O_EXCL, retrying with the next catalog index on a name
+ * collision.
+ *
+ * Returns the new handle, or an ERR_PTR() on allocation or open failure.
+ */
+struct llog_handle *mds_log_create(struct obd_device *obd)
+{
+        /* "LOGS/log" (8) + %lu (up to 20 digits) + '.' + %u (up to 10
+         * digits) + NUL = 40 bytes worst case. */
+        char logbuf[48], *logname;
+        struct llog_handle *loghandle;
+        int rc, open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+retry:
+        if (!obd->u.mds.mds_catalog) {
+                logname = "LOGS/catalog";
+        } else {
+                /* Bounded format, and no trailing newline: the string is
+                 * used as the file name.  Logs are reopened by inode
+                 * number/generation in mds_log_open(), never by name. */
+                snprintf(logbuf, sizeof(logbuf), "LOGS/log%lu.%u",
+                         CURRENT_SECONDS,
+                         obd->u.mds.mds_catalog->lgh_index++);
+                open_flags |= O_EXCL;
+                logname = logbuf;
+        }
+        loghandle->lgh_file = filp_open(logname, open_flags, 0644);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                if (rc == -EEXIST) {
+                        CDEBUG(D_HA, "collision in logfile %s creation\n",
+                               logname);
+                        obd->u.mds.mds_catalog->lgh_index++;
+                        goto retry;
+                }
+                CERROR("error opening/creating %s: rc %d\n", logname, rc);
+                GOTO(out_handle, rc);
+        }
+
+        /* The cookie identifies the log by inode number and generation. */
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid =
+                loghandle->lgh_file->f_dentry->d_inode->i_ino;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen =
+                loghandle->lgh_file->f_dentry->d_inode->i_generation;
+        loghandle->lgh_log_create = mds_log_create;
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_handle:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc);
+}
+
+/* Open the MDS llog catalog recorded in the server data, or create a new
+ * one (and record it via mds_update_server_data()) if none is recorded or
+ * the recorded one cannot be opened.  Pushes and pops the MDS context
+ * itself.  Returns the catalog handle or an ERR_PTR().
+ */
+struct llog_handle *mds_get_catalog(struct obd_device *obd)
+{
+ struct mds_server_data *msd = obd->u.mds.mds_server_data;
+ struct obd_run_ctxt saved;
+ struct llog_handle *cathandle = NULL;
+ int rc = 0;
+ ENTRY;
+
+ push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
+ if (msd->msd_catalog_oid) {
+ struct llog_cookie catcookie;
+
+ catcookie.lgc_lgl.lgl_oid = le64_to_cpu(msd->msd_catalog_oid);
+ catcookie.lgc_lgl.lgl_ogen = le32_to_cpu(msd->msd_catalog_ogen);
+ cathandle = mds_log_open(obd, &catcookie);
+ if (IS_ERR(cathandle)) {
+ CERROR("error opening catalog "LPX64":%x: rc %d\n",
+ catcookie.lgc_lgl.lgl_oid,
+ catcookie.lgc_lgl.lgl_ogen,
+ (int)PTR_ERR(cathandle));
+ /* Forget the stale catalog so the block below creates
+ * a replacement. */
+ msd->msd_catalog_oid = 0;
+ msd->msd_catalog_ogen = 0;
+ }
+ /* ORPHANS FIXME: compare catalog UUID to msd_peeruuid */
+ }
+
+ if (!msd->msd_catalog_oid) {
+ struct llog_logid *lgl;
+
+ cathandle = mds_log_create(obd);
+ if (IS_ERR(cathandle)) {
+ CERROR("error creating new catalog: rc %d\n",
+ (int)PTR_ERR(cathandle));
+ /* cathandle is an ERR_PTR here and is returned as-is */
+ GOTO(out, cathandle);
+ }
+ lgl = &cathandle->lgh_cookie.lgc_lgl;
+ msd->msd_catalog_oid = cpu_to_le64(lgl->lgl_oid);
+ msd->msd_catalog_ogen = cpu_to_le32(lgl->lgl_ogen);
+ rc = mds_update_server_data(obd);
+ if (rc) {
+ CERROR("error writing new catalog to disk: rc %d\n",rc);
+ GOTO(out_handle, rc);
+ }
+ }
+
+ /* NOTE(review): rc from llog_init_catalog() is not checked; the
+ * handle is returned even if initialisation failed - confirm this
+ * is intended. */
+ rc = llog_init_catalog(cathandle, &obd->u.mds.mds_osc_uuid);
+
+out:
+ pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+ RETURN(cathandle);
+
+out_handle:
+ /* NOTE(review): mds_log_close() dereferences loghandle->lgh_hdr;
+ * verify it is valid for a handle that was never passed to
+ * llog_init_catalog(). */
+ mds_log_close(cathandle, cathandle);
+ cathandle = ERR_PTR(rc);
+ goto out;
+
+}
+
+/* Release a catalog: close every log handle still linked on the catalog's
+ * list, close the catalog file itself, then free the catalog handle.
+ * A failure to close the catalog file is logged but not propagated.
+ */
+void mds_put_catalog(struct llog_handle *cathandle)
+{
+        struct llog_handle *cur, *next;
+        int err;
+        ENTRY;
+
+        /* _safe iteration: mds_log_close() frees each handle, which also
+         * removes it from the list being walked. */
+        list_for_each_entry_safe(cur, next, &cathandle->lgh_list, lgh_list) {
+                mds_log_close(cathandle, cur);
+        }
+
+        err = filp_close(cathandle->lgh_file, 0);
+        if (err != 0)
+                CERROR("error closing catalog: rc %d\n", err);
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
+#ifndef _MDS_INTERNAL_H
+#define _MDS_INTERNAL_H
+/* Return the MDS-private state of the device that owns the export on which
+ * @req arrived. */
+static inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
+{
+        struct obd_device *obd = req->rq_export->exp_obd;
+
+        return &obd->u.mds;
+}
+
+/* mds/mds_fs.c */
+struct llog_handle *mds_log_create(struct obd_device *obd);
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle);
+struct llog_handle *mds_log_open(struct obd_device *obd,
+ struct llog_cookie *logcookie);
+struct llog_handle *mds_get_catalog(struct obd_device *obd);
+void mds_put_catalog(struct llog_handle *cathandle);
+
+/* mds/handler.c */
struct mds_file_data *mds_mfd_new(void);
void mds_mfd_put(struct mds_file_data *mfd);
void mds_mfd_destroy(struct mds_file_data *mfd);
+
+/* mds/mds_reint.c */
+void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error);
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+ struct ptlrpc_request *req, int rc, __u32 op_data);
+
+/* mds/mds_lib.c */
int mds_update_unpack(struct ptlrpc_request *, int offset,
struct mds_update_record *);
+/* mds/mds_lov.c */
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+ struct obd_uuid *uuidarray);
+
+/* mds/mds_open.c */
+int mds_open(struct mds_update_record *rec, int offset,
+ struct ptlrpc_request *req, struct lustre_handle *);
+int mds_pin(struct ptlrpc_request *req);
+
/* mds/mds_fs.c */
int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
struct mds_export_data *med, int cl_off);
void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode);
void mds_pack_inode2body(struct mds_body *body, struct inode *inode);
#endif
+
+#endif /* _MDS_INTERNAL_H */
fid->f_type = (S_IFMT & inode->i_mode);
}
+/* Note that we can copy all of the fields, just some will not be "valid" */
void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
{
- b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
- OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLNLINK | OBD_MD_FLGENER;
+ b->valid = OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID |
+ OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER;
- /* The MDS file size isn't authoritative for regular files, so don't
- * even pretend. */
- if (S_ISREG(inode->i_mode))
- b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ if (!S_ISREG(inode->i_mode))
+ b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME |
+ OBD_MD_FLMTIME;
b->ino = inode->i_ino;
b->atime = LTIME_S(inode->i_atime);
b->gid = inode->i_gid;
b->flags = inode->i_flags;
b->rdev = b->rdev;
- b->nlink = inode->i_nlink;
+ /* Return the correct link count for orphan inodes */
+ b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
b->generation = inode->i_generation;
b->suppgid = -1;
}
+
/* unpacking */
static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
struct mds_update_record *r)
struct mds_rec_setattr *rec;
ENTRY;
- rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
- lustre_swab_mds_rec_setattr);
+ rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
+ lustre_swab_mds_rec_setattr);
if (rec == NULL)
RETURN (-EFAULT);
if (r->ur_eadata == NULL)
RETURN (-EFAULT);
r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
- } else {
- r->ur_eadata = NULL;
- r->ur_eadatalen = 0;
+ }
+
+        /* Optional buffer at offset + 2 carries the llog cookies.  Check the
+         * pointer that was just fetched (not ur_eadata, which may
+         * legitimately be unset or already validated above). */
+        if (req->rq_reqmsg->bufcount > offset + 2) {
+                r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0);
+                if (r->ur_logcookies == NULL)
+                        RETURN (-EFAULT);
+
+                r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
+        }
RETURN(0);
if (r->ur_tgt == NULL)
RETURN (-EFAULT);
r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
- } else {
- r->ur_tgt = NULL;
- r->ur_tgtlen = 0;
}
RETURN(0);
}
#include <linux/obd_class.h>
#include <linux/obd_lov.h>
#include <linux/lustre_lib.h>
+#include <linux/lustre_fsfilt.h>
+
+#include "mds_internal.h"
void le_lov_desc_to_cpu (struct lov_desc *ld)
{
mds->mds_has_lov_desc = 1;
/* XXX the MDS should not really know about this */
mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
+ mds->mds_max_cookiesize = desc->ld_tgt_count*sizeof(struct llog_cookie);
out:
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
return rc;
}
-int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray)
+int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,
+ struct obd_uuid *uuidarray)
{
struct obd_run_ctxt saved;
struct file *f;
RETURN(rc);
- case OBD_IOC_SET_READONLY:
+ case OBD_IOC_SET_READONLY: {
+ BDEVNAME_DECLARE_STORAGE(tmp);
CERROR("setting device %s read-only\n",
- ll_bdevname(obd->u.mds.mds_sb->s_dev));
-#ifdef CONFIG_DEV_RDONLY
+ ll_bdevname(obd->u.mds.mds_sb->s_dev, tmp));
dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2);
-#endif
RETURN(0);
+ }
case OBD_IOC_ABORT_RECOVERY:
CERROR("aborting recovery for device %s\n", obd->obd_name);
#include "mds_internal.h"
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
- struct ptlrpc_request *req, int rc, __u32 op_data);
-extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd,
- struct ldlm_res_id *p1_res_id,
- struct ldlm_res_id *p2_res_id,
- struct ldlm_res_id *c1_res_id,
- struct ldlm_res_id *c2_res_id,
- struct lustre_handle *p1_lockh,
- struct lustre_handle *p2_lockh,
- struct lustre_handle *c1_lockh,
- struct lustre_handle *c2_lockh);
-
struct mds_file_data *mds_dentry_open(struct dentry *dentry,
struct vfsmount *mnt,
int flags,
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
struct inode *inode;
- int mode;
struct mds_file_data *mfd;
- int error;
+ int mode, error;
mfd = mds_mfd_new();
- if (!mfd) {
+ if (mfd == NULL) {
CERROR("mds: out of memory\n");
GOTO(cleanup_dentry, error = -ENOMEM);
}
- mode = (flags+1) & O_ACCMODE;
+ mode = (flags + 1) & O_ACCMODE;
inode = dentry->d_inode;
if (mode & FMODE_WRITE) {
struct ptlrpc_request *req,
struct lustre_handle *child_lockh)
{
+ struct ptlrpc_request *oldreq = req->rq_export->exp_outstanding_reply;
struct mds_export_data *med = &req->rq_export->exp_mds_data;
struct mds_client_data *mcd = med->med_mcd;
struct mds_obd *mds = mds_req2mds(req);
struct dentry *parent, *child;
struct ldlm_reply *rep;
struct mds_body *body;
- int disp, rc;
+ int rc;
struct list_head *t;
int put_child = 1;
ENTRY;
/* copy rc, transno and disp; steal locks */
req->rq_transno = mcd->mcd_last_transno;
req->rq_status = mcd->mcd_last_result;
- disp = rep->lock_policy_res1 = mcd->mcd_last_data;
+ intent_set_disposition(rep, mcd->mcd_last_data);
- if (req->rq_export->exp_outstanding_reply)
+ if (oldreq)
mds_steal_ack_locks(req->rq_export, req);
- /* We never care about these. */
- disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG);
- if (!disp) {
+ /* Only replay if create or open actually happened. */
+ if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
EXIT;
return; /* error looking up parent or child */
}
GOTO(out_dput, 0); /* child not present to open */
}
- /* At this point, we know we have a child, which means that we'll send
- * it back _unless_ it was open failed, _and_ we didn't create the file.
- * I love you guys. No, really.
+ /* At this point, we know we have a child. We'll send
+ * it back _unless_ it was not created and the open failed.
*/
- if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) &&
+ if (intent_disposition(rep, DISP_OPEN_OPEN) &&
+ !intent_disposition(rep, DISP_OPEN_CREATE) &&
req->rq_status) {
GOTO(out_dput, 0);
}
if (S_ISREG(child->d_inode->i_mode)) {
rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
child->d_inode);
+
if (rc)
LASSERT(rc == req->rq_status);
+
+ /* If we have LOV EA data, the OST holds size, mtime */
+ if (!(body->valid & OBD_MD_FLEASIZE))
+ body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME);
} else {
/* XXX need to check this case */
}
/* If we didn't get as far as trying to open, then some locking thing
* probably went wrong, and we'll just bail here.
*/
- if ((disp & IT_OPEN_OPEN) == 0)
+ if (!intent_disposition(rep, DISP_OPEN_OPEN))
GOTO(out_dput, 0);
/* If we failed, then we must have failed opening, so don't look for
mfd = NULL;
list_for_each(t, &med->med_open_head) {
mfd = list_entry(t, struct mds_file_data, mfd_list);
- if (mfd->mfd_xid == req->rq_xid)
+ if (mfd->mfd_xid == req->rq_xid)
break;
mfd = NULL;
}
- if (req->rq_export->exp_outstanding_reply) {
+ if (oldreq) {
/* if we're not recovering, it had better be found */
LASSERT(mfd);
} else if (mfd == NULL) {
EXIT;
}
+/* Open ("pin") an inode by fid on behalf of a client and return the open
+ * handle cookie in the reply body.
+ *
+ * The fid from the request body is first looked up by name in the PENDING
+ * directory; a hit is marked as an orphan.  Otherwise the inode is looked
+ * up directly with mds_fid2dentry().  Returns 0 or a negative errno.
+ */
+int mds_pin(struct ptlrpc_request *req)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct mds_file_data *mfd = NULL;
+        struct mds_body *body;
+        struct dentry *dchild;
+        struct obd_run_ctxt saved;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc, cleanup_phase = 0, size = sizeof(*body);
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
+        if (body == NULL)       /* request buffer missing or too short */
+                RETURN(-EFAULT);
+
+        down(&pending_dir->i_sem);
+        fidlen = ll_fid2str(fidname, body->fid1.id, body->fid1.generation);
+        dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(dchild)) {
+                up(&pending_dir->i_sem);
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up %s in PENDING: rc = %d\n",
+                       fidname, rc);
+                RETURN(rc);
+        }
+
+        cleanup_phase = 2;
+
+        if (dchild->d_inode) {
+                up(&pending_dir->i_sem);
+                mds_inode_set_orphan(dchild->d_inode);
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+        dput(dchild);
+        up(&pending_dir->i_sem);
+
+        /* We didn't find it in PENDING so it isn't an orphan.  See
+         * if it's a regular inode. */
+        dchild = mds_fid2dentry(mds, &body->fid1, NULL);
+        if (!IS_ERR(dchild)) {
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+
+        /* We didn't find this inode on disk, but we're trying to pin it.
+         * This should never happen. */
+        CERROR("ENOENT during mds_pin for fid "LPU64"/%u\n", body->fid1.id,
+               body->fid1.generation);
+        RETURN(-ENOENT);
+
+ openit:
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, body->flags, req);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
+        }
+
+        /* NOTE(review): if packing fails here cleanup_phase is still 2, so
+         * the mfd opened above is not destroyed - confirm whether it should
+         * be. */
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memory\n");
+                GOTO(cleanup, rc);
+        }
+        /* NOTE(review): the inode attributes above were packed into the
+         * request body; only the handle cookie is written to the reply
+         * body here - verify that is what the client expects. */
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
+
+        cleanup_phase = 4; /* mfd allocated */
+        body->handle.cookie = mfd->mfd_handle.h_cookie;
+        CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
+               mfd->mfd_handle.h_cookie);
+        GOTO(cleanup, rc = 0);
+
+ cleanup:
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+                                req, rc, 0);
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
+        switch (cleanup_phase) {
+        case 4:
+                if (rc)
+                        mds_mfd_destroy(mfd);
+                /* fall through */
+        case 2:
+                /* dchild is NULL when mds_dentry_open() already dropped it;
+                 * never dereference it in that case. */
+                if (dchild != NULL &&
+                    (rc || S_ISLNK(dchild->d_inode->i_mode)))
+                        l_dput(dchild);
+        }
+        return rc;
+}
+
int mds_open(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req, struct lustre_handle *child_lockh)
{
+ /* XXX ALLOCATE _something_ - 464 bytes on stack here */
static const char acc_table [] = {[O_RDONLY] MAY_READ,
[O_WRONLY] MAY_WRITE,
[O_RDWR] MAY_READ | MAY_WRITE};
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
- struct ldlm_reply *rep;
- struct mds_body *body;
- struct dentry *dchild = NULL, *parent;
+ struct ldlm_reply *rep = NULL;
+ struct mds_body *body = NULL;
+ struct dentry *dchild = NULL, *parent = NULL;
struct mds_export_data *med;
struct mds_file_data *mfd = NULL;
struct ldlm_res_id child_res_id = { .name = {0} };
struct lustre_handle parent_lockh;
int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
- int cleanup_phase = 0;
+ int cleanup_phase = 0, acc_mode;
void *handle = NULL;
- int acc_mode;
ENTRY;
- LASSERT(offset == 2); /* only called via intent */
- rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
- body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+ if (offset == 2) { /* intent */
+ rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
+ body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+ } else if (offset == 0) { /* non-intent reint */
+ body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+ } else {
+ body = NULL;
+ LBUG();
+ }
MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
+ /* Step 0: If we are passed a fid, then we assume the client already
+ * opened this file and is only replaying the RPC, so we open the
+ * inode by fid (at some large expense in security).
+ */
+ if (rec->ur_fid2->id) {
+ struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+ char fidname[LL_FID_NAMELEN];
+ int fidlen = 0;
+
+ down(&pending_dir->i_sem);
+ fidlen = ll_fid2str(fidname, rec->ur_fid2->id,
+ rec->ur_fid2->generation);
+ dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+ if (IS_ERR(dchild)) {
+ up(&pending_dir->i_sem);
+ rc = PTR_ERR(dchild);
+ CERROR("error looking up %s in PENDING: rc = %d\n",
+ fidname, rc);
+ RETURN(rc);
+ }
+
+ if (dchild->d_inode) {
+ up(&pending_dir->i_sem);
+ mds_inode_set_orphan(dchild->d_inode);
+ mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+ mds_pack_inode2body(body, dchild->d_inode);
+ cleanup_phase = 2;
+ GOTO(openit, rc = 0);
+ }
+ dput(dchild);
+ up(&pending_dir->i_sem);
+
+ /* We didn't find it in PENDING so it isn't an orphan. See
+ * if it was a regular inode that was previously created.
+ */
+ dchild = mds_fid2dentry(mds, rec->ur_fid2, NULL);
+ if (!IS_ERR(dchild)) {
+ mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+ mds_pack_inode2body(body, dchild->d_inode);
+ cleanup_phase = 2;
+ GOTO(openit, rc = 0);
+ }
+
+ /* We didn't find the correct inode on disk either, so we
+ * need to re-create it via a regular replay. Do that below.
+ */
+ LASSERT(rec->ur_flags & O_CREAT);
+ }
+ LASSERT(offset == 2); /* If we got here, we must be called via intent */
+
med = &req->rq_export->exp_mds_data;
- rep->lock_policy_res1 |= IT_OPEN_LOOKUP;
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) {
CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n");
req->rq_status = -ENOMEM;
if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table))
RETURN(-EINVAL);
- acc_mode = acc_table [rec->ur_flags & O_ACCMODE];
+ acc_mode = acc_table[rec->ur_flags & O_ACCMODE];
if ((rec->ur_flags & O_TRUNC) != 0)
acc_mode |= MAY_WRITE;
/* Step 1: Find and lock the parent */
+ intent_set_disposition(rep, DISP_LOOKUP_EXECD);
parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR;
parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
&parent_lockh);
cleanup_phase = 2; /* child dentry */
if (dchild->d_inode)
- rep->lock_policy_res1 |= IT_OPEN_POS;
+ intent_set_disposition(rep, DISP_LOOKUP_POS);
else
- rep->lock_policy_res1 |= IT_OPEN_NEG;
+ intent_set_disposition(rep, DISP_LOOKUP_NEG);
/* Step 3: If the child was negative, and we're supposed to,
* create it. */
if (!dchild->d_inode) {
+ unsigned long ino = rec->ur_fid2->id;
+
if (!(rec->ur_flags & O_CREAT)) {
/* It's negative and we weren't supposed to create it */
GOTO(cleanup, rc = -ENOENT);
}
- rep->lock_policy_res1 |= IT_OPEN_CREATE;
- handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE);
+ intent_set_disposition(rep, DISP_OPEN_CREATE);
+ handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE,
+ NULL);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
handle = NULL;
GOTO(cleanup, rc);
}
+ if (ino)
+ dchild->d_fsdata = (void *)(unsigned long)ino;
+
rc = vfs_create(parent->d_inode, dchild, rec->ur_mode);
- if (rc)
+ if (dchild->d_fsdata == (void *)(unsigned long)ino)
+ dchild->d_fsdata = NULL;
+
+ if (rc) {
+ CDEBUG(D_INODE, "error during create: %d\n", rc);
GOTO(cleanup, rc);
- created = 1;
+ } else {
+ struct iattr iattr;
+ struct inode *inode = dchild->d_inode;
+
+ if (ino) {
+ LASSERT(ino == inode->i_ino);
+ /* Written as part of setattr */
+ inode->i_generation = rec->ur_fid2->generation;
+ CDEBUG(D_HA, "recreated ino %lu with gen %x\n",
+ inode->i_ino, inode->i_generation);
+ }
+
+ created = 1;
+ LTIME_S(iattr.ia_atime) = rec->ur_time;
+ LTIME_S(iattr.ia_ctime) = rec->ur_time;
+ LTIME_S(iattr.ia_mtime) = rec->ur_time;
+
+ iattr.ia_uid = rec->ur_uid;
+ if (parent->d_inode->i_mode & S_ISGID) {
+ iattr.ia_gid = parent->d_inode->i_gid;
+ } else
+ iattr.ia_gid = rec->ur_gid;
+
+ iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
+ ATTR_MTIME | ATTR_CTIME;
+
+ rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
+ if (rc) {
+ CERROR("error on setattr: rc = %d\n", rc);
+ /* XXX should we abort here in case of error? */
+ }
+ }
+
child_mode = LCK_PW;
acc_mode = 0; /* Don't check for permissions */
}
+ LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
/* Step 4: It's positive, so lock the child */
child_res_id.name[0] = dchild->d_inode->i_ino;
child_res_id.name[1] = dchild->d_inode->i_generation;
reacquire:
lock_flags = 0;
+ /* For the open(O_CREAT) case, this would technically be a lock
+ * inversion (getting a VFS lock after starting a transaction),
+ * but in that case we cannot possibly block on this lock because
+ * we just created the child and also hold a write lock on the
+ * parent, so nobody could be holding the lock yet.
+ */
rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
child_res_id, LDLM_PLAIN, NULL, 0, child_mode,
&lock_flags, ldlm_completion_ast,
/* An append-only file must be opened in append mode for
* writing */
- if (IS_APPEND(dchild->d_inode) &&
- (acc_mode & MAY_WRITE) != 0 &&
+ if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 &&
((rec->ur_flags & O_APPEND) == 0 ||
(rec->ur_flags & O_TRUNC) != 0))
- GOTO (cleanup, rc = -EPERM);
+ GOTO(cleanup, rc = -EPERM);
rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode);
if (rc)
GOTO(cleanup, rc);
+
+ /* If we have LOV EA data, the OST holds size, mtime */
+ if (!(body->valid & OBD_MD_FLEASIZE))
+ body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME);
}
if (!created && (rec->ur_flags & O_CREAT) &&
GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
}
- /* If we're opening a file without an EA, the client needs a write
- * lock. */
- if (S_ISREG(dchild->d_inode->i_mode) &&
+ /* If we're opening a file without an EA for write, the client needs
+ * a write lock. */
+ if (S_ISREG(dchild->d_inode->i_mode) && (rec->ur_flags & O_ACCMODE) &&
child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) {
ldlm_lock_decref(child_lockh, child_mode);
child_mode = LCK_PW;
GOTO(cleanup, rc = -ENOTDIR);
/* Step 5: mds_open it */
- rep->lock_policy_res1 |= IT_OPEN_OPEN;
-
+ intent_set_disposition(rep, DISP_OPEN_OPEN);
+ openit:
/* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
- if (!mfd) {
- CERROR("mds: out of memory\n");
- dchild = NULL; /* prevent a double dput in step 2 */
- GOTO(cleanup, rc = -ENOMEM);
+ if (IS_ERR(mfd)) {
+ dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+ GOTO(cleanup, rc = PTR_ERR(mfd));
}
cleanup_phase = 4; /* mfd allocated */
cleanup:
rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
req, rc, rep->lock_policy_res1);
+ /* XXX what do we do here if mds_finish_transno itself failed? */
switch (cleanup_phase) {
case 4:
if (rc && !S_ISLNK(dchild->d_inode->i_mode))
* ldlm_intent_policy: if we found the dentry, or we tried to
* open it (meaning that we created, if it wasn't found), then
* we return the lock to the caller and client. */
- if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS)))
+ if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
+ !intent_disposition(rep, DISP_OPEN_OPEN))
ldlm_lock_decref(child_lockh, child_mode);
case 2:
if (rc || S_ISLNK(dchild->d_inode->i_mode))
l_dput(dchild);
case 1:
- l_dput(parent);
- if (rc) {
- ldlm_lock_decref(&parent_lockh, parent_mode);
- } else {
- memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
- sizeof(parent_lockh));
- req->rq_ack_locks[0].mode = parent_mode;
+ if (parent) {
+ l_dput(parent);
+ if (rc) {
+ ldlm_lock_decref(&parent_lockh, parent_mode);
+ } else {
+ memcpy(&req->rq_ack_locks[0].lock,&parent_lockh,
+ sizeof(parent_lockh));
+ req->rq_ack_locks[0].mode = parent_mode;
+ }
}
}
RETURN(rc);
#include <linux/lustre_mds.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_fsfilt.h>
+
#include "mds_internal.h"
-extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req);
+void mds_commit_cb(struct obd_device *obd, __u64 transno, void *data,
+ int error)
+{ /* Registered via fsfilt_set_last_rcvd() with NULL callback data by
+   * mds_finish_transno(); 'data' is ignored and only the transno and
+   * error are forwarded. */
+ obd_transno_commit_cb(obd, transno, error);
+}
+/* Private data for mds_cancel_cookies_cb(), allocated as one buffer of
+ * mlcd_size bytes: this header, then the log cookies, then a copy of the
+ * EA data that mlcd_lmm points at. */
+struct mds_logcancel_data {
+ struct lov_mds_md *mlcd_lmm; /* EA copy, stored right after the cookies */
+ int mlcd_size; /* total allocation size, passed to OBD_FREE */
+ int mlcd_cookielen; /* length of mlcd_cookies in bytes */
+ int mlcd_eadatalen; /* length of the EA copy in bytes */
+ struct llog_cookie mlcd_cookies[0]; /* variable-length trailing array */
+};
+
+/* Establish a connection to the OSC when we first need it. We don't do
+ * this during MDS setup because that would introduce setup ordering issues.
+ *
+ * mds->mds_osc_obd caches the outcome: NULL means no attempt has been made
+ * yet, an ERR_PTR() records a failed lookup or connect (and is returned
+ * again on every later call), anything else is the located device.
+ *
+ * Returns 0 if the device was already set up, the result of obd_set_info()
+ * after a fresh connect, or the error from the failed lookup/connect.
+ *
+ * NOTE(review): a failure of obd_set_info() is returned to the caller but
+ * not cached, so later calls return 0 - confirm that this is intended. */
+static int mds_osc_connect(struct obd_device *obd, struct mds_obd *mds)
+{
+ int rc;
+ ENTRY;
+
+ if (IS_ERR(mds->mds_osc_obd))
+ RETURN(PTR_ERR(mds->mds_osc_obd));
+
+ if (mds->mds_osc_obd)
+ RETURN(0);
+
+ mds->mds_osc_obd = class_uuid2obd(&mds->mds_osc_uuid);
+ if (!mds->mds_osc_obd) {
+ CERROR("MDS cannot locate OSC/LOV %s - no logging!\n",
+ mds->mds_osc_uuid.uuid);
+ mds->mds_osc_obd = ERR_PTR(-ENOTCONN);
+ RETURN(-ENOTCONN);
+ }
+
+ rc = obd_connect(&mds->mds_osc_conn, mds->mds_osc_obd, &obd->obd_uuid);
+ if (rc) {
+ /* The device was found; it is the connect that failed, so say
+ * so and report the error code. */
+ CERROR("MDS cannot connect to OSC/LOV %s: rc = %d - no logging!\n",
+ mds->mds_osc_uuid.uuid, rc);
+ mds->mds_osc_obd = ERR_PTR(rc);
+ RETURN(rc);
+ }
+
+ rc = obd_set_info(&mds->mds_osc_conn, strlen("mds_conn"), "mds_conn",
+ 0, NULL);
+ RETURN(rc);
+}
-static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+ void *cb_data, int error)
{
+ struct mds_logcancel_data *mlcd = cb_data;
+ struct lov_stripe_md *lsm = NULL;
+ int rc;
+ /* cb_data is the mds_logcancel_data handed over when this callback was
+  * registered with fsfilt_set_last_rcvd(); it is freed below before we
+  * return, whether or not the cancel succeeds. */
obd_transno_commit_cb(obd, transno, error);
+ /* mlcd_cookielen is a byte count; dividing by the cookie size gives the
+  * number of cookies being cancelled. */
+ CDEBUG(D_HA, "cancelling %d cookies\n",
+ (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies)));
+
+ rc = obd_unpackmd(&obd->u.mds.mds_osc_conn, &lsm, mlcd->mlcd_lmm,
+ mlcd->mlcd_eadatalen);
+ if (rc < 0) {
+ CERROR("bad LSM cancelling %d log cookies: rc %d\n",
+ (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)),
+ rc);
+ } else {
+ rc = obd_log_cancel(&obd->u.mds.mds_osc_conn, lsm,
+ mlcd->mlcd_cookielen /
+ sizeof(*mlcd->mlcd_cookies),
+ mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW);
+ ///* XXX 0 normally, SENDNOW for debug */);
+ if (rc)
+ CERROR("error cancelling %d log cookies: rc %d\n",
+ (int)(mlcd->mlcd_cookielen /
+ sizeof(*mlcd->mlcd_cookies)), rc);
+ }
+ /* NOTE(review): lsm from obd_unpackmd() is not released here, whereas
+  * mds_log_op_unlink() calls obd_free_memmd() - confirm this is not a
+  * leak. */
+ OBD_FREE(mlcd, mlcd->mlcd_size);
}
/* Assumes caller has already pushed us into the kernel context. */
-int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle,
- struct ptlrpc_request *req, int rc,
- __u32 op_data)
+int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
+ struct ptlrpc_request *req, int rc, __u32 op_data)
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
struct mds_client_data *mcd = med->med_mcd;
if (!handle) {
/* if we're starting our own xaction, use our own inode */
- i = mds->mds_rcvd_filp->f_dentry->d_inode;
- handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR);
+ inode = mds->mds_rcvd_filp->f_dentry->d_inode;
+ handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
if (IS_ERR(handle)) {
CERROR("fsfilt_start: %ld\n", PTR_ERR(handle));
GOTO(out, rc = PTR_ERR(handle));
}
}
- off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE;
+ off = med->med_off;
transno = req->rq_reqmsg->transno;
if (transno == 0) {
mcd->mcd_last_data = cpu_to_le32(op_data);
fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle,
- mds_commit_cb);
- written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off);
- CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = "
- LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written);
+ mds_commit_cb, NULL);
+ written = fsfilt_write_record(obd, mds->mds_rcvd_filp,
+ (char *)mcd, sizeof(*mcd), &off);
+ CDEBUG(D_INODE, "wrote trans "LPU64" client %s at idx %u: written = "
+ LPSZ"\n", transno, mcd->mcd_uuid, med->med_idx, written);
if (written != sizeof(*mcd)) {
CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written);
}
commit:
- err = fsfilt_commit(obd, i, handle, 0);
+ err = fsfilt_commit(obd, inode, handle, 0);
if (err) {
CERROR("error committing transaction: %d\n", err);
if (!rc)
if (!(ia_valid & ATTR_RAW))
RETURN(0);
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- RETURN(-EPERM);
-
- LTIME_S(attr->ia_ctime) = now;
+ if (!(ia_valid & ATTR_CTIME_SET))
+ LTIME_S(attr->ia_ctime) = now;
if (!(ia_valid & ATTR_ATIME_SET))
LTIME_S(attr->ia_atime) = now;
if (!(ia_valid & ATTR_MTIME_SET))
LTIME_S(attr->ia_mtime) = now;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ RETURN(-EPERM);
+
/* times */
- if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) &&
- !(ia_valid & ATTR_ATIME_SET)) {
+ if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME)) {
if (rec->ur_fsuid != inode->i_uid &&
(error = permission(inode,MAY_WRITE)) != 0)
RETURN(error);
- } else if (ia_valid & ATTR_UID) {
+ }
+
+ if (ia_valid & ATTR_SIZE) {
+ if ((error = permission(inode,MAY_WRITE)) != 0)
+ RETURN(error);
+ }
+
+ if (ia_valid & ATTR_UID) {
/* chown */
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
if (attr->ia_gid == (gid_t) -1)
attr->ia_gid = inode->i_gid;
attr->ia_mode = inode->i_mode;
- attr->ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME;
/*
* If the user or group of a non-directory has been
* changed by a non-root user, remove the setuid bit.
mds_pack_inode2fid(&body->fid1, de->d_inode);
mds_pack_inode2body(body, de->d_inode);
+ /* Don't return OST-specific attributes if we didn't just set them */
+ if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+ body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+ body->valid |= OBD_MD_FLMTIME;
+ if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+ body->valid |= OBD_MD_FLATIME;
+
l_dput(de);
}
struct inode *inode = NULL;
struct lustre_handle lockh;
void *handle = NULL;
+ struct mds_logcancel_data *mlcd = NULL;
int rc = 0, cleanup_phase = 0, err, locked = 0;
ENTRY;
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE,
to_kdev_t(inode->i_sb->s_dev));
- handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
+#ifdef ENABLE_ORPHANS
+ if (unlikely(mds->mds_osc_obd == NULL))
+ mds_osc_connect(obd, mds);
+#endif
+
+ handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
handle = NULL;
GOTO(cleanup, rc);
}
+ if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME))
+ CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n",
+ LTIME_S(rec->ur_iattr.ia_mtime),
+ LTIME_S(rec->ur_iattr.ia_ctime));
rc = mds_fix_attr(inode, rec);
if (rc)
GOTO(cleanup, rc);
rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0);
- if (rc == 0 &&
- S_ISREG(inode->i_mode) &&
- rec->ur_eadata != NULL) {
+ if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) {
rc = fsfilt_set_md(obd, inode, handle,
rec->ur_eadata, rec->ur_eadatalen);
}
mds_pack_inode2fid(&body->fid1, inode);
mds_pack_inode2body(body, inode);
+ /* Don't return OST-specific attributes if we didn't just set them */
+ if (rec->ur_iattr.ia_valid & ATTR_SIZE)
+ body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+ if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET))
+ body->valid |= OBD_MD_FLMTIME;
+ if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET))
+ body->valid |= OBD_MD_FLATIME;
+
+ if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) {
+ OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen +
+ rec->ur_eadatalen);
+ if (mlcd) {
+ mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen +
+ rec->ur_eadatalen;
+ mlcd->mlcd_eadatalen = rec->ur_eadatalen;
+ mlcd->mlcd_cookielen = rec->ur_cookielen;
+ mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies +
+ mlcd->mlcd_cookielen;
+ memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies,
+ mlcd->mlcd_cookielen);
+ memcpy(mlcd->mlcd_lmm, rec->ur_eadata,
+ mlcd->mlcd_eadatalen);
+ } else {
+ CERROR("unable to allocate log cancel data\n");
+ }
+ }
EXIT;
cleanup:
+ if (mlcd != NULL)
+ fsfilt_set_last_rcvd(req->rq_export->exp_obd, 0, handle,
+ mds_cancel_cookies_cb, mlcd);
err = mds_finish_transno(mds, inode, handle, req, rc, 0);
- switch(cleanup_phase) {
+ switch (cleanup_phase) {
case 1:
l_dput(de);
if (locked) {
switch (type) {
case S_IFREG:{
- handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE);
+ handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_create(dir, dchild, rec->ur_mode);
break;
}
case S_IFDIR:{
- handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR);
+ handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_mkdir(dir, dchild, rec->ur_mode);
break;
}
case S_IFLNK:{
- handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK);
+ handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
if (rec->ur_tgt == NULL) /* no target supplied */
case S_IFIFO:
case S_IFSOCK:{
int rdev = rec->ur_rdev;
- handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD);
+ handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL);
if (IS_ERR(handle))
GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle)));
rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev);
}
default:
CERROR("bad file type %o creating %s\n", type, rec->ur_name);
+ dchild->d_fsdata = NULL;
GOTO(cleanup, rc = -EINVAL);
}
- /* In case we stored the desired inum in here, we want to clean up.
- * We also do this in the cleanup block, for the error cases.
- */
- dchild->d_fsdata = NULL;
+ /* In case we stored the desired inum in here, we want to clean up. */
+ if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id)
+ dchild->d_fsdata = NULL;
if (rc) {
CDEBUG(D_INODE, "error during create: %d\n", rc);
}
switch (cleanup_phase) {
case 2: /* child dentry */
- dchild->d_fsdata = NULL;
l_dput(dchild);
case 1: /* locked parent dentry */
if (rc) {
"can't get EA for reconstructed unlink, leaking OST inodes");
}
+/* If we are unlinking an open file/dir (i.e. creating an orphan) then
+ * we instead link the inode into the PENDING directory until it is
+ * finally released. We can't simply call mds_reint_rename() or some
+ * part thereof, because we don't have the inode to check for link
+ * count/open status until after it is locked.
+ *
+ * For lock ordering, we always get the PENDING, then pending_child lock
+ * last to avoid deadlocks.
+ *
+ * The name used inside PENDING is built by ll_fid2str() from the child's
+ * inode number and generation. On a successful rename the inode is flagged
+ * with mds_inode_set_orphan(). If an entry of that name already exists we
+ * return 0 without starting a transaction.
+ *
+ * *handle receives the result of fsfilt_start(), which is an ERR_PTR() when
+ * starting the transaction failed. NOTE(review): nothing in this function
+ * commits *handle - confirm that every caller does.
+ *
+ * Returns 0 on success, otherwise the error from the PENDING lookup,
+ * fsfilt_start() or vfs_rename().
+ */
+static int mds_unlink_orphan(struct mds_update_record *rec,
+ struct obd_device *obd, struct dentry *dparent,
+ struct dentry *dchild, void **handle)
+{
+ struct mds_obd *mds = &obd->u.mds;
+ struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+ struct dentry *pending_child;
+ char fidname[LL_FID_NAMELEN];
+ int fidlen = 0, rc;
+ ENTRY;
+
+ LASSERT(!mds_inode_is_orphan(dchild->d_inode));
+
+ down(&pending_dir->i_sem); /* held until out_lock */
+ fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation);
+
+ CDEBUG(D_ERROR, "pending destroy of %dx open file %s = %s\n",
+ mds_open_orphan_count(dchild->d_inode),
+ rec->ur_name, fidname);
+
+ pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+ if (IS_ERR(pending_child))
+ GOTO(out_lock, rc = PTR_ERR(pending_child));
+
+ if (pending_child->d_inode != NULL) { /* name already present in PENDING */
+ CERROR("re-destroying orphan file %s?\n", rec->ur_name);
+ LASSERT(pending_child->d_inode == dchild->d_inode);
+ GOTO(out_dput, rc = 0);
+ }
+
+ *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL);
+ if (IS_ERR(*handle))
+ GOTO(out_dput, rc = PTR_ERR(*handle));
+
+ rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child);
+ if (rc)
+ CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n",
+ dparent->d_inode->i_ino, rec->ur_name, rc);
+ else
+ mds_inode_set_orphan(dchild->d_inode);
+out_dput:
+ dput(pending_child);
+out_lock:
+ up(&pending_dir->i_sem);
+ RETURN(rc);
+}
+
+/* Add an MDS_UNLINK_REC llog record for an inode that is being unlinked.
+ *
+ * The EA held in reply buffer 'offset' is unpacked into a stripe MD, and
+ * obd_log_add() is handed reply buffer 'offset + 1' together with the
+ * number of llog cookies that fit in it (presumably where the cookies are
+ * returned - confirm against obd_log_add()).
+ *
+ * Returns the obd_log_add() result (callers in this file treat a value > 0
+ * as "cookies present"), or an error if the OSC connection is recorded as
+ * failed, the EA cannot be unpacked, or the record cannot be allocated. */
+static int mds_log_op_unlink(struct obd_device *obd, struct mds_obd *mds,
+ struct inode *inode, struct lustre_msg *repmsg,
+ int offset)
+{
+ struct lov_stripe_md *lsm = NULL;
+ struct llog_unlink_rec *lur;
+ int rc;
+ ENTRY;
+
+ if (IS_ERR(mds->mds_osc_obd))
+ RETURN(PTR_ERR(mds->mds_osc_obd));
+
+ rc = obd_unpackmd(&mds->mds_osc_conn, &lsm,
+ lustre_msg_buf(repmsg, offset, 0),
+ repmsg->buflens[offset]);
+ if (rc < 0)
+ RETURN(rc);
+
+ OBD_ALLOC(lur, sizeof(*lur));
+ if (!lur) {
+ /* Release the stripe MD unpacked above; returning without
+ * this would leak it. */
+ obd_free_memmd(&mds->mds_osc_conn, &lsm);
+ RETURN(-ENOMEM);
+ }
+ lur->lur_hdr.lth_len = lur->lur_end_len = sizeof(*lur);
+ lur->lur_hdr.lth_type = MDS_UNLINK_REC;
+ lur->lur_oid = inode->i_ino;
+ lur->lur_ogen = inode->i_generation;
+
+ rc = obd_log_add(&mds->mds_osc_conn, mds->mds_catalog, &lur->lur_hdr,
+ lsm, lustre_msg_buf(repmsg, offset + 1, 0),
+ repmsg->buflens[offset+1]/sizeof(struct llog_cookie));
+
+ obd_free_memmd(&mds->mds_osc_conn, &lsm);
+ OBD_FREE(lur, sizeof(*lur));
+
+ RETURN(rc);
+}
+
static int mds_reint_unlink(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req,
- struct lustre_handle *child_lockh)
+ struct lustre_handle *lh)
{
- struct dentry *dir_de = NULL;
+ struct dentry *dparent = NULL;
struct dentry *dchild = NULL;
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
struct mds_body *body = NULL;
- struct inode *dir_inode = NULL, *child_inode;
- struct lustre_handle parent_lockh;
+ struct inode *child_inode;
+ struct lustre_handle parent_lockh, child_lockh;
void *handle = NULL;
struct ldlm_res_id child_res_id = { .name = {0} };
- int rc = 0, flags = 0, return_lock = 0;
- int cleanup_phase = 0;
+ int rc = 0, flags = 0, log_unlink = 0, cleanup_phase = 0;
ENTRY;
LASSERT(offset == 0 || offset == 2);
MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req,
- child_lockh));
+ &child_lockh));
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
GOTO(cleanup, rc = -ENOENT);
/* Step 1: Lookup the parent by FID */
- dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
- &parent_lockh);
- if (IS_ERR(dir_de))
- GOTO(cleanup, rc = PTR_ERR(dir_de));
- dir_inode = dir_de->d_inode;
- LASSERT(dir_inode);
+ dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW,
+ &parent_lockh);
+ if (IS_ERR(dparent))
+ GOTO(cleanup, rc = PTR_ERR(dparent));
+ LASSERT(dparent->d_inode);
cleanup_phase = 1; /* Have parent dentry lock */
/* Step 2: Lookup the child */
- dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1);
+ dchild = ll_lookup_one_len(rec->ur_name, dparent, rec->ur_namelen - 1);
if (IS_ERR(dchild))
GOTO(cleanup, rc = PTR_ERR(dchild));
child_inode = dchild->d_inode;
if (child_inode == NULL) {
- CDEBUG(D_INODE,
- "child doesn't exist (dir %lu, name %s)\n",
- dir_inode->i_ino, rec->ur_name);
- rc = -ENOENT;
- GOTO(cleanup, rc);
+ CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n",
+ dparent->d_inode->i_ino, rec->ur_name);
+ GOTO(cleanup, rc = -ENOENT);
}
DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu",
- dir_inode->i_ino, child_inode->i_ino);
+ dparent->d_inode->i_ino, child_inode->i_ino);
/* Step 3: Get a lock on the child */
child_res_id.name[0] = child_inode->i_ino;
rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX,
&flags, ldlm_completion_ast, mds_blocking_ast,
- NULL, child_lockh);
+ NULL, &child_lockh);
if (rc != ELDLM_OK)
GOTO(cleanup, rc);
cleanup_phase = 3; /* child lock */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE,
- to_kdev_t(dir_inode->i_sb->s_dev));
+ to_kdev_t(dparent->d_inode->i_sb->s_dev));
/* ldlm_reply in buf[0] if called via intent */
if (offset)
body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body));
LASSERT(body != NULL);
- /* Step 4: Do the unlink: client decides between rmdir/unlink!
- * (bug 72) */
+#ifdef ENABLE_ORPHANS
+ if (unlikely(mds->mds_osc_obd == NULL))
+ mds_osc_connect(obd, mds);
+#endif
+
+ /* If this is the last reference to this inode, get the OBD EA
+ * data first so the client can destroy OST objects */
+ if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
+ mds_pack_inode2fid(&body->fid1, child_inode);
+ mds_pack_inode2body(body, child_inode);
+ mds_pack_md(obd, req->rq_repmsg, offset + 1, body, child_inode);
+ if (!(body->valid & OBD_MD_FLEASIZE)) {
+ body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME);
+ log_unlink = 1;
+ }
+ }
+
+ /* We have to do these checks ourselves, in case we are making an
+ * orphan. The client tells us whether rmdir() or unlink() was called,
+ * so we need to return appropriate errors (bug 72).
+ *
+ * We don't have to check permissions, because vfs_rename (called from
+ * mds_unlink_orphan) also calls may_delete. */
+ if ((rec->ur_mode & S_IFMT) == S_IFDIR) {
+ if (!S_ISDIR(child_inode->i_mode))
+ GOTO(cleanup, rc = -ENOTDIR);
+ } else {
+ if (S_ISDIR(child_inode->i_mode))
+ GOTO(cleanup, rc = -EISDIR);
+ }
+
+ if (mds_open_orphan_count(child_inode) > 0) {
+ rc = mds_unlink_orphan(rec, obd, dparent, dchild, &handle);
+#ifdef ENABLE_ORPHANS
+ if (!rc && mds_log_op_unlink(obd, mds, child_inode,
+ req->rq_repmsg, offset + 1) > 0)
+ body->valid |= OBD_MD_FLCOOKIE;
+#endif
+ GOTO(cleanup, rc);
+ }
+
+ // Step 4: Do the unlink: client decides between rmdir/unlink! (bug 72)
switch (rec->ur_mode & S_IFMT) {
case S_IFDIR:
/* Drop any lingering child directories before we start our
* transaction, to avoid doing multiple inode dirty/delete
- * in our compound transaction (bug 1321).
- */
+ * in our compound transaction (bug 1321). */
shrink_dcache_parent(dchild);
- handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR);
+ handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR,
+ NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
cleanup_phase = 4;
- rc = vfs_rmdir(dir_inode, dchild);
+ rc = vfs_rmdir(dparent->d_inode, dchild);
break;
- case S_IFREG:
- /* If this is the last reference to this inode, get the OBD EA
- * data first so the client can destroy OST objects */
- if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) {
- mds_pack_inode2fid(&body->fid1, child_inode);
- mds_pack_inode2body(body, child_inode);
- mds_pack_md(obd, req->rq_repmsg, offset + 1,
- body, child_inode);
- if (body->valid & OBD_MD_FLEASIZE)
- return_lock = 1;
- }
- /* no break */
+ case S_IFREG: {
+ handle = fsfilt_start(obd, dparent->d_inode,
+ FSFILT_OP_UNLINK_LOG, NULL);
+ if (IS_ERR(handle))
+ GOTO(cleanup, rc = PTR_ERR(handle));
+
+ cleanup_phase = 4;
+ rc = vfs_unlink(dparent->d_inode, dchild);
+#ifdef ENABLE_ORPHANS
+ if (!rc && log_unlink)
+ if (mds_log_op_unlink(obd, mds, child_inode,
+ req->rq_repmsg, offset + 1) > 0)
+ body->valid |= OBD_MD_FLCOOKIE;
+#endif
+ break;
+ }
case S_IFLNK:
case S_IFCHR:
case S_IFBLK:
case S_IFIFO:
case S_IFSOCK:
- handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK);
+ handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK,
+ NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
cleanup_phase = 4;
- rc = vfs_unlink(dir_inode, dchild);
+ rc = vfs_unlink(dparent->d_inode, dchild);
break;
default:
CERROR("bad file type %o unlinking %s\n", rec->ur_mode,
cleanup:
switch(cleanup_phase) {
- case 4:
- rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0);
- if (rc && body) {
- /* Don't unlink the OST objects if the MDS unlink failed */
+ case 4:
+ rc = mds_finish_transno(mds, dparent->d_inode, handle, req,
+ rc, 0);
+ if (rc && body != NULL) {
+ // Don't unlink the OST objects if the MDS unlink failed
body->valid = 0;
}
- case 3: /* child lock */
- if (rc != 0 || return_lock == 0)
- ldlm_lock_decref(child_lockh, LCK_EX);
- case 2: /* child dentry */
+ case 3: /* child lock */
+ ldlm_lock_decref(&child_lockh, LCK_EX);
+ case 2: /* child dentry */
l_dput(dchild);
- case 1: /* parent dentry and lock */
+ case 1: /* parent dentry and lock */
if (rc) {
- ldlm_lock_decref(&parent_lockh, LCK_EX);
+ ldlm_lock_decref(&parent_lockh, LCK_PW);
} else {
memcpy(&req->rq_ack_locks[0].lock, &parent_lockh,
sizeof(parent_lockh));
- req->rq_ack_locks[0].mode = LCK_EX;
+ req->rq_ack_locks[0].mode = LCK_PW;
}
- l_dput(dir_de);
- case 0:
+ l_dput(dparent);
+ case 0:
break;
- default:
+ default:
CERROR("invalid cleanup_phase %d\n", cleanup_phase);
LBUG();
}
/* Step 3: Lookup the child */
dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1);
if (IS_ERR(dchild)) {
- CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- GOTO(cleanup, rc = PTR_ERR(dchild));
+ rc = PTR_ERR(dchild);
+ if (rc != -EPERM && rc != -EACCES)
+ CERROR("child lookup error %d\n", rc);
+ GOTO(cleanup, rc);
}
cleanup_phase = 4; /* child dentry */
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
to_kdev_t(de_src->d_inode->i_sb->s_dev));
- handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK);
+ handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
GOTO(cleanup, rc);
}
rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild);
- if (rc)
- CERROR("link error %d\n", rc);
+ if (rc && rc != -EPERM && rc != -EACCES)
+ CERROR("vfs_link error %d\n", rc);
cleanup:
rc = mds_finish_transno(mds, de_tgt_dir ? de_tgt_dir->d_inode : NULL,
handle, req, rc, 0);
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE,
to_kdev_t(de_srcdir->d_inode->i_sb->s_dev));
- handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME);
+ handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
lock_kernel();
- rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new,
- NULL);
+ rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new);
unlock_kernel();
EXIT;
Makefile.in
.deps
TAGS
+.*.cmd
-# FIXME: we need to make it clear that obdclass.o depends on
-# lustre_build_version, or 'make -j2' breaks!
DEFS=
MODULE = obdclass
FSMOD = fsfilt_ext3
endif
+class_obd.o: lustre_build_version
+
if LIBLUSTRE
lib_LIBRARIES = liblustreclass.a
-liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c
-
-class_obd.o: lustre_version
-
-lustre_version:
- echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h
- echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h
+liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c
+liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
+liblustreclass_a_SOURCES += simple.c recov_log.c obdo.c
else
modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
-obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c recov_log.c obdo.c
endif
include $(top_srcdir)/Rules
-# XXX I'm sure there's some automake mv-if-different helper for this.
+# The '#define' text must be quoted: an unquoted '#' starts a shell comment,
+# which would swallow both the text and the '>> tmpver' redirection.
lustre_build_version:
perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
+ echo '#define LUSTRE_RELEASE @RELEASE@' >> tmpver
cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
- 2> /dev/null && \
- $(RM) tmpver || \
- mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+ 2> /dev/null && \
+ $(RM) tmpver || \
+ mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
+
#include <linux/miscdevice.h>
#include <linux/smp_lock.h>
#else
-
# include <liblustre.h>
-
#endif
#include <linux/obd_support.h>
#include <linux/lprocfs_status.h>
#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
#include <linux/lustre_build_version.h>
+#include <portals/list.h>
struct semaphore obd_conf_sem; /* serialize configuration commands */
struct obd_device obd_dev[MAX_OBD_DEVICES];
static void dump_exports(struct obd_device *obd)
{
- struct list_head *tmp, *n;
+ struct obd_export *exp, *n;
- list_for_each_safe(tmp, n, &obd->obd_exports) {
- struct obd_export *exp = list_entry(tmp, struct obd_export,
- exp_obd_chain);
- CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n",
+ list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) {
+ CERROR("%s: %p %s %d %d %p\n",
obd->obd_name, exp, exp->exp_client_uuid.uuid,
atomic_read(&exp->exp_refcount),
exp->exp_failed, exp->exp_outstanding_reply );
obd->obd_type->typ_refcnt--;
class_put_type(obd->obd_type);
obd->obd_type = NULL;
+ memset(obd, 0, sizeof(*obd));
GOTO(out, err = 0);
}
atomic_set(&obd->obd_refcount, 0);
- if ( OBT(obd) && OBP(obd, setup) )
+ if (OBT(obd) && OBP(obd, setup))
err = obd_setup(obd, sizeof(*data), data);
if (!err) {
GOTO(out, err);
}
case OBD_IOC_CLEANUP: {
- int force = 0, failover = 0;
- char * flag;
+ int flags = 0;
+ char *flag;
if (!obd->obd_set_up) {
CERROR("Device %d not setup\n", obd->obd_minor);
for (flag = data->ioc_inlbuf1; *flag != 0; flag++)
switch (*flag) {
case 'F':
- force = 1;
+ flags |= OBD_OPT_FORCE;
break;
case 'A':
- failover = 1;
+ flags |= OBD_OPT_FAILOVER;
break;
default:
- CERROR("unrecognised flag '%c'\n",
+ CERROR("unrecognised flag '%c'\n",
*flag);
}
}
-
- if (atomic_read(&obd->obd_refcount) == 1 || force) {
+
+ if (atomic_read(&obd->obd_refcount) == 1 ||
+ flags & OBD_OPT_FORCE) {
/* this will stop new connections, and need to
do it before class_disconnect_exports() */
obd->obd_stopping = 1;
struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL,
NULL, NULL);
int rc;
-
- if (!force) {
+
+ if (!(flags & OBD_OPT_FORCE)) {
CERROR("OBD device %d (%p) has refcount %d\n",
- obd->obd_minor, obd,
+ obd->obd_minor, obd,
atomic_read(&obd->obd_refcount));
dump_exports(obd);
GOTO(out, err = -EBUSY);
}
- class_disconnect_exports(obd, failover);
- CDEBUG(D_IOCTL,
- "%s: waiting for obd refs to go away: %d\n",
+ class_disconnect_exports(obd, flags);
+ CDEBUG(D_IOCTL,
+ "%s: waiting for obd refs to go away: %d\n",
obd->obd_name, atomic_read(&obd->obd_refcount));
-
+
rc = l_wait_event(obd->obd_refcount_waitq,
atomic_read(&obd->obd_refcount) < 2, &lwi);
if (rc == 0) {
atomic_read(&obd->obd_refcount));
dump_exports(obd);
}
- CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n",
+ CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n",
obd->obd_name);
}
if (OBT(obd) && OBP(obd, cleanup))
- err = obd_cleanup(obd, force, failover);
+ err = obd_cleanup(obd, flags);
if (!err) {
obd->obd_set_up = obd->obd_stopping = 0;
EXPORT_SYMBOL(class_conn2ldlmimp);
EXPORT_SYMBOL(class_disconnect);
EXPORT_SYMBOL(class_disconnect_exports);
-EXPORT_SYMBOL(lustre_uuid_to_peer);
/* uuid.c */
EXPORT_SYMBOL(class_uuid_unparse);
+EXPORT_SYMBOL(lustre_uuid_to_peer);
EXPORT_SYMBOL(client_tgtuuid2obd);
EXPORT_SYMBOL(class_handle_hash);
", info@clusterfs.com\n");
class_init_uuidlist();
- class_handle_init();
+ err = class_handle_init();
+ if (err)
+ return err;
sema_init(&obd_conf_sem, 1);
INIT_LIST_HEAD(&obd_types);
- if ((err = misc_register(&obd_psdev))) {
+ err = misc_register(&obd_psdev);
+ if (err) {
CERROR("cannot register %d err %d\n", OBD_MINOR, err);
return err;
}
#endif
#ifdef __KERNEL__
-static void __exit cleanup_obdclass(void)
+static void /*__exit*/ cleanup_obdclass(void)
#else
static void cleanup_obdclass(void)
#endif
* kernel patch */
#ifdef __KERNEL__
#include <linux/lustre_version.h>
-#define LUSTRE_MIN_VERSION 18
-#define LUSTRE_MAX_VERSION 19
+#define LUSTRE_MIN_VERSION 21
+#define LUSTRE_MAX_VERSION 21
#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
# error Cannot continue: Your Lustre kernel patch is older than the sources
#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
/* unlock fsfilt_types list */
}
-struct fsfilt_operations *fsfilt_get_ops(char *type)
+struct fsfilt_operations *fsfilt_get_ops(const char *type)
{
struct fsfilt_operations *fs_ops;
/* unlock fsfilt_types list */
}
}
- __MOD_INC_USE_COUNT(fs_ops->fs_owner);
+ try_module_get(fs_ops->fs_owner);
/* unlock fsfilt_types list */
return fs_ops;
void fsfilt_put_ops(struct fsfilt_operations *fs_ops)
{
- __MOD_DEC_USE_COUNT(fs_ops->fs_owner);
+ module_put(fs_ops->fs_owner); /* pairs with the try_module_get() in fsfilt_get_ops() */
}
#include <linux/quotaops.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
-#include <linux/ext3_xattr.h>
+#include <linux/version.h>
+/* XXX ugh */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ #include <linux/ext3_xattr.h>
+#else
+ #include <linux/../../fs/ext3/xattr.h>
+#endif
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
struct fsfilt_cb_data {
- struct journal_callback cb_jcb; /* data private to jbd */
+ struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
fsfilt_cb_t cb_func; /* MDS/OBD completion function */
struct obd_device *cb_obd; /* MDS/OBD completion device */
__u64 cb_last_rcvd; /* MDS/OST last committed operation */
+ void *cb_data; /* MDS/OST completion function data */
};
#define EXT3_XATTR_INDEX_LUSTRE 5
* the inode (which we will be changing anyways as part of this
* transaction).
*/
-static void *fsfilt_ext3_start(struct inode *inode, int op)
+static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private)
{
/* For updates to the last recieved file */
int nblocks = EXT3_DATA_TRANS_BLOCKS;
void *handle;
switch(op) {
+ case FSFILT_OP_CREATE_LOG:
+ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+ op = FSFILT_OP_CREATE;
+ break;
+ case FSFILT_OP_UNLINK_LOG:
+ nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS;
+ op = FSFILT_OP_UNLINK;
+ break;
+ }
+
+ switch(op) {
case FSFILT_OP_RMDIR:
case FSFILT_OP_UNLINK:
nblocks += EXT3_DELETE_TRANS_BLOCKS;
LBUG();
}
- LASSERT(!current->journal_info);
+ LASSERT(current->journal_info == desc_private);
lock_kernel();
handle = journal_start(EXT3_JOURNAL(inode), nblocks);
unlock_kernel();
* the pages have been written.
*/
static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso,
- int niocount, struct niobuf_remote *nb)
+ int niocount, void *desc_private)
{
journal_t *journal;
handle_t *handle;
int needed;
ENTRY;
- LASSERT(!current->journal_info);
+ LASSERT(current->journal_info == desc_private);
journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
needed = fsfilt_ext3_credits_needed(objcount, fso);
if (IS_ERR(handle))
CERROR("can't get handle for %d credits: rc = %ld\n", needed,
PTR_ERR(handle));
+ else
+ LASSERT(handle->h_buffer_credits >= needed);
RETURN(handle);
}
* in the block pointers; this is really the "small" stripe MD data.
* We can avoid further hackery by virtue of the MDS file size being
* zero all the time (which doesn't invoke block truncate at unlink
- * time), so we assert we never change the MDS file size from zero.
- */
+ * time), so we assert we never change the MDS file size from zero. */
if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
/* ATTR_SIZE would invoke truncate: clear it */
iattr->ia_valid &= ~ATTR_SIZE;
- inode->i_size = iattr->ia_size;
+ EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
/* make sure _something_ gets set - so new inode
- * goes to disk (probably won't work over XFS
- */
- if (!iattr->ia_valid & ATTR_MODE) {
+ * goes to disk (probably won't work over XFS) */
+ if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
iattr->ia_valid |= ATTR_MODE;
iattr->ia_mode = inode->i_mode;
}
}
- if (inode->i_op->setattr)
+
+ /* Don't allow setattr to change file type */
+ iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+ if (inode->i_op->setattr) {
rc = inode->i_op->setattr(dentry, iattr);
- else{
+ } else {
rc = inode_change_ok(inode, iattr);
if (!rc)
rc = inode_setattr(inode, iattr);
* it will fit, because putting it in an EA currently kills the MDS
* performance. We'll fix this with "fast EAs" in the future.
*/
- if (lmm_size <= sizeof(EXT3_I(inode)->i_data) -
- sizeof(EXT3_I(inode)->i_data[0])) {
+ if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) -
+ sizeof(EXT3_I(inode)->i_data[0])) {
/* XXX old_size is debugging only */
int old_size = EXT3_I(inode)->i_data[0];
if (old_size != 0) {
} else {
down(&inode->i_sem);
lock_kernel();
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE,
XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+#else
+ rc = ext3_xattr_set_handle(handle, inode,
+ EXT3_XATTR_INDEX_LUSTRE,
+ XATTR_LUSTRE_MDS_OBJID, lmm,
+ lmm_size, 0);
+#endif
unlock_kernel();
up(&inode->i_sem);
}
{
int rc;
- if (EXT3_I(inode)->i_data[0]) {
+ if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) {
int size = le32_to_cpu(EXT3_I(inode)->i_data[0]);
LASSERT(size < sizeof(EXT3_I(inode)->i_data));
if (lmm) {
{
struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
- fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+ fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
atomic_dec(&fcb_cache_count);
}
static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
- void *handle, fsfilt_cb_t cb_func)
+ void *handle, fsfilt_cb_t cb_func,
+ void *cb_data)
{
struct fsfilt_cb_data *fcb;
fcb->cb_func = cb_func;
fcb->cb_obd = obd;
fcb->cb_last_rcvd = last_rcvd;
+ fcb->cb_data = cb_data;
CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
lock_kernel();
- /* Note that an "incompatible pointer" warning here is OK for now */
journal_callback_set(handle, fsfilt_ext3_cb_func,
(struct journal_callback *)fcb);
unlock_kernel();
static int fsfilt_ext3_journal_data(struct file *filp)
{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ /* bug 1576: enable data journaling on 2.5 when appropriate */
struct inode *inode = filp->f_dentry->d_inode;
-
EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
-
+#endif
return 0;
}
*/
static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
- struct statfs sfs;
+ struct kstatfs sfs;
int rc = vfs_statfs(sb, &sfs);
if (!rc && sfs.f_bfree < sfs.f_ffree) {
return ext3_prep_san_write(inode, blocks, nblocks, newsize);
}
+/*
+ * Read one record of @size bytes from @file at offset *@offs into @buf.
+ *
+ * The record must lie entirely below i_size and inside a single block; the
+ * block is fetched with ext3_bread() without a journal handle and without
+ * allocating.
+ *
+ * Returns @size and advances *@offs by @size on success.  Returns -EIO when
+ * the record runs past i_size, crosses a block boundary, or no buffer could
+ * be obtained without an error code; otherwise returns the error reported by
+ * ext3_bread().
+ */
+static int fsfilt_ext3_read_record(struct file *file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err = 0;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = ext3_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* ext3_bread() may hand back NULL with err still 0 (block not
+                 * mapped and create == 0); returning that 0 would look like a
+                 * successful zero-byte read, so report it as an I/O error. */
+                return err ? err : -EIO;
+        }
+
+        /* offset of the record inside the block just read */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+
+/*
+ * Write one record of @size bytes from @buf into @file at offset *@offs,
+ * inside its own journal transaction.
+ *
+ * The record must fit inside a single block.  When the write reaches past
+ * i_size, i_size is first extended to the end of the target block (under
+ * i_sem) and the inode is marked dirty.  The block is modified through the
+ * journal (get_write_access / dirty_metadata).
+ *
+ * Returns @size and advances *@offs by @size on success; on failure returns
+ * the error from the failing step (-EIO for a record that crosses a block
+ * boundary) and leaves *@offs untouched.
+ */
+static int fsfilt_ext3_write_record(struct file *file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXT3_SB(inode->i_sb)->s_journal;
+        handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() reports failure as an ERR_PTR(), not NULL; the NULL
+         * test is kept only as a defensive guard.  Using an error pointer as
+         * a handle below would dereference garbage. */
+        if (handle == NULL || IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return handle == NULL ? -EIO : (int)PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check under i_sem before growing the file */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = ext3_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because ext3_get_block_handle() updates
+         * i_disksize after marking the inode dirty in ext3_splice_branch().
+         * We will fix that when we get a chance, as ext3_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* offset of the record inside the block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = ext3_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = ext3_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        /* only a successful write moves the caller's offset */
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
+
static struct fsfilt_operations fsfilt_ext3_ops = {
fs_type: "ext3",
fs_owner: THIS_MODULE,
fs_statfs: fsfilt_ext3_statfs,
fs_sync: fsfilt_ext3_sync,
fs_prep_san_write: fsfilt_ext3_prep_san_write,
+ fs_write_record: fsfilt_ext3_write_record,
+ fs_read_record: fsfilt_ext3_read_record,
};
static int __init fsfilt_ext3_init(void)
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
struct fsfilt_cb_data {
- struct journal_callback cb_jcb; /* data private to jbd */
+ struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */
fsfilt_cb_t cb_func; /* MDS/OBD completion function */
struct obd_device *cb_obd; /* MDS/OBD completion device */
__u64 cb_last_rcvd; /* MDS/OST last committed operation */
+ void *cb_data; /* MDS/OST completion function data */
};
#define EXTN_XATTR_INDEX_LUSTRE 5
* the inode (which we will be changing anyways as part of this
* transaction).
*/
-static void *fsfilt_extN_start(struct inode *inode, int op)
+static void *fsfilt_extN_start(struct inode *inode, int op, void *desc_private)
{
/* For updates to the last recieved file */
int nblocks = EXTN_DATA_TRANS_BLOCKS;
void *handle;
switch(op) {
+ case FSFILT_OP_CREATE_LOG:
+ nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+ op = FSFILT_OP_CREATE;
+ break;
+ case FSFILT_OP_UNLINK_LOG:
+ nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS;
+ op = FSFILT_OP_UNLINK;
+ break;
+ }
+
+ switch(op) {
case FSFILT_OP_RMDIR:
case FSFILT_OP_UNLINK:
nblocks += EXTN_DELETE_TRANS_BLOCKS;
LBUG();
}
- LASSERT(!current->journal_info);
+ LASSERT(current->journal_info == desc_private);
lock_kernel();
handle = journal_start(EXTN_JOURNAL(inode), nblocks);
unlock_kernel();
* objcount inode blocks
* 1 superblock
* 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files
- *
+ *
* 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update.
*/
static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso)
ngdblocks = EXTN_SB(sb)->s_gdb_count;
needed += nbitmaps + ngdblocks;
-
+
/* last_rcvd update */
needed += EXTN_DATA_TRANS_BLOCKS;
* the pages have been written.
*/
static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso,
- int niocount, struct niobuf_remote *nb)
+ int niocount, void *desc_private)
{
journal_t *journal;
handle_t *handle;
int needed;
ENTRY;
- LASSERT(!current->journal_info);
+ LASSERT(current->journal_info == desc_private);
journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal;
needed = fsfilt_extN_credits_needed(objcount, fso);
if (IS_ERR(handle))
CERROR("can't get handle for %d credits: rc = %ld\n", needed,
PTR_ERR(handle));
+ else
+ LASSERT(handle->h_buffer_credits >= needed);
RETURN(handle);
}
* in the block pointers; this is really the "small" stripe MD data.
* We can avoid further hackery by virtue of the MDS file size being
* zero all the time (which doesn't invoke block truncate at unlink
- * time), so we assert we never change the MDS file size from zero.
- */
+ * time), so we assert we never change the MDS file size from zero. */
if (iattr->ia_valid & ATTR_SIZE && !do_trunc) {
/* ATTR_SIZE would invoke truncate: clear it */
iattr->ia_valid &= ~ATTR_SIZE;
- inode->i_size = iattr->ia_size;
+ EXTN_I(inode)->i_disksize = inode->i_size = iattr->ia_size;
/* make sure _something_ gets set - so new inode
- * goes to disk (probably won't work over XFS
- */
- if (!iattr->ia_valid & ATTR_MODE) {
+ * goes to disk (probably won't work over XFS) */
+ if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){
iattr->ia_valid |= ATTR_MODE;
iattr->ia_mode = inode->i_mode;
}
}
- if (inode->i_op->setattr)
+
+ /* Don't allow setattr to change file type */
+ iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT);
+
+ if (inode->i_op->setattr) {
rc = inode->i_op->setattr(dentry, iattr);
- else{
+ } else {
rc = inode_change_ok(inode, iattr);
if (!rc)
rc = inode_setattr(inode, iattr);
* it will fit, because putting it in an EA currently kills the MDS
* performance. We'll fix this with "fast EAs" in the future.
*/
- if (lmm_size <= sizeof(EXTN_I(inode)->i_data) -
- sizeof(EXTN_I(inode)->i_data[0])) {
+ if (inode->i_blocks == 0 && lmm_size <= sizeof(EXTN_I(inode)->i_data) -
+ sizeof(EXTN_I(inode)->i_data[0])) {
/* XXX old_size is debugging only */
int old_size = EXTN_I(inode)->i_data[0];
if (old_size != 0) {
{
int rc;
- if (EXTN_I(inode)->i_data[0]) {
+ if (inode->i_blocks == 0 && EXTN_I(inode)->i_data[0]) {
int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
LASSERT(size < sizeof(EXTN_I(inode)->i_data));
if (lmm) {
{
struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb;
- fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error);
+ fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error);
OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb);
atomic_dec(&fcb_cache_count);
}
static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
- void *handle, fsfilt_cb_t cb_func)
+ void *handle, fsfilt_cb_t cb_func,
+ void *cb_data)
{
struct fsfilt_cb_data *fcb;
fcb->cb_func = cb_func;
fcb->cb_obd = obd;
fcb->cb_last_rcvd = last_rcvd;
+ fcb->cb_data = cb_data;
CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd);
lock_kernel();
- /* Note that an "incompatible pointer" warning here is OK for now */
journal_callback_set(handle, fsfilt_extN_cb_func,
(struct journal_callback *)fcb);
unlock_kernel();
*/
static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs)
{
- struct statfs sfs;
+ struct kstatfs sfs;
int rc = vfs_statfs(sb, &sfs);
if (!rc && sfs.f_bfree < sfs.f_ffree) {
return extN_prep_san_write(inode, blocks, nblocks, newsize);
}
+/*
+ * Read one record of @size bytes from @file at offset *@offs into @buf.
+ *
+ * The record must lie entirely below i_size and inside a single block; the
+ * block is fetched with extN_bread() without a journal handle and without
+ * allocating.
+ *
+ * Returns @size and advances *@offs by @size on success.  Returns -EIO when
+ * the record runs past i_size, crosses a block boundary, or no buffer could
+ * be obtained without an error code; otherwise returns the error reported by
+ * extN_bread().
+ */
+static int fsfilt_extN_read_record(struct file *file, char *buf,
+                                   int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        int err = 0;
+
+        if (inode->i_size < *offs + size) {
+                CERROR("file size %llu is too short for read %u@%llu\n",
+                       inode->i_size, size, *offs);
+                return -EIO;
+        }
+
+        block = *offs >> inode->i_blkbits;
+        bh = extN_bread(NULL, inode, block, 0, &err);
+        if (!bh) {
+                CERROR("can't read block: %d\n", err);
+                /* extN_bread() may hand back NULL with err still 0 (block not
+                 * mapped and create == 0); returning that 0 would look like a
+                 * successful zero-byte read, so report it as an I/O error. */
+                return err ? err : -EIO;
+        }
+
+        /* offset of the record inside the block just read */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                brelse(bh);
+                return -EIO;
+        }
+
+        memcpy(buf, bh->b_data + boffs, size);
+        brelse(bh);
+        *offs += size;
+        return size;
+}
+
+
+/*
+ * Write one record of @size bytes from @buf into @file at offset *@offs,
+ * inside its own journal transaction.
+ *
+ * The record must fit inside a single block.  When the write reaches past
+ * i_size, i_size is first extended to the end of the target block (under
+ * i_sem) and the inode is marked dirty.  The block is modified through the
+ * journal (get_write_access / dirty_metadata).
+ *
+ * Returns @size and advances *@offs by @size on success; on failure returns
+ * the error from the failing step (-EIO for a record that crosses a block
+ * boundary) and leaves *@offs untouched.
+ */
+static int fsfilt_extN_write_record(struct file *file, char *buf,
+                                    int size, loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block, boffs;
+        struct inode *inode = file->f_dentry->d_inode;
+        loff_t old_size = inode->i_size;
+        journal_t *journal;
+        handle_t *handle;
+        int err;
+
+        journal = EXTN_SB(inode->i_sb)->s_journal;
+        handle = journal_start(journal, EXTN_DATA_TRANS_BLOCKS + 2);
+        /* journal_start() reports failure as an ERR_PTR(), not NULL; the NULL
+         * test is kept only as a defensive guard.  Using an error pointer as
+         * a handle below would dereference garbage. */
+        if (handle == NULL || IS_ERR(handle)) {
+                CERROR("can't start transaction\n");
+                return handle == NULL ? -EIO : (int)PTR_ERR(handle);
+        }
+
+        block = *offs >> inode->i_blkbits;
+        if (*offs + size > inode->i_size) {
+                down(&inode->i_sem);
+                /* re-check under i_sem before growing the file */
+                if (*offs + size > inode->i_size)
+                        inode->i_size = ((loff_t)block + 1) << inode->i_blkbits;
+                up(&inode->i_sem);
+        }
+
+        bh = extN_bread(handle, inode, block, 1, &err);
+        if (!bh) {
+                CERROR("can't read/create block: %d\n", err);
+                goto out;
+        }
+
+        /* This is a hack only needed because extN_get_block_handle() updates
+         * i_disksize after marking the inode dirty in extN_splice_branch().
+         * We will fix that when we get a chance, as extN_mark_inode_dirty()
+         * is not without cost, nor is it even exported.
+         */
+        if (inode->i_size > old_size)
+                mark_inode_dirty(inode);
+
+        /* offset of the record inside the block */
+        boffs = (unsigned)*offs % bh->b_size;
+        if (boffs + size > bh->b_size) {
+                CERROR("request crosses block's border. offset %llu, size %u\n",
+                       *offs, size);
+                err = -EIO;
+                goto out;
+        }
+
+        err = extN_journal_get_write_access(handle, bh);
+        if (err) {
+                CERROR("journal_get_write_access() returned error %d\n", err);
+                goto out;
+        }
+        memcpy(bh->b_data + boffs, buf, size);
+        err = extN_journal_dirty_metadata(handle, bh);
+        if (err) {
+                CERROR("journal_dirty_metadata() returned error %d\n", err);
+                goto out;
+        }
+        err = size;
+out:
+        if (bh)
+                brelse(bh);
+        journal_stop(handle);
+        /* only a successful write moves the caller's offset */
+        if (err > 0)
+                *offs += size;
+        return err;
+}
+
+
static struct fsfilt_operations fsfilt_extN_ops = {
fs_type: "extN",
fs_owner: THIS_MODULE,
fs_statfs: fsfilt_extN_statfs,
fs_sync: fsfilt_extN_sync,
fs_prep_san_write: fsfilt_extN_prep_san_write,
+ fs_write_record: fsfilt_extN_write_record,
+ fs_read_record: fsfilt_extN_read_record,
};
static int __init fsfilt_extN_init(void)
#include <linux/obd_class.h>
#include <linux/module.h>
-static void *fsfilt_reiserfs_start(struct inode *inode, int op)
+static void *fsfilt_reiserfs_start(struct inode *inode, int op,
+ void *desc_private)
{
return (void *)0xf00f00be;
}
static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
- int niocount, struct niobuf_remote *nb)
+ int niocount, void *desc_private)
{
return (void *)0xf00f00be;
}
-static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
+static int fsfilt_reiserfs_commit(struct inode *inode, void *handle,
int force_sync)
{
if (handle != (void *)0xf00f00be) {
return file->f_op->read(file, buf, count, offset);
}
-static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
- void *handle, fsfilt_cb_t cb_func)
+static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd,
+ __u64 last_rcvd, void *handle,
+ fsfilt_cb_t cb_func, void *cb_data)
{
static long next = 0;
next = jiffies + 300 * HZ;
}
- cb_func(obd, last_rcvd, 0);
+ cb_func(obd, last_rcvd, cb_data, 0);
return 0;
}
#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
-#include <linux/seq_file.h>
-#else
-#include <liblustre.h>
+#ifdef __KERNEL__
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <linux/slab.h>
+# include <linux/types.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# include <asm/statfs.h>
+# endif
+# include <linux/seq_file.h>
+#else /* __KERNEL__ */
+# include <liblustre.h>
#endif
#include <linux/obd_class.h>
#include <linux/lprocfs_status.h>
+#include <linux/lustre_fsfilt.h>
#ifdef LPROCFS
struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
const char *name)
{
- struct proc_dir_entry* temp;
+ struct proc_dir_entry *temp;
- if (!head)
+ if (head == NULL)
return NULL;
temp = head->subdir;
while (temp != NULL) {
- if (!strcmp(temp->name, name))
+ if (strcmp(temp->name, name) == 0)
return temp;
temp = temp->next;
int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
void *data)
{
- if ((root == NULL) || (list == NULL))
+ if (root == NULL || list == NULL)
return -EINVAL;
- while (list->name) {
+ while (list->name != NULL) {
struct proc_dir_entry *cur_root, *proc;
- char *pathcopy, *cur, *next;
- int pathsize = strlen(list->name)+1;
+ char *pathcopy, *cur, *next, pathbuf[64];
+ int pathsize = strlen(list->name) + 1;
proc = NULL;
cur_root = root;
/* need copy of path for strsep */
- OBD_ALLOC(pathcopy, pathsize);
- if (!pathcopy)
- return -ENOMEM;
+ if (strlen(list->name) > sizeof(pathbuf) - 1) {
+ OBD_ALLOC(pathcopy, pathsize);
+ if (pathcopy == NULL)
+ return -ENOMEM;
+ } else {
+ pathcopy = pathbuf;
+ }
next = pathcopy;
strcpy(pathcopy, list->name);
- while (cur_root && (cur = strsep(&next, "/"))) {
+ while (cur_root != NULL && (cur = strsep(&next, "/"))) {
if (*cur =='\0') /* skip double/trailing "/" */
continue;
CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n",
cur_root->name, cur, next,
(proc ? "exists" : "new"));
- if (next)
+ if (next != NULL) {
cur_root = (proc ? proc :
- proc_mkdir(cur, cur_root));
- else if (!proc) {
+ proc_mkdir(cur, cur_root));
+ } else if (proc == NULL) {
mode_t mode = 0444;
if (list->write_fptr)
mode = 0644;
}
}
+ if (pathcopy != pathbuf)
OBD_FREE(pathcopy, pathsize);
- if ((cur_root == NULL) || (proc == NULL)) {
+ if (cur_root == NULL || proc == NULL) {
CERROR("LprocFS: No memory to create /proc entry %s",
list->name);
return -ENOMEM;
return 0;
}
-void lprocfs_remove(struct proc_dir_entry* root)
+void lprocfs_remove(struct proc_dir_entry *root)
{
struct proc_dir_entry *temp = root;
struct proc_dir_entry *rm_entry;
LASSERT(parent != NULL);
while (1) {
- while (temp->subdir)
+ while (temp->subdir != NULL)
temp = temp->subdir;
rm_entry = temp;
struct proc_dir_entry *newchild;
newchild = lprocfs_srch(parent, name);
- if (newchild) {
+ if (newchild != NULL) {
CERROR(" Lproc: Attempting to register %s more than once \n",
name);
return ERR_PTR(-EALREADY);
}
newchild = proc_mkdir(name, parent);
- if (newchild && list) {
+ if (newchild != NULL && list != NULL) {
int rc = lprocfs_add_vars(newchild, list, data);
if (rc) {
lprocfs_remove(newchild);
return snprintf(page, count, LPU64"\n", *(__u64 *)data);
}
-int lprocfs_rd_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_uuid(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
+ struct obd_device *dev = (struct obd_device*)data;
LASSERT(dev != NULL);
*eof = 1;
}
int lprocfs_rd_name(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+ int *eof, void* data)
{
- struct obd_device* dev = (struct obd_device *)data;
+ struct obd_device *dev = (struct obd_device *)data;
LASSERT(dev != NULL);
LASSERT(dev->obd_name != NULL);
return snprintf(page, count, "%s\n", dev->obd_name);
}
-int lprocfs_rd_blksize(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
- LASSERT(sfs != NULL);
- *eof = 1;
- return snprintf(page, count, "%lu\n", sfs->f_bsize);
+ struct obd_device *obd = (struct obd_device *)data;
+
+ LASSERT(obd != NULL);
+ LASSERT(obd->obd_fsops != NULL);
+ LASSERT(obd->obd_fsops->fs_type != NULL);
+ return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
}
-int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_blksize(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- __u32 blk_size;
- __u64 result;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(data, &osfs, jiffies - HZ);
+ if (!rc) {
+ *eof = 1;
+ rc = snprintf(page, count, "%u\n", osfs.os_bsize);
+ }
+ return rc;
+}
- LASSERT(sfs != NULL);
- blk_size = sfs->f_bsize >> 10;
- result = sfs->f_blocks;
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct obd_statfs osfs;
+ int rc = obd_statfs(data, &osfs, jiffies - HZ);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_blocks;
- while (blk_size >>= 1)
- result <<= 1;
+ while (blk_size >>= 1)
+ result <<= 1;
- *eof = 1;
- return snprintf(page, count, LPU64"\n", result);
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+ return rc;
}
-int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- __u32 blk_size;
- __u64 result;
+ struct obd_statfs osfs;
+ int rc = obd_statfs(data, &osfs, jiffies - HZ);
+ if (!rc) {
+ __u32 blk_size = osfs.os_bsize >> 10;
+ __u64 result = osfs.os_bfree;
- LASSERT(sfs != NULL);
- blk_size = sfs->f_bsize >> 10;
- result = sfs->f_bfree;
+ while (blk_size >>= 1)
+ result <<= 1;
- while (blk_size >>= 1)
- result <<= 1;
-
- *eof = 1;
- return snprintf(page, count, LPU64"\n", result);
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", result);
+ }
+ return rc;
}
-int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- LASSERT(sfs != NULL);
- *eof = 1;
- return snprintf(page, count, "%ld\n", sfs->f_files);
+ struct obd_statfs osfs;
+ int rc = obd_statfs(data, &osfs, jiffies - HZ);
+ if (!rc) {
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", osfs.os_files);
+ }
+
+ return rc;
}
-int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
- LASSERT(sfs != NULL);
- *eof = 1;
- return snprintf(page, count, "%ld\n", sfs->f_ffree);
+ struct obd_statfs osfs;
+ int rc = obd_statfs(data, &osfs, jiffies - HZ);
+ if (!rc) {
+ *eof = 1;
+ rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
+ }
+ return rc;
}
-int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count,
- int *eof, struct statfs *sfs)
+int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
*eof = 1;
return snprintf(page, count, "unimplemented\n");
}
-int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count,
+int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
struct obd_device *obd = (struct obd_device *)data;
int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
- struct obd_type* class = (struct obd_type*) data;
+ struct obd_type *class = (struct obd_type*) data;
LASSERT(class != NULL);
*eof = 1;
if (num == 0)
return NULL;
- OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+ OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
if (stats == NULL)
return NULL;
percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num]));
- stats->ls_percpu_size = smp_num_cpus * percpusize;
+ stats->ls_percpu_size = num_online_cpus() * percpusize;
OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size);
if (stats->ls_percpu[0] == NULL) {
OBD_FREE(stats, offsetof(typeof(*stats),
- ls_percpu[smp_num_cpus]));
+ ls_percpu[num_online_cpus()]));
return NULL;
}
stats->ls_num = num;
- for (i = 1; i < smp_num_cpus; i++)
+ for (i = 1; i < num_online_cpus(); i++)
stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) +
percpusize;
return;
OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size);
- OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus]));
+ OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()]));
}
/* Reset counter under lock */
}
idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0];
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < num_online_cpus(); i++) {
struct lprocfs_counter *percpu_cntr =
&(stats->ls_percpu[i])->lp_cntr[idx];
int centry;
+
do {
- centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
- t.lc_count = percpu_cntr->lc_count;
- t.lc_sum = percpu_cntr->lc_sum;
- t.lc_min = percpu_cntr->lc_min;
- t.lc_max = percpu_cntr->lc_max;
- t.lc_sumsquare = percpu_cntr->lc_sumsquare;
+ centry = atomic_read(&percpu_cntr->lc_cntl.la_entry);
+ t.lc_count = percpu_cntr->lc_count;
+ t.lc_sum = percpu_cntr->lc_sum;
+ t.lc_min = percpu_cntr->lc_min;
+ t.lc_max = percpu_cntr->lc_max;
+ t.lc_sumsquare = percpu_cntr->lc_sumsquare;
} while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) &&
centry != atomic_read(&percpu_cntr->lc_cntl.la_exit));
ret.lc_count += t.lc_count;
}
struct seq_operations lprocfs_stats_seq_sops = {
- .start = lprocfs_stats_seq_start,
- .stop = lprocfs_stats_seq_stop,
- .next = lprocfs_stats_seq_next,
- .show = lprocfs_stats_seq_show,
+ start: lprocfs_stats_seq_start,
+ stop: lprocfs_stats_seq_stop,
+ next: lprocfs_stats_seq_next,
+ show: lprocfs_stats_seq_show,
};
static int lprocfs_stats_seq_open(struct inode *inode, struct file *file)
}
struct file_operations lprocfs_stats_seq_fops = {
- .open = lprocfs_stats_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
+ open: lprocfs_stats_seq_open,
+ read: seq_read,
+ llseek: seq_lseek,
+ release: seq_release,
};
-int lprocfs_register_stats(struct proc_dir_entry *root, const char* name,
+int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
struct lprocfs_stats *stats)
{
struct proc_dir_entry *entry;
int i;
LASSERT(stats != NULL);
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < num_online_cpus(); i++) {
c = &(stats->ls_percpu[i]->lp_cntr[index]);
c->lc_config = conf;
c->lc_min = ~(__u64)0;
#define LPROCFS_OBD_OP_INIT(base, stats, op) \
do { \
unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \
- LASSERT(coffset < stats->ls_num); \
+ LASSERT(coffset < stats->ls_num); \
lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \
} while (0)
LASSERT(obd->obd_proc_entry != NULL);
LASSERT(obd->obd_cntr_base == 0);
- num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) +
+ num_stats = 1 + OBD_COUNTER_OFFSET(unpin) +
num_private_stats;
stats = lprocfs_alloc_stats(num_stats);
- if (!stats)
+ if (stats == NULL)
return -ENOMEM;
LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_add);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_cancel);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, mark_page_dirty);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, clear_dirty_pages);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, last_dirty_offset);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin);
for (i = num_private_stats; i < num_stats; i++) {
- /* If this assertion failed, it is likely that an obd
+ /* If this LBUGs, it is likely that an obd
* operation was added to struct obd_ops in
* <linux/obd.h>, and that the corresponding line item
* LPROCFS_OBD_OP_INIT(.., .., opname)
* is missing from the list above. */
- LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL);
+ if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
+ CERROR("Missing obd_stat initializer obd_op "
+ "operation at offset %d. Aborting.\n",
+ i - num_private_stats);
+ LBUG();
+ }
}
rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
if (rc < 0) {
EXPORT_SYMBOL(lprocfs_rd_u64);
EXPORT_SYMBOL(lprocfs_rd_uuid);
EXPORT_SYMBOL(lprocfs_rd_name);
+EXPORT_SYMBOL(lprocfs_rd_fstype);
EXPORT_SYMBOL(lprocfs_rd_server_uuid);
EXPORT_SYMBOL(lprocfs_rd_conn_uuid);
EXPORT_SYMBOL(lprocfs_rd_numrefs);
* Copyright (C) 2002 Cluster File Systems, Inc.
* Author: Phil Schwan <phil@clusterfs.com>
*
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ * This file is part of Lustre, http://www.lustre.org/
*
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2.1 of the GNU Lesser General
- * Public License as published by the Free Software Foundation.
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
*
- * Portals is distributed in the hope that it will be useful,
+ * Lustre is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
+ * GNU General Public License for more details.
*
- * You should have received a copy of the GNU Lesser General Public
- * License along with Portals; if not, write to the Free Software
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#define DEBUG_SUBSYSTEM S_CLASS
#ifdef __KERNEL__
-#include <linux/types.h>
-#include <linux/random.h>
+# include <linux/types.h>
+# include <linux/random.h>
#else
-#include <liblustre.h>
+# include <liblustre.h>
#endif
-
-#include <linux/kp30.h>
+#include <linux/obd_support.h>
#include <linux/lustre_handles.h>
static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
LASSERT(handle_hash == NULL);
- PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+ OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
if (handle_hash == NULL)
return -ENOMEM;
cleanup_all_handles();
}
- PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
+ OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE);
handle_hash = NULL;
if (handle_count)
struct uuid_nid_data *data =
list_entry(tmp, struct uuid_nid_data, head);
- PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
- PORTAL_FREE(data, sizeof(*data));
+ OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+ OBD_FREE(data, sizeof(*data));
}
}
}
rc = -ENOMEM;
- PORTAL_ALLOC(data, sizeof(*data));
+ OBD_ALLOC(data, sizeof(*data));
if (data == NULL)
goto fail_0;
- PORTAL_ALLOC(data->uuid, nob);
+ OBD_ALLOC(data->uuid, nob);
- if (data == NULL)
+ /* test the uuid buffer just allocated; data itself was checked above,
+ * so re-testing it would let a failed uuid allocation go unnoticed */
+ if (data->uuid == NULL)
goto fail_1;
return 0;
fail_1:
- PORTAL_FREE (data, sizeof (*data));
+ OBD_FREE (data, sizeof (*data));
fail_0:
kportal_put_ni (nal);
return (rc);
list_del (&data->head);
kportal_put_ni (data->nal);
- PORTAL_FREE(data->uuid, strlen(data->uuid) + 1);
- PORTAL_FREE(data, sizeof(*data));
+ OBD_FREE(data->uuid, strlen(data->uuid) + 1);
+ OBD_FREE(data, sizeof(*data));
} while (!list_empty (&deathrow));
return 0;
current->fsgid = saved->ouc.ouc_fsgid;
current->cap_effective = saved->ouc.ouc_cap;
current->ngroups = saved->ngroups;
-
current->groups[0] = saved->ouc.ouc_suppgid1;
current->groups[1] = saved->ouc.ouc_suppgid2;
}
ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
- dchild = lookup_one_len(name, dir, strlen(name));
+ dchild = ll_lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
GOTO(out_up, dchild);
ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
- dchild = lookup_one_len(name, dir, strlen(name));
+ dchild = ll_lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
GOTO(out_up, dchild);
#else
#include <linux/version.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
+#include <linux/statfs.h>
#endif
#endif
#include <linux/obd_support.h>
#include <linux/obd_class.h>
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
{
+ memset(osfs, 0, sizeof(*osfs));
osfs->os_type = sfs->f_type;
osfs->os_blocks = sfs->f_blocks;
osfs->os_bfree = sfs->f_bfree;
osfs->os_namelen = sfs->f_namelen;
}
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs)
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs)
{
+ memset(sfs, 0, sizeof(*sfs));
sfs->f_type = osfs->os_type;
sfs->f_blocks = osfs->os_blocks;
sfs->f_bfree = osfs->os_bfree;
sfs->f_namelen = osfs->os_namelen;
}
-int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
-{
- struct obd_export *export, *my_export = NULL;
- struct obd_statfs osfs = { 0 };
- int rc;
- ENTRY;
-
- LASSERT( obd != NULL );
-
- spin_lock(&obd->obd_dev_lock);
- if (list_empty(&obd->obd_exports)) {
- spin_unlock(&obd->obd_dev_lock);
- export = my_export = class_new_export(obd);
- if (export == NULL)
- RETURN(-ENOMEM);
- } else {
- export = list_entry(obd->obd_exports.next, typeof(*export),
- exp_obd_chain);
- export = class_export_get(export);
- spin_unlock(&obd->obd_dev_lock);
- }
-
- rc = obd_statfs(export, &osfs);
- if (!rc)
- statfs_unpack(sfs, &osfs);
-
- if (my_export)
- class_unlink_export(my_export);
-
- class_export_put(export);
- RETURN(rc);
-}
-
EXPORT_SYMBOL(statfs_pack);
EXPORT_SYMBOL(statfs_unpack);
-EXPORT_SYMBOL(obd_self_statfs);
Makefile.in
.deps
TAGS
+.*.cmd
return class_connect(conn, obd, cluuid);
}
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
{
struct obd_export *exp = class_conn2export(conn);
ldlm_cancel_locks_for_export(exp);
class_export_put(exp);
- return (class_disconnect(conn, failover));
+ return class_disconnect(conn, flags);
}
static __u64 echo_next_id(struct obd_device *obddev)
int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
int objcount, struct obd_ioobj *obj, int niocount,
struct niobuf_remote *nb, struct niobuf_local *res,
- void **desc_private, struct obd_trans_info *oti)
+ struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n",
cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount);
- *desc_private = (void *)DESC_PRIV;
+ if (oti)
+ oti->oti_handle = (void *)DESC_PRIV;
for (i = 0; i < objcount; i++, obj++) {
int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
r->offset = nb->offset;
r->len = nb->len;
- LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE);
+ LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE);
CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
r->page, r->offset, r->len);
return rc;
}
-int echo_commitrw(int cmd, struct obd_export *export, int objcount,
- struct obd_ioobj *obj, int niocount, struct niobuf_local *res,
- void *desc_private, struct obd_trans_info *oti)
+int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj, int niocount,
+ struct niobuf_local *res, struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
RETURN(-EINVAL);
}
- LASSERT(desc_private == (void *)DESC_PRIV);
+ LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV);
for (i = 0; i < objcount; i++, obj++) {
int verify = obj->ioo_id != 0;
RETURN(0);
}
-static int echo_cleanup(struct obd_device *obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
{
ENTRY;
struct lprocfs_static_vars lvars;
int rc;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(echo, &lvars);
rc = lprocfs_obd_attach(obd, lvars.obd_vars);
if (rc != 0)
return rc;
printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n");
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(echo, &lvars);
rc = echo_object0_pages_init ();
if (rc != 0)
RETURN(rc);
}
-static void __exit obdecho_exit(void)
+static void /*__exit*/ obdecho_exit(void)
{
echo_client_cleanup();
class_unregister_type(OBD_ECHO_DEVICENAME);
}
}
- rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+ rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
out:
if (rc != 0)
pgp->flag = 0;
}
- rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL);
+ rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL);
// if (rw == OBD_BRW_READ)
// mark_dirty_kiobuf (kiobuf, count);
RETURN(rc);
}
-static int echo_cleanup(struct obd_device * obddev, int force, int failover)
+static int echo_cleanup(struct obd_device *obddev, int flags)
{
struct list_head *el;
struct ec_object *eco;
}
/* XXX assuming sole access */
- while (!list_empty (&ec->ec_objects)) {
+ while (!list_empty(&ec->ec_objects)) {
el = ec->ec_objects.next;
- eco = list_entry (el, struct ec_object, eco_obj_chain);
+ eco = list_entry(el, struct ec_object, eco_obj_chain);
- LASSERT (eco->eco_refcount == 0);
+ LASSERT(eco->eco_refcount == 0);
eco->eco_refcount = 1;
eco->eco_deleted = 1;
- echo_put_object (eco);
+ echo_put_object(eco);
}
- rc = obd_disconnect (&ec->ec_conn, 0);
+ rc = obd_disconnect(&ec->ec_conn, 0);
if (rc != 0)
CERROR("fail to disconnect device: %d\n", rc);
- RETURN (rc);
+ RETURN(rc);
}
static int echo_connect(struct lustre_handle *conn, struct obd_device *src,
RETURN (rc);
}
-static int echo_disconnect(struct lustre_handle *conn, int failover)
+static int echo_disconnect(struct lustre_handle *conn, int flags)
{
struct obd_export *exp = class_conn2export (conn);
struct obd_device *obd;
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(echo, &lvars);
return class_register_type(&echo_obd_ops, lvars.module_vars,
OBD_ECHO_CLIENT_DEVICENAME);
}
#include <linux/obd_class.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
-int rd_fstype(char* page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device* dev = (struct obd_device*)data;
-
- LASSERT(dev != NULL);
- *eof = 1;
- return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype);
-}
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "fstype", rd_fstype, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(echo, lprocfs_module_vars, lprocfs_obd_vars)
Makefile.in
.deps
TAGS
+.*.cmd
MODULE = obdfilter
modulefs_DATA = obdfilter.o
EXTRA_PROGRAMS = obdfilter
-obdfilter_SOURCES = filter.c lproc_obdfilter.c
+obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \
+lproc_obdfilter.c filter_internal.h
include $(top_srcdir)/Rules
* threaded operation on the OST.
*/
-#define EXPORT_SYMTAB
#define DEBUG_SUBSYSTEM S_FILTER
#include <linux/config.h>
#include <linux/module.h>
-#include <linux/pagemap.h> // XXX kill me soon
#include <linux/fs.h>
#include <linux/dcache.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_filter.h>
#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
#include <linux/version.h>
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/mount.h>
+# include <linux/mount.h>
+# include <linux/buffer_head.h>
#endif
-enum {
- LPROC_FILTER_READ_BYTES = 0,
- LPROC_FILTER_WRITE_BYTES = 1,
- LPROC_FILTER_LAST,
-};
+#include <linux/obd_class.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lprocfs_status.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_commit_confd.h>
+
+#include "filter_internal.h"
#define S_SHIFT 12
static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = {
filter_ffd_put(ffd);
}
-static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error)
+static void filter_commit_cb(struct obd_device *obd, __u64 transno,
+ void *cb_data, int error)
{
obd_transno_commit_cb(obd, transno, error);
}
-/* Assumes caller has already pushed us into the kernel context. */
-int filter_finish_transno(struct obd_export *export, void *handle,
- struct obd_trans_info *oti, int rc)
+
+/* Queue one llog cancel cookie in the filter's pending llcd and send that
+ * llcd when it cannot hold another cookie or OBD_LLOG_FL_SENDNOW is set.
+ *
+ * If count == 0 or cookies == NULL nothing is queued; the pending llcd (if
+ * there is one) is sent only when OBD_LLOG_FL_SENDNOW is set in flags.
+ *
+ * NOTE(review): exactly one cookie (sizeof(*cookies) bytes) is copied no
+ * matter what count says, and lsm is not used here -- confirm that callers
+ * never pass count > 1.
+ *
+ * Returns 0 on success or -ENOMEM if llcd_grab() fails. */
+static int filter_client_log_cancel(struct lustre_handle *conn,
+                                    struct lov_stripe_md *lsm, int count,
+                                    struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct filter_obd *filter = &obd->u.filter;
+        struct llog_commit_data *llcd;
+        int rc = 0;
+        ENTRY;
+
+        /* hold fo_sem while fo_llcd is inspected or replaced */
+        down(&filter->fo_sem);
+        llcd = filter->fo_llcd;
+
+        if (count == 0 || cookies == NULL) {
+                /* flush-only request: nothing to add to the llcd */
+                if (llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+                GOTO(send_now, rc);
+        }
+
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = filter->fo_mdc_imp;
+                filter->fo_llcd = llcd;
+        }
+
+        memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+send_now:
+        /* send once fewer than sizeof(*cookies) bytes remain below
+         * PAGE_SIZE, or immediately when the caller asked for it */
+        if (PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+            (flags & OBD_LLOG_FL_SENDNOW)) {
+                filter->fo_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&filter->fo_sem);
+
+        RETURN(rc);
+}
+
+/* When this (destroy) operation is committed, return the cancel cookie.
+ *
+ * Hands cb_data to filter_client_log_cancel() as a single cookie with
+ * OBD_LLOG_FL_SENDNOW set, then frees cb_data as a struct llog_cookie.
+ *
+ * NOTE(review): transno and error are not examined and the return value of
+ * filter_client_log_cancel() is dropped -- confirm that the cookie should
+ * still be cancelled when error != 0. */
+static void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
+ void *cb_data, int error)
+{
+ filter_client_log_cancel(&obd->u.filter.fo_mdc_conn, NULL, 1,
+ cb_data, OBD_LLOG_FL_SENDNOW);
+ OBD_FREE(cb_data, sizeof(struct llog_cookie));
+}
+
+/* Assumes caller has already pushed us into the kernel context. */
+int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti,
+ int rc)
+{
+ struct filter_obd *filter = &exp->exp_obd->u.filter;
+ struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_client_data *fcd = fed->fed_fcd;
+ __u64 last_rcvd;
loff_t off;
ssize_t written;
if (rc)
RETURN(rc);
- if (!obd->obd_replayable)
+ if (!exp->exp_obd->obd_replayable)
RETURN(rc);
/* we don't allocate new transnos for replayed requests */
- if (oti && oti->oti_transno == 0) {
+ if (oti != NULL && oti->oti_transno == 0) {
spin_lock(&filter->fo_translock);
- last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1;
- filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+ last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1;
+ filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd);
spin_unlock(&filter->fo_translock);
oti->oti_transno = last_rcvd;
fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
fcd->fcd_last_xid = 0;
off = fed->fed_lr_off;
- fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
- written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd,
- sizeof(*fcd), &off);
+ fsfilt_set_last_rcvd(exp->exp_obd, last_rcvd, oti->oti_handle,
+ filter_commit_cb, NULL);
+ written = fsfilt_write_record(exp->exp_obd,
+ filter->fo_rcvd_filp, (char *)fcd,
+ sizeof(*fcd), &off);
CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
- "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
+ "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
fed->fed_lr_idx, written);
if (written == sizeof(*fcd))
RETURN(0);
- CERROR("error writing to last_rcvd file: rc = %d\n",
+ CERROR("error writing to %s: rc = %d\n", LAST_RCVD,
(int)written);
if (written >= 0)
- RETURN(-EIO);
-
+ RETURN(-ENOSPC);
RETURN(written);
- }
+ }
RETURN(0);
}
-static inline void f_dput(struct dentry *dentry)
+void f_dput(struct dentry *dentry)
{
/* Can't go inside filter_ddelete because it can block */
CDEBUG(D_INODE, "putting %s: %p, count = %d\n",
}
struct dentry_operations filter_dops = {
- .d_release = filter_drelease,
+ d_release: filter_drelease,
};
-#define LAST_RCVD "last_rcvd"
-#define INIT_OBJID 2
-
-/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
-#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
-#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
-
/* Add client data to the FILTER. We use a bitmap to locate a free space
* in the last_rcvd file if cl_idx is -1 (i.e. a new client).
* Otherwise, we have just read the data from the last_rcvd file and
- * we know its offset.
- */
-int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
- struct filter_export_data *fed, int cl_idx)
+ * we know its offset. */
+static int filter_client_add(struct obd_device *obd, struct filter_obd *filter,
+ struct filter_export_data *fed, int cl_idx)
{
unsigned long *bitmap = filter->fo_last_rcvd_slots;
int new_client = (cl_idx == -1);
+ ENTRY;
LASSERT(bitmap != NULL);
repeat:
if (cl_idx >= FILTER_LR_MAX_CLIENTS) {
CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n");
- return -ENOMEM;
+ RETURN(-ENOMEM);
}
if (test_and_set_bit(cl_idx, bitmap)) {
CERROR("FILTER client %d: found bit is set in bitmap\n",
if (new_client) {
struct obd_run_ctxt saved;
loff_t off = fed->fed_lr_off;
- ssize_t written;
+ int written;
void *handle;
CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n",
fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd));
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- /* Transaction eeded to fix for bug 1403 */
+ /* Transaction needed to fix bug 1403 */
handle = fsfilt_start(obd,
filter->fo_rcvd_filp->f_dentry->d_inode,
- FSFILT_OP_SETATTR);
+ FSFILT_OP_SETATTR, NULL);
if (IS_ERR(handle)) {
written = PTR_ERR(handle);
CERROR("unable to start transaction: rc %d\n",
(int)written);
} else {
- written = lustre_fwrite(filter->fo_rcvd_filp,
+ written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
(char *)fed->fed_fcd,
sizeof(*fed->fed_fcd), &off);
fsfilt_commit(obd,
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
if (written != sizeof(*fed->fed_fcd)) {
+ CERROR("error writing %s client idx %u: rc %d\n",
+ LAST_RCVD, fed->fed_lr_idx, written);
if (written < 0)
RETURN(written);
- RETURN(-EIO);
+ RETURN(-ENOSPC);
}
}
- return 0;
+ RETURN(0);
}
-int filter_client_free(struct obd_export *exp, int failover)
+static int filter_client_free(struct obd_export *exp, int flags)
{
struct filter_export_data *fed = &exp->exp_filter_data;
struct filter_obd *filter = &exp->exp_obd->u.filter;
+ struct obd_device *obd = exp->exp_obd;
struct filter_client_data zero_fcd;
struct obd_run_ctxt saved;
int written;
loff_t off;
ENTRY;
- if (!fed->fed_fcd)
+ if (fed->fed_fcd == NULL)
RETURN(0);
- if (failover != 0)
+ if (flags & OBD_OPT_FAILOVER)
GOTO(free, 0);
/* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
+ if (strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID") == 0)
GOTO(free, 0);
LASSERT(filter->fo_last_rcvd_slots != NULL);
memset(&zero_fcd, 0, sizeof zero_fcd);
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd,
- sizeof(zero_fcd), &off);
+ written = fsfilt_write_record(obd, filter->fo_rcvd_filp,
+ (char *)&zero_fcd, sizeof(zero_fcd),
+ &off);
/* XXX: this write gets lost sometimes, unless this sync is here. */
if (written > 0)
return 0;
}
-
/* assumes caller is already in kernel ctxt */
-static int filter_update_server_data(struct file *filp,
- struct filter_server_data *fsd)
+int filter_update_server_data(struct obd_device *obd,
+ struct file *filp, struct filter_server_data *fsd)
{
loff_t off = 0;
int rc;
+ ENTRY;
CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid);
CDEBUG(D_INODE, "server last_objid: "LPU64"\n",
le64_to_cpu(fsd->fsd_last_objid));
CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n",
- le64_to_cpu(fsd->fsd_last_rcvd));
+ le64_to_cpu(fsd->fsd_last_transno));
CDEBUG(D_INODE, "server last_mount: "LPU64"\n",
le64_to_cpu(fsd->fsd_mount_count));
- rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off);
- if (rc != sizeof(*fsd)) {
- CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n",
- rc);
- RETURN(-EIO);
- }
- RETURN(0);
+ rc = fsfilt_write_record(obd, filp, (char *)fsd, sizeof(*fsd), &off);
+ if (rc == sizeof(*fsd))
+ RETURN(0);
+
+ CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", rc);
+ if (rc >= 0)
+ RETURN(-ENOSPC);
+ RETURN(rc);
}
/* assumes caller has already in kernel ctxt */
}
if (last_rcvd_size == 0) {
- CERROR("%s: initializing new last_rcvd\n", obd->obd_name);
+ CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid));
fsd->fsd_last_objid = cpu_to_le64(init_lastobjid);
- fsd->fsd_last_rcvd = 0;
+ fsd->fsd_last_transno = 0;
mount_count = fsd->fsd_mount_count = 0;
fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE);
fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START);
fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT);
filter->fo_subdir_count = FILTER_SUBDIR_COUNT;
} else {
- ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd),
- &off);
+ int retval = fsfilt_read_record(obd, filp, (char *)fsd,
+ sizeof(*fsd), &off);
if (retval != sizeof(*fsd)) {
- CDEBUG(D_INODE,"OBD filter: error reading %s\n",
- LAST_RCVD);
+ CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n",
+ LAST_RCVD, retval);
GOTO(err_fsd, rc = -EIO);
}
mount_count = le64_to_cpu(fsd->fsd_mount_count);
filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count);
+ fsd->fsd_last_objid =
+ cpu_to_le64(le64_to_cpu(fsd->fsd_last_objid) +
+ FILTER_SKIP_OBJID);
}
if (fsd->fsd_feature_incompat) {
CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n",
obd->obd_name, le64_to_cpu(fsd->fsd_last_objid));
CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n",
- obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd));
+ obd->obd_name, le64_to_cpu(fsd->fsd_last_transno));
CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n",
obd->obd_name, mount_count);
CDEBUG(D_INODE, "%s: server data size: %u\n",
CDEBUG(D_INODE, "%s: server subdir_count: %u\n",
obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count));
- /*
- * When we do a clean FILTER shutdown, we save the last_rcvd into
- * the header. If we find clients with higher last_rcvd values
- * then those clients may need recovery done.
- */
if (!obd->obd_replayable) {
- CERROR("%s: recovery support OFF\n", obd->obd_name);
+ CWARN("%s: recovery support OFF\n", obd->obd_name);
GOTO(out, rc = 0);
}
*/
off = le32_to_cpu(fsd->fsd_client_start) +
cl_idx * le16_to_cpu(fsd->fsd_client_size);
- rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off);
+ rc = fsfilt_read_record(obd, filp, (char *)fcd, sizeof(*fcd),
+ &off);
if (rc != sizeof(*fcd)) {
CERROR("error reading FILTER %s offset %d: rc = %d\n",
LAST_RCVD, cl_idx, rc);
CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
" srv lr: "LPU64" mnt: "LPU64" last mount: "
LPU64"\n", fcd->fcd_uuid, cl_idx,
- last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd),
+ last_rcvd, le64_to_cpu(fsd->fsd_last_transno),
le64_to_cpu(fcd->fcd_mount_count), mount_count);
if (exp == NULL) {
/* XXX this rc is ignored */
CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n",
cl_idx, last_rcvd);
- if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd))
- filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+ if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno))
+ filter->fo_fsd->fsd_last_transno=cpu_to_le64(last_rcvd);
obd->obd_last_committed =
- le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
+ le64_to_cpu(filter->fo_fsd->fsd_last_transno);
+
if (obd->obd_recoverable_clients) {
CERROR("RECOVERY: %d recoverable clients, last_rcvd "
LPU64"\n", obd->obd_recoverable_clients,
- le64_to_cpu(filter->fo_fsd->fsd_last_rcvd));
+ le64_to_cpu(filter->fo_fsd->fsd_last_transno));
obd->obd_next_recovery_transno =
obd->obd_last_committed + 1;
obd->obd_recovering = 1;
out:
fsd->fsd_mount_count = cpu_to_le64(mount_count + 1);
- /* save it,so mount count and last_recvd is current */
- rc = filter_update_server_data(filp, filter->fo_fsd);
+ /* save it, so that mount count and last_transno are current */
+ rc = filter_update_server_data(obd, filp, filter->fo_fsd);
RETURN(rc);
filter->fo_dentry_O_mode[mode] = dentry;
}
- file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700);
+ file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700);
if (!file || IS_ERR(file)) {
rc = PTR_ERR(file);
CERROR("OBD filter: cannot open/create %s: rc = %d\n",
filter->fo_fop = file->f_op;
filter->fo_iop = inode->i_op;
filter->fo_aops = inode->i_mapping->a_ops;
+#ifdef I_SKIP_PDFLUSH
+ /*
+ * Setting I_SKIP_PDFLUSH on this inode protects against a
+ * deadlock between pdflush and lustre_fwrite().
+ */
+ inode->i_flags |= I_SKIP_PDFLUSH;
+#endif
- rc = filter_init_server_data(obd, file, INIT_OBJID);
+ rc = filter_init_server_data(obd, file, FILTER_INIT_OBJID);
if (rc) {
CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
GOTO(err_client, rc);
* from lastobjid */
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+ rc = filter_update_server_data(obd, filter->fo_rcvd_filp,
+ filter->fo_fsd);
if (rc)
- CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc);
+ CERROR("error writing lastobjid: rc = %ld\n", rc);
if (filter->fo_rcvd_filp) {
filp_close(filter->fo_rcvd_filp, 0);
filter->fo_rcvd_filp = NULL;
if (rc)
- CERROR("last_rcvd file won't closed rc = %ld\n", rc);
+ CERROR("error closing %s: rc = %ld\n", LAST_RCVD, rc);
}
if (filter->fo_subdir_count) {
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
}
-
-static __u64 filter_next_id(struct filter_obd *filter)
+__u64 filter_next_id(struct filter_obd *filter)
{
obd_id id;
LASSERT(filter->fo_fsd != NULL);
}
/* direct cut-n-paste of mds_blocking_ast() */
-int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
- void *data, int flag)
+static int filter_blocking_ast(struct ldlm_lock *lock,
+ struct ldlm_lock_desc *desc,
+ void *data, int flag)
{
int do_ast;
ENTRY;
RETURN(rc == ELDLM_OK ? 0 : -ENOLCK); /* XXX translate ldlm code */
}
+/* We never dget the object parent, so DON'T dput it either */
static void filter_parent_unlock(struct dentry *dparent,
struct lustre_handle *lockh,
ldlm_mode_t lock_mode)
}
/* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent(struct obd_device *obd,
- obd_mode mode, obd_id objid)
+struct dentry *filter_parent(struct obd_device *obd, obd_mode mode,
+ obd_id objid)
{
struct filter_obd *filter = &obd->u.filter;
}
/* We never dget the object parent, so DON'T dput it either */
-static inline struct dentry *filter_parent_lock(struct obd_device *obd,
- obd_mode mode, obd_id objid,
- ldlm_mode_t lock_mode,
- struct lustre_handle *lockh)
+struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode,
+ obd_id objid, ldlm_mode_t lock_mode,
+ struct lustre_handle *lockh)
{
unsigned long now = jiffies;
struct dentry *de = filter_parent(obd, mode, objid);
return de;
rc = filter_lock_dentry(obd, de, lock_mode, lockh);
- if (time_after(jiffies, now + 15*HZ))
+ if (time_after(jiffies, now + 15 * HZ))
CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
return rc ? ERR_PTR(rc) : de;
}
* appropriately for this operation (normally a write lock). If
* dir_dentry is NULL, we do a read lock while we do the lookup to
* avoid races with create/destroy and such changing the directory
- * internal to the filesystem code.
- */
-static struct dentry *filter_fid2dentry(struct obd_device *obd,
- struct dentry *dir_dentry,
- obd_mode mode, obd_id id)
+ * internal to the filesystem code. */
+struct dentry *filter_fid2dentry(struct obd_device *obd,
+ struct dentry *dir_dentry,
+ obd_mode mode, obd_id id)
{
- struct super_block *sb = obd->u.filter.fo_sb;
struct lustre_handle lockh;
struct dentry *dparent = dir_dentry;
struct dentry *dchild;
int len;
ENTRY;
- if (!sb || !sb->s_dev) {
- CERROR("device not initialized.\n");
- RETURN(ERR_PTR(-ENXIO));
- }
-
if (id == 0) {
CERROR("fatal: invalid object id 0\n");
LBUG();
}
len = sprintf(name, LPU64, id);
- if (!dir_dentry) {
+ if (dir_dentry == NULL) {
dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh);
if (IS_ERR(dparent))
RETURN(dparent);
CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
dparent->d_name.len, dparent->d_name.name, name);
dchild = ll_lookup_one_len(name, dparent, len);
- if (!dir_dentry)
+ if (dir_dentry == NULL)
filter_parent_unlock(dparent, &lockh, LCK_PR);
if (IS_ERR(dchild)) {
CERROR("child lookup error %ld\n", PTR_ERR(dchild));
}
static struct file *filter_obj_open(struct obd_export *export,
- __u64 id, __u32 type,
- ldlm_mode_t parent_mode,
+ struct obd_trans_info *oti,
+ __u64 id, __u32 type, int parent_mode,
struct lustre_handle *parent_lockh)
{
struct obd_device *obd = export->exp_obd;
struct filter_obd *filter = &obd->u.filter;
- struct super_block *sb = filter->fo_sb;
struct dentry *dchild = NULL, *dparent = NULL;
struct filter_export_data *fed = &export->exp_filter_data;
struct filter_dentry_data *fdd = NULL;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
- if (!sb || !sb->s_dev) {
- CERROR("fatal: device not initialized.\n");
- GOTO(cleanup, file = ERR_PTR(-ENXIO));
- }
-
if (!id) {
CERROR("fatal: invalid obdo "LPU64"\n", id);
GOTO(cleanup, file = ERR_PTR(-ESTALE));
if (dchild->d_inode == NULL) {
CERROR("opening non-existent object %s - O_CREAT?\n", name);
+ /* dput(dchild); call filter_create_internal here */
file = ERR_PTR(-ENOENT);
GOTO(cleanup, file);
}
}
/* Caller must hold LCK_PW on parent and push us into kernel context.
- * Caller is also required to ensure that dchild->d_inode exists.
- */
-static int filter_destroy_internal(struct obd_device *obd,
+ * Caller is also required to ensure that dchild->d_inode exists. */
+static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
struct dentry *dparent,
struct dentry *dchild)
{
inode->i_nlink, atomic_read(&inode->i_count));
}
+
+#if 0
+ /* Tell the clients that the object is gone now and that they should
+ * throw away any cached pages. We don't need to wait until they're
+ * done, so just decref the lock right away and let ldlm_completion_ast
+ * clean up when it's all over. */
+ ldlm_cli_enqueue(..., LCK_PW, AST_INTENT_DESTROY, &lockh);
+ ldlm_lock_decref(&lockh, LCK_PW);
+#endif
+
+ if (0) {
+ struct lustre_handle lockh;
+ int flags = 0, rc;
+ struct ldlm_res_id res_id = { .name = { objid } };
+
+ /* This part is a wee bit iffy: we really only want to bust the
+ * locks on our stripe, so that we don't end up bouncing
+ * [0->EOF] locks around on each of the OSTs as the rest of the
+ * destroys get processed. Because we're only talking to
+ * the local LDLM, though, we should only end up locking the
+ * whole of our stripe. When bug 1425 (take all locks on OST
+ * for stripe 0) is fixed, this code should be revisited. */
+ struct ldlm_extent extent = { 0, OBD_OBJECT_EOF };
+
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL,
+ res_id, LDLM_EXTENT, &extent,
+ sizeof(extent), LCK_PW, &flags,
+ ldlm_completion_ast, filter_blocking_ast,
+ NULL, &lockh);
+ /* We only care about the side-effects, just drop the lock. */
+ ldlm_lock_decref(&lockh, LCK_PW);
+ }
+
rc = vfs_unlink(dparent->d_inode, dchild);
if (rc)
*/
static int filter_close_internal(struct obd_export *exp,
struct filter_file_data *ffd,
- struct obd_trans_info *oti,
- int failover)
+ struct obd_trans_info *oti, int flags)
{
struct obd_device *obd = exp->exp_obd;
struct filter_obd *filter = &obd->u.filter;
ENTRY;
LASSERT(filp->private_data == ffd);
- LASSERT(fdd);
+ LASSERT(fdd != NULL);
LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
rc = filp_close(filp, 0);
if (atomic_dec_and_test(&fdd->fdd_open_count) &&
- fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) {
+ (fdd->fdd_flags & FILTER_FLAG_DESTROY) &&
+ !(flags & OBD_OPT_FAILOVER)) {
void *handle;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
cleanup_phase = 2;
handle = fsfilt_start(obd, dparent->d_inode,
- FSFILT_OP_UNLINK);
+ FSFILT_OP_UNLINK_LOG, oti);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
+ if (oti != NULL) {
+ if (oti->oti_handle == NULL)
+ oti->oti_handle = handle;
+ else
+ LASSERT(oti->oti_handle == handle);
+ }
+
+#ifdef ENABLE_ORPHANS
+ /* Remove orphan unlink record from log */
+ llog_cancel_records(filter->fo_catalog, 1, &fdd->fdd_cookie);
+#endif
/* XXX unlink from PENDING directory now too */
- rc2 = filter_destroy_internal(obd, dparent, dchild);
+ rc2 = filter_destroy_internal(obd, fdd->fdd_objid, dparent,
+ dchild);
if (rc2 && !rc)
rc = rc2;
- rc = filter_finish_transno(exp, handle, oti, rc);
+ rc = filter_finish_transno(exp, oti, rc);
rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
if (rc2) {
CERROR("error on commit, err = %d\n", rc2);
RETURN(rc);
}
-/* obd methods */
/* mount the file system (secretly) */
-static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
- char *option)
+int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
+ char *option)
{
struct obd_ioctl_data* data = buf;
struct filter_obd *filter = &obd->u.filter;
-
struct vfsmount *mnt;
int rc = 0;
ENTRY;
if (IS_ERR(obd->obd_fsops))
RETURN(PTR_ERR(obd->obd_fsops));
- mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option);
+ mnt = do_kern_mount(data->ioc_inlbuf2, MS_NOATIME | MS_NODIRATIME,
+ data->ioc_inlbuf1, option);
rc = PTR_ERR(mnt);
if (IS_ERR(mnt))
GOTO(err_ops, rc);
spin_lock_init(&filter->fo_objidlock);
INIT_LIST_HEAD(&filter->fo_export_list);
+ ptlrpc_init_client(MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+ "filter_mdc", &filter->fo_mdc_client);
+ sema_init(&filter->fo_sem, 1);
+
obd->obd_namespace = ldlm_namespace_new("filter-tgt",
LDLM_NAMESPACE_SERVER);
- if (!obd->obd_namespace)
+ if (obd->obd_namespace == NULL)
GOTO(err_post, rc = -ENOMEM);
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"filter_ldlm_cb_client", &obd->obd_ldlm_client);
+ /* Create a non-replaying connection for recovery logging, so that
+ * we don't create a client entry for this local connection, and do
+ * not log or assign transaction numbers for logging operations. */
+#ifdef ENABLE_ORPHANS
+ filter->fo_catalog = filter_get_catalog(obd);
+ if (IS_ERR(filter->fo_catalog))
+ GOTO(err_post, rc = PTR_ERR(filter->fo_catalog));
+#endif
+
RETURN(0);
err_post:
struct obd_ioctl_data* data = buf;
char *option = NULL;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ /* bug 1577: implement async-delete for 2.5 */
if (!strcmp(data->ioc_inlbuf2, "ext3"))
option = "asyncdel";
+#endif
return filter_common_setup(obd, len, buf, option);
}
-/* sanobd setup methods - use a specific mount option */
-static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
-{
- struct obd_ioctl_data* data = buf;
- char *option = NULL;
-
- if (!data->ioc_inlbuf2)
- RETURN(-EINVAL);
-
- /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
- if (!strcmp(data->ioc_inlbuf2, "extN"))
- option = "data=writeback";
- else if (!strcmp(data->ioc_inlbuf2, "ext3"))
- option = "data=writeback,asyncdel";
- else
- LBUG(); /* just a reminder */
-
- return filter_common_setup(obd, len, buf, option);
-}
-
-static int filter_cleanup(struct obd_device *obd, int force, int failover)
+static int filter_cleanup(struct obd_device *obd, int flags)
{
- struct super_block *sb;
+ struct filter_obd *filter = &obd->u.filter;
ENTRY;
- if (failover)
+ if (flags & OBD_OPT_FAILOVER)
CERROR("%s: shutting down for failover; client state will"
" be preserved.\n", obd->obd_name);
if (!list_empty(&obd->obd_exports)) {
CERROR("%s: still has clients!\n", obd->obd_name);
- class_disconnect_exports(obd, failover);
+ class_disconnect_exports(obd, flags);
if (!list_empty(&obd->obd_exports)) {
CERROR("still has exports after forced cleanup?\n");
RETURN(-EBUSY);
}
}
+#ifdef ENABLE_ORPHANS
+ filter_put_catalog(filter->fo_catalog);
+#endif
+
ldlm_namespace_free(obd->obd_namespace);
- sb = obd->u.filter.fo_sb;
- if (!sb)
+ if (filter->fo_sb == NULL)
RETURN(0);
filter_post(obd);
- shrink_dcache_parent(sb->s_root);
- unlock_kernel();
+ shrink_dcache_parent(filter->fo_sb->s_root);
+ filter->fo_sb = 0;
- if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){
+ if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name,
- atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count));
- }
-
- mntput(obd->u.filter.fo_vfsmnt);
- obd->u.filter.fo_sb = 0;
-/* destroy_buffers(obd->u.filter.fo_sb->s_dev);*/
+ atomic_read(&filter->fo_vfsmnt->mnt_count));
+ unlock_kernel();
+ mntput(filter->fo_vfsmnt);
+ //destroy_buffers(filter->fo_sb->s_dev);
+ filter->fo_sb = NULL;
fsfilt_put_ops(obd->obd_fsops);
lock_kernel();
RETURN(0);
}
-int filter_attach(struct obd_device *obd, obd_count len, void *data)
+static int filter_attach(struct obd_device *obd, obd_count len, void *data)
{
struct lprocfs_static_vars lvars;
int rc;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(filter, &lvars);
rc = lprocfs_obd_attach(obd, lvars.obd_vars);
if (rc != 0)
return rc;
return rc;
}
-int filter_detach(struct obd_device *dev)
+static int filter_detach(struct obd_device *dev)
{
lprocfs_free_obd_stats(dev);
return lprocfs_obd_detach(dev);
struct filter_client_data *fcd;
struct filter_obd *filter = &obd->u.filter;
int rc;
-
ENTRY;
- if (!conn || !obd || !cluuid)
+ if (conn == NULL || obd == NULL || cluuid == NULL)
RETURN(-EINVAL);
rc = class_connect(conn, obd, cluuid);
if (rc)
RETURN(rc);
exp = class_conn2export(conn);
- LASSERT(exp);
+ LASSERT(exp != NULL);
fed = &exp->exp_filter_data;
class_export_put(exp);
list_del(&ffd->ffd_export_list);
spin_unlock(&fed->fed_lock);
- CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n",
- ffd->ffd_file->f_dentry->d_name.len,
+ CDEBUG(D_INFO, "force close file %*s (hdl %p:"LPX64") on "
+ "disconnect\n", ffd->ffd_file->f_dentry->d_name.len,
ffd->ffd_file->f_dentry->d_name.name,
ffd, ffd->ffd_handle.h_cookie);
- filter_close_internal(exp, ffd, NULL, exp->exp_failover);
+ filter_close_internal(exp, ffd, NULL, exp->exp_flags);
spin_lock(&fed->fed_lock);
}
spin_unlock(&fed->fed_lock);
if (exp->exp_obd->obd_replayable)
- filter_client_free(exp, exp->exp_failover);
+ filter_client_free(exp, exp->exp_flags);
EXIT;
}
/* also incredibly similar to mds_disconnect */
-static int filter_disconnect(struct lustre_handle *conn, int failover)
+static int filter_disconnect(struct lustre_handle *conn, int flags)
{
struct obd_export *exp = class_conn2export(conn);
+ unsigned long irqflags;
int rc;
- unsigned long flags;
ENTRY;
LASSERT(exp);
ldlm_cancel_locks_for_export(exp);
- spin_lock_irqsave(&exp->exp_lock, flags);
- exp->exp_failover = failover;
- spin_unlock_irqrestore(&exp->exp_lock, flags);
+ spin_lock_irqsave(&exp->exp_lock, irqflags);
+ exp->exp_flags = flags;
+ spin_unlock_irqrestore(&exp->exp_lock, irqflags);
- rc = class_disconnect(conn, failover);
+ rc = class_disconnect(conn, flags);
fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb);
class_export_put(exp);
RETURN(rc);
}
-static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid)
-{
- int type = oa->o_mode & S_IFMT;
- ENTRY;
-
- CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n",
- inode->i_ino, inode, oa->o_id, valid);
- /* Don't copy the inode number in place of the object ID */
- obdo_from_inode(oa, inode, valid);
- oa->o_mode &= ~S_IFMT;
- oa->o_mode |= type;
-
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
- obd_rdev rdev = kdev_t_to_nr(inode->i_rdev);
- oa->o_rdev = rdev;
- oa->o_valid |= OBD_MD_FLRDEV;
- }
-
- EXIT;
-}
-
-static struct dentry *__filter_oa2dentry(struct lustre_handle *conn,
- struct obdo *oa, char *what)
+struct dentry *__filter_oa2dentry(struct obd_device *obd,
+ struct obdo *oa, const char *what)
{
struct dentry *dchild = NULL;
LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
filter_ffd_put(ffd);
- CDEBUG(D_INODE,
- "got child objid %*s: %p, count = %d\n",
- dchild->d_name.len, dchild->d_name.name,
+ CDEBUG(D_INODE,"%s got child objid %*s: %p, count %d\n",
+ what, dchild->d_name.len, dchild->d_name.name,
dchild, atomic_read(&dchild->d_count));
}
}
- if (!dchild) {
- struct obd_device *obd = class_conn2obd(conn);
-
- if (!obd) {
- CERROR("invalid client cookie "LPX64"\n", conn->cookie);
- RETURN(ERR_PTR(-EINVAL));
- }
+ if (!dchild)
dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id);
- }
if (IS_ERR(dchild)) {
CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
return dchild;
}
-#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__)
-
static int filter_getattr(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md)
{
struct dentry *dentry = NULL;
+ struct obd_device *obd;
int rc = 0;
ENTRY;
- dentry = filter_oa2dentry(conn, oa);
+ obd = class_conn2obd(conn);
+ if (obd == NULL) {
+ CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+ RETURN(-EINVAL);
+ }
+
+ dentry = filter_oa2dentry(obd, oa);
if (IS_ERR(dentry))
RETURN(PTR_ERR(dentry));
- filter_from_inode(oa, dentry->d_inode, oa->o_valid);
+ /* Limit the valid bits in the return data to what we actually use */
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
f_dput(dentry);
RETURN(rc);
struct lov_stripe_md *md, struct obd_trans_info *oti)
{
struct obd_run_ctxt saved;
- struct obd_export *export = class_conn2export(conn);
- struct obd_device *obd = class_conn2obd(conn);
- struct filter_obd *filter = &obd->u.filter;
+ struct obd_export *exp;
+ struct filter_obd *filter;
struct dentry *dentry;
struct iattr iattr;
- struct inode *inode;
- void * handle;
+ void *handle;
int rc, rc2;
ENTRY;
- dentry = filter_oa2dentry(conn, oa);
+ LASSERT(oti != NULL);
+ exp = class_conn2export(conn);
+ if (!exp) {
+ CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+ RETURN(-EINVAL);
+ }
+ dentry = filter_oa2dentry(exp->exp_obd, oa);
if (IS_ERR(dentry))
GOTO(out_exp, rc = PTR_ERR(dentry));
+ filter = &exp->exp_obd->u.filter;
+
iattr_from_obdo(&iattr, oa, oa->o_valid);
- iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG;
- inode = dentry->d_inode;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
lock_kernel();
+
+ /* XXX this could be a rwsem instead, if filter_preprw played along */
if (iattr.ia_valid & ATTR_SIZE)
- down(&inode->i_sem);
+ down(&dentry->d_inode->i_sem);
- handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR);
+ handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR,
+ oti);
if (IS_ERR(handle))
GOTO(out_unlock, rc = PTR_ERR(handle));
- rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1);
- rc = filter_finish_transno(export, handle, oti, rc);
- rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0);
+ rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
+ rc = filter_finish_transno(exp, oti, rc);
+ rc2 = fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0);
if (rc2) {
CERROR("error on commit, err = %d\n", rc2);
if (!rc)
rc = rc2;
}
- if (iattr.ia_valid & ATTR_SIZE) {
- up(&inode->i_sem);
- oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME;
- obdo_from_inode(oa, inode, oa->o_valid);
- }
+ if (iattr.ia_valid & ATTR_SIZE)
+ up(&dentry->d_inode->i_sem);
+
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS);
out_unlock:
unlock_kernel();
f_dput(dentry);
out_exp:
- class_export_put(export);
+ class_export_put(exp);
RETURN(rc);
}
struct lov_stripe_md *ea, struct obd_trans_info *oti,
struct obd_client_handle *och)
{
- struct obd_export *export = NULL;
+ struct obd_export *exp;
struct lustre_handle *handle;
struct filter_file_data *ffd;
struct file *filp;
int rc = 0;
ENTRY;
- export = class_conn2export(conn);
- if (!export) {
- CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
- conn->cookie);
- GOTO(out, rc = -EINVAL);
+ exp = class_conn2export(conn);
+ if (exp == NULL) {
+ CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+ RETURN(-EINVAL);
}
- filp = filter_obj_open(export, oa->o_id, oa->o_mode,
+ filp = filter_obj_open(exp, oti, oa->o_id, oa->o_mode,
LCK_PR, &parent_lockh);
if (IS_ERR(filp))
GOTO(out, rc = PTR_ERR(filp));
- filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid);
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa, filp->f_dentry->d_inode, FILTER_VALID_FLAGS);
ffd = filp->private_data;
handle = obdo_handle(oa);
oa->o_valid |= OBD_MD_FLHANDLE;
out:
- class_export_put(export);
+ class_export_put(exp);
if (!rc) {
memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh,
sizeof(parent_lockh));
static int filter_close(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
- struct obd_export *exp = class_conn2export(conn);
+ struct obd_export *exp;
struct filter_file_data *ffd;
struct filter_export_data *fed;
int rc;
ENTRY;
- if (!exp) {
- CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie);
- GOTO(out, rc = -EINVAL);
+ exp = class_conn2export(conn);
+ if (exp == NULL) {
+ CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
+ RETURN(-EINVAL);
}
if (!(oa->o_valid & OBD_MD_FLHANDLE)) {
list_del(&ffd->ffd_export_list);
spin_unlock(&fed->fed_lock);
+ oa->o_valid = OBD_MD_FLID;
+ obdo_from_inode(oa,ffd->ffd_file->f_dentry->d_inode,FILTER_VALID_FLAGS);
+
rc = filter_close_internal(exp, ffd, oti, 0);
filter_ffd_put(ffd);
GOTO(out, rc);
struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
struct obd_export *exp;
- struct obd_device *obd = class_conn2obd(conn);
- struct filter_obd *filter = &obd->u.filter;
+ struct obd_device *obd;
+ struct filter_obd *filter;
struct obd_run_ctxt saved;
struct lustre_handle parent_lockh;
struct dentry *dparent;
+ struct ll_fid mds_fid = { .id = 0 };
struct dentry *dchild = NULL;
- struct iattr;
void *handle;
int err, rc, cleanup_phase;
ENTRY;
- if (!obd) {
- CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+ exp = class_conn2export(conn);
+ if (exp == NULL) {
+ CDEBUG(D_IOCTL,"invalid client cookie "LPX64"\n", conn->cookie);
RETURN(-EINVAL);
}
- exp = class_conn2export(conn);
-
+ obd = exp->exp_obd;
+ filter = &obd->u.filter;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
retry:
oa->o_id = filter_next_id(filter);
}
cleanup_phase = 2;
- handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE);
+ handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE_LOG, oti);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
rc = vfs_create(dparent->d_inode, dchild, oa->o_mode);
- if (rc)
+ if (rc) {
CERROR("create failed rc = %d\n", rc);
+ } else if (oa->o_valid & (OBD_MD_FLCTIME|OBD_MD_FLMTIME|OBD_MD_FLSIZE)){
+ struct iattr attr;
- rc = filter_finish_transno(exp, handle, oti, rc);
- err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
- if (err) {
- CERROR("unable to write lastobjid but file created\n");
- if (!rc)
- rc = err;
+ iattr_from_obdo(&attr, oa, oa->o_valid);
+ rc = fsfilt_setattr(obd, dchild, handle, &attr, 1);
+ if (rc)
+ CERROR("create setattr failed rc = %d\n", rc);
}
+ rc = filter_finish_transno(exp, oti, rc);
+ err = filter_update_server_data(obd, filter->fo_rcvd_filp,
+ filter->fo_fsd);
+ if (err)
+ CERROR("unable to write lastobjid but file created\n");
+
+ /* Set flags for fields we have set in the inode struct */
+ if (!rc && mds_fid.id && (oa->o_valid & OBD_MD_FLCOOKIE)) {
+ err = filter_log_op_create(obd->u.filter.fo_catalog, &mds_fid,
+ dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation,
+ oti->oti_logcookies);
+ if (err) {
+ CERROR("error logging create record: rc %d\n", err);
+ oa->o_valid = OBD_MD_FLID;
+ } else {
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLCOOKIE;
+ }
+ } else
+ oa->o_valid = OBD_MD_FLID;
+
err = fsfilt_commit(obd, dparent->d_inode, handle, 0);
if (err) {
CERROR("error on commit, err = %d\n", err);
GOTO(cleanup, rc);
/* Set flags for fields we have set in the inode struct */
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS |
- OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME;
- filter_from_inode(oa, dchild->d_inode, oa->o_valid);
+ obdo_from_inode(oa, dchild->d_inode, FILTER_VALID_FLAGS);
EXIT;
cleanup:
struct lov_stripe_md *ea, struct obd_trans_info *oti)
{
struct obd_export *exp;
- struct obd_device *obd = class_conn2obd(conn);
- struct filter_obd *filter = &obd->u.filter;
- struct dentry *dparent, *dchild = NULL;
+ struct obd_device *obd;
+ struct filter_obd *filter;
+ struct dentry *dchild = NULL, *dparent = NULL;
struct filter_dentry_data *fdd;
struct obd_run_ctxt saved;
void *handle = NULL;
struct lustre_handle parent_lockh;
+ struct llog_cookie *fcc = NULL;
int rc, rc2, cleanup_phase = 0;
ENTRY;
- if (!obd) {
- CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+ exp = class_conn2export(conn);
+ if (exp == NULL) {
+ CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie);
RETURN(-EINVAL);
}
- exp = class_conn2export(conn);
-
- CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id);
+ obd = exp->exp_obd;
+ filter = &obd->u.filter;
push_ctxt(&saved, &filter->fo_ctxt, NULL);
dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id,
GOTO(cleanup, rc = -ENOENT);
cleanup_phase = 2;
- if (!dchild->d_inode) {
+ if (dchild->d_inode == NULL) {
CERROR("destroying non-existent object "LPU64"\n", oa->o_id);
GOTO(cleanup, rc = -ENOENT);
}
-
- handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK);
+ handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK_LOG, oti);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
cleanup_phase = 3;
fdd = dchild->d_fsdata;
- if (fdd && atomic_read(&fdd->fdd_open_count)) {
- LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC);
+
+ /* Our MDC connection is established by the MDS to us */
+ if ((oa->o_valid & OBD_MD_FLCOOKIE) && filter->fo_mdc_imp != NULL) {
+ OBD_ALLOC(fcc, sizeof(*fcc));
+ if (fcc != NULL)
+ memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc));
+ }
+
+ if (fdd != NULL && atomic_read(&fdd->fdd_open_count)) {
+ LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC);
if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) {
fdd->fdd_flags |= FILTER_FLAG_DESTROY;
- /* XXX put into PENDING directory in case of crash */
+
+#ifdef ENABLE_ORPHANS
+ filter_log_op_orphan(filter->fo_catalog, oa->o_id,
+ oa->o_generation,&fdd->fdd_cookie);
+#endif
CDEBUG(D_INODE,
"defer destroy of %dx open objid "LPU64"\n",
atomic_read(&fdd->fdd_open_count), oa->o_id);
- } else
+ } else {
CDEBUG(D_INODE,
"repeat destroy of %dx open objid "LPU64"\n",
atomic_read(&fdd->fdd_open_count), oa->o_id);
+ }
GOTO(cleanup, rc = 0);
}
- rc = filter_destroy_internal(obd, dparent, dchild);
+ rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
cleanup:
switch(cleanup_phase) {
case 3:
- rc = filter_finish_transno(exp, handle, oti, rc);
+ if (fcc != NULL)
+ fsfilt_set_last_rcvd(obd, 0, oti->oti_handle,
+ filter_cancel_cookies_cb, fcc);
+ rc = filter_finish_transno(exp, oti, rc);
rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0);
if (rc2) {
CERROR("error on commit, err = %d\n", rc2);
RETURN(error);
}
-static inline void lustre_put_page(struct page *page)
-{
- page_cache_release(page);
-}
-
-static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb)
-{
- struct address_space *mapping = inode->i_mapping;
- struct page *page;
- unsigned long index = lnb->offset >> PAGE_SHIFT;
- int rc;
-
- page = grab_cache_page(mapping, index); /* locked page */
- if (IS_ERR(page))
- return lnb->rc = PTR_ERR(page);
-
- lnb->page = page;
-
- if (inode->i_size < lnb->offset + lnb->len - 1)
- lnb->rc = inode->i_size - lnb->offset;
- else
- lnb->rc = lnb->len;
-
- if (PageUptodate(page)) {
- unlock_page(page);
- return 0;
- }
-
- rc = mapping->a_ops->readpage(NULL, page);
- if (rc < 0) {
- CERROR("page index %lu, rc = %d\n", index, rc);
- lnb->page = NULL;
- lustre_put_page(page);
- return lnb->rc = rc;
- }
-
- return 0;
-}
-
-static int filter_finish_page_read(struct niobuf_local *lnb)
-{
- if (lnb->page == NULL)
- return 0;
-
- if (PageUptodate(lnb->page))
- return 0;
-
- wait_on_page(lnb->page);
- if (!PageUptodate(lnb->page)) {
- CERROR("page index %lu/offset "LPX64" not uptodate\n",
- lnb->page->index, lnb->offset);
- GOTO(err_page, lnb->rc = -EIO);
- }
- if (PageError(lnb->page)) {
- CERROR("page index %lu/offset "LPX64" has error\n",
- lnb->page->index, lnb->offset);
- GOTO(err_page, lnb->rc = -EIO);
- }
-
- return 0;
-
-err_page:
- lustre_put_page(lnb->page);
- lnb->page = NULL;
- return lnb->rc;
-}
-
-static struct page *lustre_get_page_write(struct inode *inode,
- unsigned long index)
-{
- struct address_space *mapping = inode->i_mapping;
- struct page *page;
- int rc;
-
- page = grab_cache_page(mapping, index); /* locked page */
-
- if (!IS_ERR(page)) {
- /* Note: Called with "O" and "PAGE_SIZE" this is essentially
- * a no-op for most filesystems, because we write the whole
- * page. For partial-page I/O this will read in the page.
- */
- rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
- if (rc) {
- CERROR("page index %lu, rc = %d\n", index, rc);
- if (rc != -ENOSPC)
- LBUG();
- GOTO(err_unlock, rc);
- }
- /* XXX not sure if we need this if we are overwriting page */
- if (PageError(page)) {
- CERROR("error on page index %lu, rc = %d\n", index, rc);
- LBUG();
- GOTO(err_unlock, rc = -EIO);
- }
- }
- return page;
-
-err_unlock:
- unlock_page(page);
- lustre_put_page(page);
- return ERR_PTR(rc);
-}
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int waitfor_one_page(struct page *page)
-{
- wait_on_page_locked(page);
- return 0;
-}
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data.
- */
-static inline void inode_update_time(struct inode *inode, int ctime_too)
-{
- time_t now = CURRENT_TIME;
- if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
- return;
- inode->i_mtime = now;
- if (ctime_too)
- inode->i_ctime = now;
- mark_inode_dirty_sync(inode);
-}
-#endif
-
-static int lustre_commit_write(struct niobuf_local *lnb)
-{
- struct page *page = lnb->page;
- unsigned from = lnb->offset & ~PAGE_MASK;
- unsigned to = from + lnb->len;
- struct inode *inode = page->mapping->host;
- int err;
-
- LASSERT(to <= PAGE_SIZE);
- err = page->mapping->a_ops->commit_write(NULL, page, from, to);
- if (!err && IS_SYNC(inode))
- err = waitfor_one_page(page);
- //SetPageUptodate(page); // the client commit_write will do this
-
- SetPageReferenced(page);
- unlock_page(page);
- lustre_put_page(page);
- return err;
-}
-
-int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
- int *pglocked)
-{
- unsigned long index = lnb->offset >> PAGE_SHIFT;
- struct address_space *mapping = inode->i_mapping;
- struct page *page;
- int rc;
-
- //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL));
- if (*pglocked)
- page = grab_cache_page_nowait(mapping, index); /* locked page */
- else
- page = grab_cache_page(mapping, index); /* locked page */
-
-
- /* This page is currently locked, so get a temporary page instead. */
- if (!page) {
- CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
- page = alloc_pages(GFP_KERNEL, 0); /* locked page */
- if (!page) {
- CERROR("no memory for a temp page\n");
- GOTO(err, rc = -ENOMEM);
- }
- page->index = index;
- lnb->page = page;
- lnb->flags |= N_LOCAL_TEMP_PAGE;
- } else if (!IS_ERR(page)) {
- (*pglocked)++;
-
- rc = mapping->a_ops->prepare_write(NULL, page,
- lnb->offset & ~PAGE_MASK,
- lnb->len);
- if (rc) {
- if (rc != -ENOSPC)
- CERROR("page index %lu, rc = %d\n", index, rc);
- GOTO(err_unlock, rc);
- }
- /* XXX not sure if we need this if we are overwriting page */
- if (PageError(page)) {
- CERROR("error on page index %lu, rc = %d\n", index, rc);
- LBUG();
- GOTO(err_unlock, rc = -EIO);
- }
- lnb->page = page;
- }
-
- return 0;
-
-err_unlock:
- unlock_page(page);
- lustre_put_page(page);
-err:
- return lnb->rc = rc;
-}
-
-/*
- * We need to balance prepare_write() calls with commit_write() calls.
- * If the page has been prepared, but we have no data for it, we don't
- * want to overwrite valid data on disk, but we still need to zero out
- * data for space which was newly allocated. Like part of what happens
- * in __block_prepare_write() for newly allocated blocks.
- *
- * XXX currently __block_prepare_write() creates buffers for all the
- * pages, and the filesystems mark these buffers as BH_New if they
- * were newly allocated from disk. We use the BH_New flag similarly.
- */
-static int filter_commit_write(struct niobuf_local *lnb, int err)
-{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- if (err) {
- unsigned block_start, block_end;
- struct buffer_head *bh, *head = lnb->page->buffers;
- unsigned blocksize = head->b_size;
-
- /* debugging: just seeing if this ever happens */
- CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
- "called for ino %lu:%lu on err %d\n",
- lnb->page->mapping->host->i_ino, lnb->page->index, err);
-
- /* Currently one buffer per page, but in the future... */
- for (bh = head, block_start = 0; bh != head || !block_start;
- block_start = block_end, bh = bh->b_this_page) {
- block_end = block_start + blocksize;
- if (buffer_new(bh)) {
- memset(kmap(lnb->page) + block_start, 0,
- blocksize);
- kunmap(lnb->page);
- }
- }
- }
-#endif
- return lustre_commit_write(lnb);
-}
-
-static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
- int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private,
- struct obd_trans_info *oti)
-{
- struct obd_run_ctxt saved;
- struct obd_device *obd;
- struct obd_ioobj *o;
- struct niobuf_remote *rnb;
- struct niobuf_local *lnb;
- struct fsfilt_objinfo *fso;
- struct dentry *dentry;
- struct inode *inode;
- int pglocked = 0, rc = 0, i, j, tot_bytes = 0;
- unsigned long now = jiffies;
- ENTRY;
-
- memset(res, 0, niocount * sizeof(*res));
-
- obd = exp->exp_obd;
- if (obd == NULL)
- RETURN(-EINVAL);
-
- // theoretically we support multi-obj BRW RPCs, but until then...
- LASSERT(objcount == 1);
-
- OBD_ALLOC(fso, objcount * sizeof(*fso));
- if (!fso)
- RETURN(-ENOMEM);
-
- push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
- for (i = 0, o = obj; i < objcount; i++, o++) {
- struct filter_dentry_data *fdd;
-
- LASSERT(o->ioo_bufcnt);
-
- dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
-
- if (IS_ERR(dentry))
- GOTO(out_objinfo, rc = PTR_ERR(dentry));
-
- fso[i].fso_dentry = dentry;
- fso[i].fso_bufcnt = o->ioo_bufcnt;
-
- if (!dentry->d_inode) {
- CERROR("trying to BRW to non-existent file "LPU64"\n",
- o->ioo_id);
- f_dput(dentry);
- GOTO(out_objinfo, rc = -ENOENT);
- }
-
- /* If we ever start to support mutli-object BRW RPCs, we will
- * need to get locks on mulitple inodes (in order) or use the
- * DLM to do the locking for us (and use the same locking in
- * filter_setattr() for truncate). That isn't all, because
- * there still exists the possibility of a truncate starting
- * a new transaction while holding the ext3 rwsem = write
- * while some writes (which have started their transactions
- * here) blocking on the ext3 rwsem = read => lock inversion.
- *
- * The handling gets very ugly when dealing with locked pages.
- * It may be easier to just get rid of the locked page code
- * (which has problems of its own) and either discover we do
- * not need it anymore (i.e. it was a symptom of another bug)
- * or ensure we get the page locks in an appropriate order.
- */
- if (cmd & OBD_BRW_WRITE)
- down(&dentry->d_inode->i_sem);
- fdd = dentry->d_fsdata;
- if (!fdd || !atomic_read(&fdd->fdd_open_count))
- CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
- o->ioo_id);
- }
-
- if (time_after(jiffies, now + 15*HZ))
- CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
-
- if (cmd & OBD_BRW_WRITE) {
- *desc_private = fsfilt_brw_start(obd, objcount, fso,
- niocount, nb);
- if (IS_ERR(*desc_private)) {
- rc = PTR_ERR(*desc_private);
- CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
- "error starting transaction: rc = %d\n", rc);
- *desc_private = NULL;
- GOTO(out_objinfo, rc);
- }
- }
-
- for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
- dentry = fso[i].fso_dentry;
- inode = dentry->d_inode;
-
- for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
- if (j == 0)
- lnb->dentry = dentry;
- else
- lnb->dentry = dget(dentry);
-
- lnb->offset = rnb->offset;
- lnb->len = rnb->len;
- lnb->flags = rnb->flags;
- lnb->start = jiffies;
-
- if (cmd & OBD_BRW_WRITE) {
- rc = filter_get_page_write(inode,lnb,&pglocked);
- if (rc)
- up(&dentry->d_inode->i_sem);
- } else if (inode->i_size <= rnb->offset) {
- /* If there's no more data, abort early.
- * lnb->page == NULL and lnb->rc == 0, so it's
- * easy to detect later. */
- f_dput(dentry);
- lnb->dentry = NULL;
- break;
- } else {
- rc = filter_start_page_read(inode, lnb);
- }
-
- if (rc) {
- CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
- "page err %u@"LPU64" %u/%u %p: rc %d\n",
- lnb->len, lnb->offset, j, o->ioo_bufcnt,
- dentry, rc);
- f_dput(dentry);
- GOTO(out_pages, rc);
- }
-
- tot_bytes += lnb->len;
-
- if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) {
- /* Likewise with a partial read */
- break;
- }
- }
- }
-
- if (time_after(jiffies, now + 15*HZ))
- CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
-
- if (cmd & OBD_BRW_READ) {
- lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES,
- tot_bytes);
- while (lnb-- > res) {
- rc = filter_finish_page_read(lnb);
- if (rc) {
- CERROR("error page %u@"LPU64" %u %p: rc %d\n",
- lnb->len, lnb->offset, lnb - res,
- lnb->dentry, rc);
- f_dput(lnb->dentry);
- GOTO(out_pages, rc);
- }
- }
- } else
- lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
- tot_bytes);
-
- if (time_after(jiffies, now + 15*HZ))
- CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
-
- EXIT;
-out:
- OBD_FREE(fso, objcount * sizeof(*fso));
- current->journal_info = NULL;
- pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
- return rc;
-
-out_pages:
- while (lnb-- > res) {
- if (cmd & OBD_BRW_WRITE) {
- filter_commit_write(lnb, rc);
- up(&lnb->dentry->d_inode->i_sem);
- } else {
- lustre_put_page(lnb->page);
- }
- f_dput(lnb->dentry);
- }
- if (cmd & OBD_BRW_WRITE) {
- filter_finish_transno(exp, *desc_private, oti, rc);
- fsfilt_commit(obd,
- filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
- *desc_private, 0);
- }
- goto out; /* dropped the dentry refs already (one per page) */
-
-out_objinfo:
- for (i = 0; i < objcount && fso[i].fso_dentry; i++) {
- if (cmd & OBD_BRW_WRITE)
- up(&fso[i].fso_dentry->d_inode->i_sem);
- f_dput(fso[i].fso_dentry);
- }
- goto out;
-}
-
-static int filter_write_locked_page(struct niobuf_local *lnb)
-{
- struct page *lpage;
- void *lpage_addr;
- void *lnb_addr;
- int rc;
- ENTRY;
-
- lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
- if (IS_ERR(lpage)) {
- /* It is highly unlikely that we would ever get an error here.
- * The page we want to get was previously locked, so it had to
- * have already allocated the space, and we were just writing
- * over the same data, so there would be no hole in the file.
- *
- * XXX: possibility of a race with truncate could exist, need
- * to check that. There are no guarantees w.r.t.
- * write order even on a local filesystem, although the
- * normal response would be to return the number of bytes
- * successfully written and leave the rest to the app.
- */
- rc = PTR_ERR(lpage);
- CERROR("error getting locked page index %ld: rc = %d\n",
- lnb->page->index, rc);
- LBUG();
- lustre_commit_write(lnb);
- RETURN(rc);
- }
-
- /* 2 kmaps == vanishingly small deadlock opportunity */
- lpage_addr = kmap(lpage);
- lnb_addr = kmap(lnb->page);
-
- memcpy(lpage_addr, lnb_addr, PAGE_SIZE);
-
- kunmap(lnb->page);
- kunmap(lpage);
-
- lustre_put_page(lnb->page);
-
- lnb->page = lpage;
- rc = lustre_commit_write(lnb);
- if (rc)
- CERROR("error committing locked page %ld: rc = %d\n",
- lnb->page->index, rc);
-
- RETURN(rc);
-}
-
static int filter_syncfs(struct obd_export *exp)
{
- struct obd_device *obd = exp->exp_obd;
ENTRY;
- RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb));
-}
-
-static int filter_commitrw(int cmd, struct obd_export *exp,
- int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_local *res,
- void *desc_private, struct obd_trans_info *oti)
-{
- struct obd_run_ctxt saved;
- struct obd_ioobj *o;
- struct niobuf_local *lnb;
- struct obd_device *obd = exp->exp_obd;
- int found_locked = 0, rc = 0, i;
- unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
- ENTRY;
-
- push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
-
- LASSERT(!current->journal_info);
- current->journal_info = desc_private;
-
- for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
- int j;
-
- if (cmd & OBD_BRW_WRITE) {
- inode_update_time(lnb->dentry->d_inode, 1);
- up(&lnb->dentry->d_inode->i_sem);
- }
- for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
- if (lnb->page == NULL) {
- continue;
- }
-
- if (lnb->flags & N_LOCAL_TEMP_PAGE) {
- found_locked++;
- continue;
- }
-
- if (time_after(jiffies, lnb->start + 15*HZ))
- CERROR("slow commitrw %lus\n",
- (jiffies - lnb->start) / HZ);
-
- if (cmd & OBD_BRW_WRITE) {
- int err = filter_commit_write(lnb, 0);
-
- if (!rc)
- rc = err;
- } else {
- lustre_put_page(lnb->page);
- }
-
- f_dput(lnb->dentry);
- if (time_after(jiffies, lnb->start + 15*HZ))
- CERROR("slow commit_write %lus\n",
- (jiffies - lnb->start) / HZ);
- }
- }
-
- for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
- i++, o++) {
- int j;
- for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
- int err;
- if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
- continue;
-
- if (time_after(jiffies, lnb->start + 15*HZ))
- CERROR("slow commitrw locked %lus\n",
- (jiffies - lnb->start) / HZ);
-
- err = filter_write_locked_page(lnb);
- if (!rc)
- rc = err;
- f_dput(lnb->dentry);
- found_locked--;
-
- if (time_after(jiffies, lnb->start + 15*HZ))
- CERROR("slow commit_write locked %lus\n",
- (jiffies - lnb->start) / HZ);
- }
- }
-
- if (cmd & OBD_BRW_WRITE) {
- /* We just want any dentry for the commit, for now */
- struct dentry *dparent = filter_parent(obd, S_IFREG, 0);
- int err;
-
- rc = filter_finish_transno(exp, desc_private, oti, rc);
- err = fsfilt_commit(obd, dparent->d_inode, desc_private,
- obd_sync_filter);
- if (err)
- rc = err;
- if (obd_sync_filter)
- LASSERT(oti->oti_transno <= obd->obd_last_committed);
-
- if (time_after(jiffies, now + 15*HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ);
- }
-
- LASSERT(!current->journal_info);
-
- pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
- RETURN(rc);
+ RETURN(fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb));
}
-static int filter_brw(int cmd, struct lustre_handle *conn,
- struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_trans_info *oti)
+static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
- struct obd_export *export = class_conn2export(conn);
- struct obd_ioobj ioo;
- struct niobuf_local *lnb;
- struct niobuf_remote *rnb;
- obd_count i;
- void *desc_private;
- int ret = 0;
ENTRY;
-
- if (export == NULL)
- RETURN(-EINVAL);
-
- OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
- OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
-
- if (lnb == NULL || rnb == NULL)
- GOTO(out, ret = -ENOMEM);
-
- for (i = 0; i < oa_bufs; i++) {
- rnb[i].offset = pga[i].off;
- rnb[i].len = pga[i].count;
- }
-
- ioo.ioo_id = lsm->lsm_object_id;
- ioo.ioo_gr = 0;
- ioo.ioo_type = S_IFREG;
- ioo.ioo_bufcnt = oa_bufs;
-
- ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb,
- &desc_private, oti);
- if (ret != 0)
- GOTO(out, ret);
-
- for (i = 0; i < oa_bufs; i++) {
- void *virt = kmap(pga[i].pg);
- obd_off off = pga[i].off & ~PAGE_MASK;
- void *addr = kmap(lnb[i].page);
-
- /* 2 kmaps == vanishingly small deadlock opportunity */
-
- if (cmd & OBD_BRW_WRITE)
- memcpy(addr + off, virt + off, pga[i].count);
- else
- memcpy(virt + off, addr + off, pga[i].count);
-
- kunmap(addr);
- kunmap(virt);
- }
-
- ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private,
- oti);
-
-out:
- if (lnb)
- OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
- if (rnb)
- OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
- class_export_put(export);
- RETURN(ret);
-}
-
-static int filter_san_preprw(int cmd, struct lustre_handle *conn,
- int objcount, struct obd_ioobj *obj,
- int niocount, struct niobuf_remote *nb)
-{
- struct obd_device *obd;
- struct obd_ioobj *o = obj;
- struct niobuf_remote *rnb = nb;
- int rc = 0;
- int i;
- ENTRY;
-
- obd = class_conn2obd(conn);
- if (!obd) {
- CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
- conn->cookie);
- RETURN(-EINVAL);
- }
-
- for (i = 0; i < objcount; i++, o++) {
- struct dentry *dentry;
- struct inode *inode;
- int (*fs_bmap)(struct address_space *, long);
- int j;
-
- dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
- if (IS_ERR(dentry))
- GOTO(out, rc = PTR_ERR(dentry));
- inode = dentry->d_inode;
- if (!inode) {
- CERROR("trying to BRW to non-existent file "LPU64"\n",
- o->ioo_id);
- f_dput(dentry);
- GOTO(out, rc = -ENOENT);
- }
- fs_bmap = inode->i_mapping->a_ops->bmap;
-
- for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
- long block;
-
- block = rnb->offset >> inode->i_blkbits;
-
- if (cmd == OBD_BRW_READ) {
- block = fs_bmap(inode->i_mapping, block);
- } else {
- loff_t newsize = rnb->offset + rnb->len;
- /* fs_prep_san_write will also update inode
- * size for us:
- * (1) new alloced block
- * (2) existed block but size extented
- */
- /* FIXME We could call fs_prep_san_write()
- * only once for all the blocks allocation.
- * Now call it once for each block, for
- * simplicity. And if error happens, we
- * probably need to release previous alloced
- * block */
- rc = fs_prep_san_write(obd, inode, &block,
- 1, newsize);
- if (rc)
- break;
- }
-
- rnb->offset = block;
- }
- f_dput(dentry);
- }
-out:
- RETURN(rc);
-}
-
-static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs)
-{
- struct obd_device *obd = exp->exp_obd;
- ENTRY;
-
RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
}
ENTRY;
obd = class_conn2obd(conn);
- if (!obd) {
+ if (obd == NULL) {
CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
conn->cookie);
RETURN(-EINVAL);
RETURN(-EINVAL);
}
-int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst,
- struct lustre_handle *src_conn, struct obdo *src,
- obd_size count, obd_off offset, struct obd_trans_info *oti)
+/* Handle a set_info request on the filter device.
+ *
+ * Only keys beginning with "mds_conn" are accepted. For such a key the
+ * caller's connection handle is copied into fo_mdc_conn and a new import
+ * is created in fo_mdc_imp, referencing the ptlrpc connection of the
+ * export that @conn resolves to.
+ *
+ * @vallen and @val are not used by this function.
+ *
+ * Returns 0 on success, -EINVAL if @conn does not resolve to an obd device
+ * or if the key is not recognised.
+ */
+static int filter_set_info(struct lustre_handle *conn, __u32 keylen,
+ void *key, __u32 vallen, void *val)
{
- struct page *page;
- struct lov_stripe_md srcmd, dstmd;
- unsigned long index = 0;
- int err = 0;
-
- LBUG(); /* THIS CODE IS NOT CORRECT -phil */
-
- memset(&srcmd, 0, sizeof(srcmd));
- memset(&dstmd, 0, sizeof(dstmd));
- srcmd.lsm_object_id = src->o_id;
- dstmd.lsm_object_id = dst->o_id;
-
+ struct obd_device *obd;
+ struct obd_export *exp;
+ struct obd_import *imp;
ENTRY;
- CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64
- ", dst: ino "LPU64"\n",
- src->o_id, src->o_blocks, src->o_size, dst->o_id);
- page = alloc_page(GFP_USER);
- if (page == NULL)
- RETURN(-ENOMEM);
-
- wait_on_page(page);
- /* XXX with brw vector I/O, we could batch up reads and writes here,
- * all we need to do is allocate multiple pages to handle the I/Os
- * and arrays to handle the request parameters.
- */
- while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
- struct brw_page pg;
-
- pg.pg = page;
- pg.count = PAGE_SIZE;
- pg.off = (page->index) << PAGE_SHIFT;
- pg.flag = 0;
-
- page->index = index;
- err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL);
- if (err) {
- EXIT;
- break;
- }
+ obd = class_conn2obd(conn);
+ if (obd == NULL) {
+ CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
+ conn->cookie);
+ RETURN(-EINVAL);
+ }
- pg.flag = OBD_BRW_CREATE;
- CDEBUG(D_INFO, "Read page %ld ...\n", page->index);
+ /* Compare only as many bytes as the literal holds: keylen is supplied
+ * by the caller, and comparing keylen bytes would read past the end of
+ * "mds_conn" for longer keys. Longer keys are matched by prefix. */
+ if (keylen < strlen("mds_conn") ||
+ memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+ RETURN(-EINVAL);
- err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti);
+ CERROR("Received MDS connection ("LPX64")\n", conn->cookie);
+ memcpy(&obd->u.filter.fo_mdc_conn, conn, sizeof(*conn));
- /* XXX should handle dst->o_size, dst->o_blocks here */
- if (err) {
- EXIT;
- break;
- }
+ /* NOTE(review): the result of class_new_import() is dereferenced
+ * below without a NULL check -- confirm it cannot fail. */
+ imp = obd->u.filter.fo_mdc_imp = class_new_import();
- CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index);
+ /* NOTE(review): exp is used unchecked; presumably it cannot be NULL
+ * because class_conn2obd(conn) succeeded above -- confirm. */
+ exp = class_conn2export(conn);
+ imp->imp_connection = ptlrpc_connection_addref(exp->exp_connection);
+ class_export_put(exp);
- index++;
- }
- dst->o_size = src->o_size;
- dst->o_blocks = src->o_blocks;
- dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
- unlock_page(page);
- __free_page(page);
+ imp->imp_client = &obd->u.filter.fo_mdc_client;
+ imp->imp_remote_handle = *conn;
+ imp->imp_obd = obd;
+ imp->imp_dlm_fake = 1; /* XXX rename imp_dlm_fake to something else */
+ imp->imp_level = LUSTRE_CONN_FULL;
+ class_import_put(imp);
- RETURN(err);
+ RETURN(0);
}
int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn,
- int len, void *karg, void *uarg)
+ int len, void *karg, void *uarg)
{
struct obd_device *obd = class_conn2obd(conn);
RETURN(0);
}
-
static struct obd_ops filter_obd_ops = {
o_owner: THIS_MODULE,
o_attach: filter_attach,
o_detach: filter_detach,
o_get_info: filter_get_info,
+ o_set_info: filter_set_info,
o_setup: filter_setup,
o_cleanup: filter_cleanup,
o_connect: filter_connect,
o_punch: filter_truncate,
o_preprw: filter_preprw,
o_commitrw: filter_commitrw,
+ o_log_cancel: filter_log_cancel,
o_destroy_export: filter_destroy_export,
o_iocontrol: filter_iocontrol,
-#if 0
- o_san_preprw: filter_san_preprw,
- o_preallocate: filter_preallocate_inodes,
- o_migrate: filter_migrate,
- o_copy: filter_copy_data,
- o_iterate: filter_iterate
-#endif
};
static struct obd_ops filter_sanobd_ops = {
o_attach: filter_attach,
o_detach: filter_detach,
o_get_info: filter_get_info,
+ o_set_info: filter_set_info,
o_setup: filter_san_setup,
o_cleanup: filter_cleanup,
o_connect: filter_connect,
o_punch: filter_truncate,
o_preprw: filter_preprw,
o_commitrw: filter_commitrw,
+ o_log_cancel: filter_log_cancel,
o_san_preprw: filter_san_preprw,
o_destroy_export: filter_destroy_export,
o_iocontrol: filter_iocontrol,
-#if 0
- o_preallocate: filter_preallocate_inodes,
- o_migrate: filter_migrate,
- o_copy: filter_copy_data,
- o_iterate: filter_iterate
-#endif
};
-
static int __init obdfilter_init(void)
{
struct lprocfs_static_vars lvars;
printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n");
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(filter, &lvars);
rc = class_register_type(&filter_obd_ops, lvars.module_vars,
OBD_FILTER_DEVICENAME);
#define DEBUG_SUBSYSTEM S_CLASS
#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <asm/statfs.h>
-#endif
#include <linux/lprocfs_status.h>
#include <linux/obd.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
-static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs)
-{
- struct obd_device *dev = (struct obd_device *) data;
- LASSERT(dev != NULL);
- return vfs_statfs(dev->u.filter.fo_sb, sfs);
-}
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_filter_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_filter_statfs);
-
-int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
- void *data)
-{
- struct obd_device *dev = (struct obd_device *)data;
- LASSERT(dev != NULL);
- return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype);
-}
-
-int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
{
struct obd_device* obd = (struct obd_device *)data;
obd->u.filter.fo_vfsmnt->mnt_devname);
}
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal", rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
- { "fstype", rd_fstype, 0, 0 },
- { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", lprocfs_rd_blksize, 0, 0 },
+ { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
+ { "filestotal", lprocfs_rd_filestotal, 0, 0 },
+ { "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
+ { "fstype", lprocfs_rd_fstype, 0, 0 },
+ { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars)
Makefile.in
.deps
TAGS
+.*.cmd
#include <linux/lprocfs_status.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
-
-DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs);
-DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs);
-
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
- { "blocksize", rd_blksize, 0, 0 },
- { "kbytestotal", rd_kbytestotal, 0, 0 },
- { "kbytesfree", rd_kbytesfree, 0, 0 },
- { "filestotal", rd_filestotal, 0, 0 },
- { "filesfree", rd_filesfree, 0, 0 },
- { "filegroups", rd_filegroups, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
+ { "blocksize", lprocfs_rd_blksize, 0, 0 },
+ { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 },
+ { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 },
+ { "filestotal", lprocfs_rd_filestotal, 0, 0 },
+ { "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ //{ "filegroups", lprocfs_rd_filegroups, 0, 0 },
{ "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 },
- { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
+ { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars)
{
struct dentry *dentry;
struct nameidata nd;
- kdev_t dev;
- KDEVT_VAL(dev, 0);
+ kdev_t dev = KDEVT_INIT(0);
if (!path_init(path, LOOKUP_FOLLOW, &nd))
return 0;
#define DEBUG_SUBSYSTEM S_OSC
#ifdef __KERNEL__
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/lustre_dlm.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#include <linux/smp_lock.h>
-#else
-#include <linux/locks.h>
-#endif
-#else
-#include <liblustre.h>
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# include <linux/workqueue.h>
+# include <linux/smp_lock.h>
+# else
+# include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
#endif
#include <linux/kp30.h>
#include <linux/lustre_mds.h> /* for mds_objid */
#include <linux/lustre_otree.h>
#include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
#include <linux/obd_lov.h>
#ifndef __CYGWIN__
-#include <linux/ctype.h>
-#include <linux/init.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
#else
-#include <ctype.h>
+# include <ctype.h>
#endif
#include <linux/lustre_ha.h>
#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
#include <linux/lprocfs_status.h>
+static struct llog_cookie zero_cookie = { { 0 } };
+
static int osc_attach(struct obd_device *dev, obd_count len, void *data)
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(osc,&lvars);
return lprocfs_obd_attach(dev, lvars.obd_vars);
}
if (lmm_bytes < sizeof (*lmm)) {
CERROR("lov_mds_md too small: %d, need %d\n",
lmm_bytes, (int)sizeof(*lmm));
- RETURN (-EINVAL);
+ RETURN(-EINVAL);
}
/* XXX LOV_MAGIC etc check? */
- if (lmm->lmm_object_id == cpu_to_le64 (0)) {
- CERROR ("lov_mds_md: zero lmm_object_id\n");
- RETURN (-EINVAL);
+ if (lmm->lmm_object_id == cpu_to_le64(0)) {
+ CERROR("lov_mds_md: zero lmm_object_id\n");
+ RETURN(-EINVAL);
}
}
lsm_size = lov_stripe_md_size(1);
- if (!lsmp)
+ if (lsmp == NULL)
RETURN(lsm_size);
- if (*lsmp && !lmm) {
+ if (*lsmp != NULL && lmm == NULL) {
OBD_FREE(*lsmp, lsm_size);
*lsmp = NULL;
RETURN(0);
}
- if (!*lsmp) {
+ if (*lsmp == NULL) {
OBD_ALLOC(*lsmp, lsm_size);
- if (!*lsmp)
+ if (*lsmp == NULL)
RETURN(-ENOMEM);
(*lsmp)->lsm_oinfo[0].loi_dirty_ot =
ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
}
- if (lmm) {
+ if (lmm != NULL) {
/* XXX zero *lsmp? */
(*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
LASSERT((*lsmp)->lsm_object_id);
static int osc_getattr_interpret(struct ptlrpc_request *req,
struct osc_getattr_async_args *aa, int rc)
{
- struct obdo *oa = aa->aa_oa;
struct ost_body *body;
ENTRY;
- if (rc != 0) {
- CERROR("failed: rc = %d\n", rc);
- RETURN (rc);
- }
-
- body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
- if (body == NULL) {
- CERROR ("can't unpack ost_body\n");
- RETURN (-EPROTO);
- }
+ if (rc != 0)
+ RETURN(rc);
- CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
- memcpy(oa, &body->oa, sizeof(*oa));
+ body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
+ if (body) {
+ CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode);
+ memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa));
- /* This should really be sent by the OST */
- oa->o_blksize = OSC_BRW_MAX_SIZE;
- oa->o_valid |= OBD_MD_FLBLKSZ;
+ /* This should really be sent by the OST */
+ aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE;
+ aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ;
+ } else {
+ CERROR("can't unpack ost_body\n");
+ rc = -EPROTO;
+ aa->aa_oa->o_valid = 0;
+ }
- RETURN (0);
+ RETURN(rc);
}
static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa,
GOTO(out, rc = -ENOMEM);
body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
- memcpy(&body->oa, oa, sizeof(*oa));
+ memcpy(&body->oa, oa, sizeof(body->oa));
request->rq_replen = lustre_msg_size(1, &size);
if (rc)
GOTO(out_req, rc);
- body = lustre_swab_repbuf (request, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_repbuf(request, 0, sizeof(*body),
+ lustre_swab_ost_body);
if (body == NULL) {
CERROR ("can't unpack ost_body\n");
GOTO (out_req, rc = -EPROTO);
* This needs to be fixed in a big way.
*/
lsm->lsm_object_id = oa->o_id;
- lsm->lsm_stripe_count = 0;
- lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
*ea = lsm;
- if (oti != NULL)
+ if (oti != NULL) {
oti->oti_transno = request->rq_repmsg->transno;
+ if (oa->o_valid & OBD_MD_FLCOOKIE) {
+ if (!oti->oti_logcookies)
+ oti_alloc_cookies(oti, 1);
+ memcpy(oti->oti_logcookies, obdo_logcookie(oa),
+ sizeof(oti->oti_onecookie));
+ }
+ }
+
CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno);
EXIT;
out_req:
body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
memcpy(&body->oa, oa, sizeof(*oa));
+ if (oti && oa->o_valid & OBD_MD_FLCOOKIE) {
+ memcpy(obdo_logcookie(oa), oti->oti_logcookies,
+ sizeof(*oti->oti_logcookies));
+ oti->oti_logcookies++;
+ }
+
request->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(request);
if (rc)
GOTO(out, rc);
- body = lustre_swab_repbuf (request, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_repbuf(request, 0, sizeof(*body),
+ lustre_swab_ost_body);
if (body == NULL) {
CERROR ("Can't unpack body\n");
GOTO (out, rc = -EPROTO);
return;
}
- CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev);
+ CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev);
down(&cli->cl_dirty_sem);
cli->cl_dirty_granted = body->oa.o_rdev;
/* XXX check for over-run and wake up the io thread that
}
}
-static int check_write_rcs (struct ptlrpc_request *request,
- int niocount, obd_count page_count,
- struct brw_page *pga)
+static int check_write_rcs(struct ptlrpc_request *request, int niocount,
+ obd_count page_count, struct brw_page *pga)
{
int i;
__u32 *remote_rcs;
}
#endif
-static int osc_brw_prep_request(struct obd_import *imp,
+static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
- struct brw_page *pga, int cmd,
- int *requested_nobp, int *niocountp,
- struct ptlrpc_request **reqp)
+ struct brw_page *pga, int *requested_nobp,
+ int *niocountp, struct ptlrpc_request **reqp)
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
if (!can_merge_pages (&pga[i - 1], &pga[i]))
niocount++;
- size[0] = sizeof (*body);
- size[1] = sizeof (*ioobj);
- size[2] = niocount * sizeof (*niobuf);
+ size[0] = sizeof(*body);
+ size[1] = sizeof(*ioobj);
+ size[2] = niocount * sizeof(*niobuf);
- req = ptlrpc_prep_req (imp, opc, 3, size, NULL);
+ req = ptlrpc_prep_req(imp, opc, 3, size, NULL);
if (req == NULL)
return (-ENOMEM);
desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK,
OST_BULK_PORTAL);
if (desc == NULL)
- GOTO (out, rc = -ENOMEM);
+ GOTO(out, rc = -ENOMEM);
/* NB request now owns desc and will free it when it gets freed */
body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj));
niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf));
- ioobj->ioo_id = lsm->lsm_object_id;
- ioobj->ioo_gr = 0;
- ioobj->ioo_type = S_IFREG;
+ memcpy(&body->oa, oa, sizeof(*oa));
+
+ ioobj->ioo_id = oa->o_id;
+ ioobj->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+ ioobj->ioo_type = oa->o_mode;
ioobj->ioo_bufcnt = niocount;
LASSERT (page_count > 0);
struct brw_page *pg = &pga[i];
struct brw_page *pg_prev = pg - 1;
- LASSERT (pg->count > 0);
- LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE);
- LASSERT (i == 0 || pg->off > pg_prev->off);
+ LASSERT(pg->count > 0);
+ LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE);
+ LASSERT(i == 0 || pg->off > pg_prev->off);
- rc = ptlrpc_prep_bulk_page (desc, pg->pg,
- pg->off & (PAGE_SIZE - 1),
- pg->count);
+ rc = ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK,
+ pg->count);
if (rc != 0)
- GOTO (out, rc);
+ GOTO(out, rc);
requested_nob += pg->count;
- if (i > 0 && can_merge_pages (pg_prev, pg)) {
+ if (i > 0 && can_merge_pages(pg_prev, pg)) {
niobuf--;
niobuf->len += pg->count;
} else {
}
}
- LASSERT ((void *)(niobuf - niocount) ==
- lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
+ LASSERT((void *)(niobuf - niocount) ==
+ lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)));
#if CHECKSUM_BULK
body->oa.o_valid |= OBD_MD_FLCKSUM;
if (opc == OST_BRW_WRITE)
- body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga);
+ body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga);
#endif
osc_announce_cached(cli, body);
- spin_lock_irqsave (&req->rq_lock, flags);
+ spin_lock_irqsave(&req->rq_lock, flags);
req->rq_no_resend = 1;
- spin_unlock_irqrestore (&req->rq_lock, flags);
+ spin_unlock_irqrestore(&req->rq_lock, flags);
/* size[0] still sizeof (*body) */
if (opc == OST_WRITE) {
return (rc);
}
-static int osc_brw_fini_request (struct ptlrpc_request *req,
- int requested_nob, int niocount,
- obd_count page_count, struct brw_page *pga,
- int rc)
+static int osc_brw_fini_request(struct ptlrpc_request *req, struct obdo *oa,
+ int requested_nob, int niocount,
+ obd_count page_count, struct brw_page *pga,
+ int rc)
{
struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
struct ost_body *body;
+
if (rc < 0)
return (rc);
- body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
+ body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL) {
CERROR ("Can't unpack body\n");
- RETURN(-EPROTO);
+ return (-EPROTO);
}
+
osc_update_grant(cli, body);
if (req->rq_reqmsg->opc == OST_WRITE) {
return (-EPROTO);
}
- return (check_write_rcs(req, niocount, page_count, pga));
+ return(check_write_rcs(req, niocount, page_count, pga));
}
if (rc > requested_nob) {
- CERROR ("Unexpected rc %d (%d requested)\n",
- rc, requested_nob);
+ CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob);
return (-EPROTO);
}
if (rc < requested_nob)
handle_short_read(rc, page_count, pga);
+ memcpy(oa, &body->oa, sizeof(*oa));
+
#if CHECKSUM_BULK
- if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ if (oa->o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
- obd_count server_cksum = body->oa.o_nlink;
+ obd_count server_cksum = oa->o_nlink;
obd_count cksum = cksum_pages(rc, page_count, pga);
cksum_counter++;
", server NID "LPX64"\n", server_cksum, cksum,
imp->imp_connection->c_peer.peer_nid);
cksum_counter = 0;
+ oa->o_rdev = cksum;
} else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
CERROR("Checksum %u from "LPX64" OK: %x\n",
cksum_counter,
return (0);
}
-static int osc_brw_internal(struct lustre_handle *conn,
+static int osc_brw_internal(int cmd, struct lustre_handle *conn,struct obdo *oa,
struct lov_stripe_md *lsm,
- obd_count page_count, struct brw_page *pga, int cmd)
+ obd_count page_count, struct brw_page *pga)
{
int requested_nob;
int niocount;
ENTRY;
restart_bulk:
- rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga,
- cmd, &requested_nob, &niocount, &request);
+ rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+ page_count, pga, &requested_nob, &niocount,
+ &request);
/* NB ^ sets rq_no_resend */
if (rc != 0)
goto restart_bulk;
}
- rc = osc_brw_fini_request (request, requested_nob, niocount,
- page_count, pga, rc);
+ rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+ page_count, pga, rc);
ptlrpc_req_finished(request);
RETURN (rc);
static int brw_interpret(struct ptlrpc_request *request,
struct osc_brw_async_args *aa, int rc)
{
+ struct obdo *oa = aa->aa_oa;
int requested_nob = aa->aa_requested_nob;
int niocount = aa->aa_nio_count;
obd_count page_count = aa->aa_page_count;
//goto restart_bulk;
}
- rc = osc_brw_fini_request (request, requested_nob, niocount,
- page_count, pga, rc);
+ rc = osc_brw_fini_request(request, oa, requested_nob, niocount,
+ page_count, pga, rc);
RETURN (rc);
}
-static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm,
- obd_count page_count, struct brw_page *pga,
- struct ptlrpc_request_set *set, int cmd)
+static int async_internal(int cmd, struct lustre_handle *conn, struct obdo *oa,
+ struct lov_stripe_md *lsm, obd_count page_count,
+ struct brw_page *pga, struct ptlrpc_request_set *set)
{
struct ptlrpc_request *request;
int requested_nob;
int rc;
ENTRY;
- rc = osc_brw_prep_request (class_conn2cliimp(conn),
- lsm, page_count, pga, cmd,
- &requested_nob, &nio_count, &request);
+ rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm,
+ page_count, pga, &requested_nob, &nio_count,
+ &request);
/* NB ^ sets rq_no_resend */
if (rc == 0) {
- LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args));
+ LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
aa = (struct osc_brw_async_args *)&request->rq_async_args;
+ aa->aa_oa = oa;
aa->aa_requested_nob = requested_nob;
aa->aa_nio_count = nio_count;
aa->aa_page_count = page_count;
return i;
}
-static int osc_brw(int cmd, struct lustre_handle *conn,
+static int osc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md, obd_count page_count,
struct brw_page *pga, struct obd_trans_info *oti)
{
sort_brw_pages(pga, pages_per_brw);
pages_per_brw = check_elan_limit(pga, pages_per_brw);
- rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd);
+ rc = osc_brw_internal(cmd, conn, oa, md, pages_per_brw, pga);
if (rc != 0)
RETURN(rc);
RETURN(0);
}
-static int osc_brw_async(int cmd, struct lustre_handle *conn,
+static int osc_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *md, obd_count page_count,
struct brw_page *pga, struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
sort_brw_pages(pga, pages_per_brw);
pages_per_brw = check_elan_limit(pga, pages_per_brw);
- rc = async_internal(conn, md, pages_per_brw, pga, set, cmd);
+ rc = async_internal(cmd, conn, oa, md, pages_per_brw, pga, set);
if (rc != 0)
RETURN(rc);
#ifdef __KERNEL__
/* Note: caller will lock/unlock, and set uptodate on the pages */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-static int sanosc_brw_read(struct lustre_handle *conn,
- struct lov_stripe_md *lsm,
- obd_count page_count,
+static int sanosc_brw_read(struct lustre_handle *conn, struct obdo *oa,
+ struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page *pga)
{
struct ptlrpc_request *request = NULL;
if (!request)
RETURN(-ENOMEM);
- body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body));
- iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr));
+ body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body));
+ iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr));
nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
- sizeof (*nioptr) * page_count);
+ sizeof(*nioptr) * page_count);
+
+ memcpy(&body->oa, oa, sizeof(body->oa));
- iooptr->ioo_id = lsm->lsm_object_id;
- iooptr->ioo_gr = 0;
- iooptr->ioo_type = S_IFREG;
+ iooptr->ioo_id = oa->o_id;
+ iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+ iooptr->ioo_type = oa->o_mode;
iooptr->ioo_bufcnt = page_count;
for (mapped = 0; mapped < page_count; mapped++, nioptr++) {
if (rc)
GOTO(out_req, rc);
- swab = lustre_msg_swabbed (request->rq_repmsg);
- LASSERT_REPSWAB (request, 1);
+ body = lustre_swab_repbuf(request, 0, sizeof(*body),
+ lustre_swab_ost_body);
+ if (body == NULL) {
+ CERROR("Can't unpack body\n");
+ GOTO(out_req, rc = -EPROTO);
+ }
+
+ memcpy(oa, &body->oa, sizeof(*oa));
+
+ swab = lustre_msg_swabbed(request->rq_repmsg);
+ LASSERT_REPSWAB(request, 1);
nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]);
if (!nioptr) {
/* nioptr missing or short */
RETURN(rc);
}
-static int sanosc_brw_write(struct lustre_handle *conn,
- struct lov_stripe_md *lsm,
- obd_count page_count,
+static int sanosc_brw_write(struct lustre_handle *conn, struct obdo *oa,
+ struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page *pga)
{
struct ptlrpc_request *request = NULL;
nioptr = lustre_msg_buf(request->rq_reqmsg, 2,
sizeof (*nioptr) * page_count);
- iooptr->ioo_id = lsm->lsm_object_id;
- iooptr->ioo_gr = 0;
- iooptr->ioo_type = S_IFREG;
+ memcpy(&body->oa, oa, sizeof(body->oa));
+
+ iooptr->ioo_id = oa->o_id;
+ iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0;
+ iooptr->ioo_type = oa->o_mode;
iooptr->ioo_bufcnt = page_count;
/* pack request */
RETURN(rc);
}
-static int sanosc_brw(int cmd, struct lustre_handle *conn,
+static int sanosc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page *pga, struct obd_trans_info *oti)
{
pages_per_brw = page_count;
if (cmd & OBD_BRW_WRITE)
- rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga);
+ rc = sanosc_brw_write(conn, oa, lsm, pages_per_brw,pga);
else
- rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga);
+ rc = sanosc_brw_read(conn, oa, lsm, pages_per_brw, pga);
if (rc != 0)
RETURN(rc);
#endif
#endif
-static int osc_mark_page_dirty(struct lustre_handle *conn,
+static int osc_mark_page_dirty(struct lustre_handle *conn,
struct lov_stripe_md *lsm, unsigned long offset)
{
struct client_obd *cli = &class_conn2obd(conn)->u.cli;
down(&cli->cl_dirty_sem);
- if (cli->cl_ost_can_grant &&
+#if 0
+ if (cli->cl_ost_can_grant &&
(cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) {
CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE);
GOTO(out, rc = -EDQUOT);
}
+#endif
rc = ot_mark_offset(dirty_ot, offset);
if (rc)
RETURN(rc);
}
-static int osc_clear_dirty_pages(struct lustre_handle *conn,
+static int osc_clear_dirty_pages(struct lustre_handle *conn,
struct lov_stripe_md *lsm,
unsigned long start, unsigned long end,
unsigned long *cleared)
struct lustre_handle *lockh)
{
struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
- struct obd_device *obddev = class_conn2obd(connh);
+ struct obd_device *obd = class_conn2obd(connh);
struct ldlm_extent *extent = extentp;
int rc;
ENTRY;
extent->end |= ~PAGE_MASK;
/* Next, search for already existing extent locks that will cover us */
- rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
+ rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id,
type, extent, sizeof(extent), mode, data, lockh);
if (rc == 1)
/* We already have a lock, and it's referenced */
* locks out from other users right now, too. */
if (mode == LCK_PR) {
- rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA,
+ rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA,
&res_id, type, extent, sizeof(extent),
LCK_PW, data, lockh);
if (rc == 1) {
}
}
- rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock,
+ rc = ldlm_cli_enqueue(connh, NULL, obd->obd_namespace, parent_lock,
res_id, type, extent, sizeof(extent), mode, flags,
ldlm_completion_ast, callback, data, lockh);
RETURN(rc);
int *flags, void *data, struct lustre_handle *lockh)
{
struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
- struct obd_device *obddev = class_conn2obd(connh);
+ struct obd_device *obd = class_conn2obd(connh);
struct ldlm_extent *extent = extentp;
int rc;
ENTRY;
extent->end |= ~PAGE_MASK;
/* Next, search for already existing extent locks that will cover us */
- rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type,
+ rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
extent, sizeof(extent), mode, data, lockh);
if (rc)
RETURN(rc);
* VFS and page cache already protect us locally, so lots of readers/
* writers can share a single PW lock. */
if (mode == LCK_PR) {
- rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id,
+ rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id,
type, extent, sizeof(extent), LCK_PW,
data, lockh);
if (rc == 1) {
static int osc_cancel_unused(struct lustre_handle *connh,
struct lov_stripe_md *lsm, int flags, void *opaque)
{
- struct obd_device *obddev = class_conn2obd(connh);
+ struct obd_device *obd = class_conn2obd(connh);
struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} };
- return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
+ return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags,
opaque);
}
-static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
+static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
{
struct obd_statfs *msfs;
struct ptlrpc_request *request;
int rc, size = sizeof(*osfs);
ENTRY;
- request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0,
- NULL, NULL);
+ /* We could possibly pass max_age in the request (as an absolute
+ * timestamp or a "seconds.usec ago") so the target can avoid doing
+ * extra calls into the filesystem if that isn't necessary (e.g.
+ * during mount that would help a bit). Having relative timestamps
+ * is not so great if request processing is slow, while absolute
+ * timestamps are not ideal because they need time synchronization. */
+ request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS,0,NULL,NULL);
if (!request)
RETURN(-ENOMEM);
GOTO(out, rc);
}
- msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs),
- lustre_swab_obd_statfs);
+ msfs = lustre_swab_repbuf(request, 0, sizeof(*msfs),
+ lustre_swab_obd_statfs);
if (msfs == NULL) {
- CERROR ("Can't unpack obd_statfs\n");
- GOTO (out, rc = -EPROTO);
+ CERROR("Can't unpack obd_statfs\n");
+ GOTO(out, rc = -EPROTO);
}
- memcpy (osfs, msfs, sizeof (*msfs));
+ memcpy(osfs, msfs, sizeof(*osfs));
EXIT;
out:
static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
void *karg, void *uarg)
{
- struct obd_device *obddev = class_conn2obd(conn);
+ struct obd_device *obd = class_conn2obd(conn);
struct obd_ioctl_data *data = karg;
int err = 0;
ENTRY;
switch (cmd) {
case IOC_OSC_REGISTER_LOV: {
- if (obddev->u.cli.cl_containing_lov)
+ if (obd->u.cli.cl_containing_lov)
GOTO(out, err = -EALREADY);
- obddev->u.cli.cl_containing_lov = (struct obd_device *)karg;
+ obd->u.cli.cl_containing_lov = (struct obd_device *)karg;
GOTO(out, err);
}
case OBD_IOC_LOV_GET_CONFIG: {
desc->ld_default_stripe_size = 0;
desc->ld_default_stripe_offset = 0;
desc->ld_pattern = 0;
- memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid));
+ memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid));
- memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid));
+ memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid));
err = copy_to_user((void *)uarg, buf, len);
if (err)
err = osc_getstripe(conn, karg, uarg);
GOTO(out, err);
case OBD_IOC_CLIENT_RECOVER:
- err = ptlrpc_recover_import(obddev->u.cli.cl_import,
+ err = ptlrpc_recover_import(obd->u.cli.cl_import,
data->ioc_inlbuf1);
GOTO(out, err);
case IOC_OSC_SET_ACTIVE:
- err = ptlrpc_set_import_active(obddev->u.cli.cl_import,
+ err = ptlrpc_set_import_active(obd->u.cli.cl_import,
data->ioc_offset);
GOTO(out, err);
default:
- CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
+ CERROR("unrecognised ioctl %#x by %s\n", cmd, current->comm);
GOTO(out, err = -ENOTTY);
}
out:
RETURN(-EINVAL);
}
+/* Send a "set info" key to the target in an OST_SET_INFO request.
+ *
+ * Only keys that start with "mds_conn" are accepted; any other key is
+ * rejected with -EINVAL.  The key bytes travel as the single request
+ * buffer; vallen and val are not referenced by this function.
+ *
+ * Returns -ENOMEM when the request cannot be prepared, otherwise the
+ * result of ptlrpc_queue_wait(). */
+static int osc_set_info(struct lustre_handle *conn, obd_count keylen,
+                        void *key, obd_count vallen, void *val)
+{
+        struct ptlrpc_request *request;
+        char *reqbufs[1] = { key };
+        int reqsize = keylen;
+        int status;
+        ENTRY;
+
+        /* the key must be at least as long as, and begin with, "mds_conn" */
+        if (keylen < strlen("mds_conn"))
+                RETURN(-EINVAL);
+        if (memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+                RETURN(-EINVAL);
+
+        request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SET_INFO, 1,
+                                  &reqsize, reqbufs);
+        if (request == NULL)
+                RETURN(-ENOMEM);
+
+        /* reply length is that of a message with zero buffers */
+        request->rq_replen = lustre_msg_size(0, NULL);
+        status = ptlrpc_queue_wait(request);
+        ptlrpc_req_finished(request);
+
+        RETURN(status);
+}
+
+/* Queue one llog cancel cookie for the target and/or flush the pending batch.
+ *
+ * Cookies accumulate in the client's cl_llcd buffer, protected by cl_sem.
+ * The batch is handed to llcd_send() when fewer than sizeof(*cookies) bytes
+ * of a PAGE_SIZE budget remain, or when OBD_LLOG_FL_SENDNOW is set in flags.
+ *
+ * If there is nothing to add (count == 0, cookies == NULL, or the cookie
+ * equals zero_cookie) the call only flushes an existing batch, and only
+ * when OBD_LLOG_FL_SENDNOW is set.
+ *
+ * Note: exactly one cookie (cookies[0]) is queued per call regardless of
+ * count; lsm is not referenced.
+ *
+ * Returns 0 on success, -ENOMEM if no llcd could be obtained (the cookie
+ * is dropped and logged in that case). */
+static int osc_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct llog_commit_data *llcd;
+        struct client_obd *cli;
+        int rc = 0;
+        ENTRY;
+
+        cli = &obd->u.cli;
+        if (count == 0 || cookies == NULL ||
+            memcmp(cookies, &zero_cookie, sizeof(*cookies)) == 0) {
+                /* nothing to queue: flush-only request */
+                down(&cli->cl_sem);
+                if (cli->cl_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW))
+                        GOTO(out, rc);
+
+                llcd = cli->cl_llcd;
+                GOTO(send_now, rc);
+        }
+
+        down(&cli->cl_sem);
+        llcd = cli->cl_llcd;
+        if (llcd == NULL) {
+                llcd = llcd_grab();
+                if (llcd == NULL) {
+                        CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n",
+                               cookies->lgc_lgl.lgl_oid,
+                               cookies->lgc_lgl.lgl_ogen, cookies->lgc_index);
+                        GOTO(out, rc = -ENOMEM);
+                }
+                llcd->llcd_import = cli->cl_import;
+                cli->cl_llcd = llcd;
+        }
+
+        /* llcd_cookiebytes is a byte offset, so do the arithmetic on a
+         * char pointer; adding it to a typed cookie pointer would scale
+         * the offset by the element size and write far past the slot. */
+        memcpy((char *)llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies,
+               sizeof(*cookies));
+        llcd->llcd_cookiebytes += sizeof(*cookies);
+
+        /* If we can't fit any more cookies into the page, we need to send it */
+        /* NOTE(review): the budget is a full PAGE_SIZE; confirm that the
+         * llcd header is not carved out of the same page. */
+send_now:
+        if (PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) ||
+            (flags & OBD_LLOG_FL_SENDNOW)) {
+                cli->cl_llcd = NULL;
+                llcd_send(llcd);
+        }
+out:
+        up(&cli->cl_sem);
+
+        /* pair the ENTRY above with RETURN, as the sibling functions do */
+        RETURN(rc);
+}
+
+/* Disconnect from the target.  If a batch of cancel cookies is still
+ * pending on this client it is pushed out first through osc_log_cancel()
+ * with OBD_LLOG_FL_SENDNOW; the import is then disconnected and the result
+ * of client_import_disconnect() is returned. */
+static int osc_disconnect(struct lustre_handle *conn, int flags)
+{
+        struct client_obd *cli = &class_conn2obd(conn)->u.cli;
+
+        /* flush any remaining cancel messages out to the target */
+        if (cli->cl_llcd != NULL)
+                osc_log_cancel(conn, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+
+        return client_import_disconnect(conn, flags);
+}
+
+/* Add one record to the catalog log through llog_add_record().
+ *
+ * The caller must supply a cookie array (logcookies non-NULL and
+ * numcookies > 0); this is asserted.  conn and lsm are not referenced.
+ *
+ * Always returns 1.
+ * NOTE(review): the return value of llog_add_record() is discarded here;
+ * confirm whether it can fail and whether that should be propagated. */
+static int osc_log_add(struct lustre_handle *conn,
+ struct llog_handle *cathandle,
+ struct llog_trans_hdr *rec, struct lov_stripe_md *lsm,
+ struct llog_cookie *logcookies, int numcookies)
+{
+ ENTRY;
+ LASSERT(logcookies && numcookies > 0);
+
+ llog_add_record(cathandle, rec, logcookies);
+
+ RETURN(1);
+}
+
struct obd_ops osc_obd_ops = {
o_owner: THIS_MODULE,
o_attach: osc_attach,
o_setup: client_obd_setup,
o_cleanup: client_obd_cleanup,
o_connect: client_import_connect,
- o_disconnect: client_import_disconnect,
+ o_disconnect: osc_disconnect,
o_statfs: osc_statfs,
o_packmd: osc_packmd,
o_unpackmd: osc_unpackmd,
o_create: osc_create,
o_destroy: osc_destroy,
o_getattr: osc_getattr,
- o_getattr_async: osc_getattr_async,
+ o_getattr_async:osc_getattr_async,
o_setattr: osc_setattr,
o_open: osc_open,
o_close: osc_close,
o_enqueue: osc_enqueue,
o_match: osc_match,
o_cancel: osc_cancel,
- o_cancel_unused: osc_cancel_unused,
+ o_cancel_unused:osc_cancel_unused,
o_iocontrol: osc_iocontrol,
o_get_info: osc_get_info,
- .o_mark_page_dirty = osc_mark_page_dirty,
- .o_clear_dirty_pages = osc_clear_dirty_pages,
- .o_last_dirty_offset = osc_last_dirty_offset,
+ o_set_info: osc_set_info,
+ o_log_cancel: osc_log_cancel,
+ o_log_add: osc_log_add,
+ o_mark_page_dirty: osc_mark_page_dirty,
+ o_clear_dirty_pages: osc_clear_dirty_pages,
+ o_last_dirty_offset: osc_last_dirty_offset,
};
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
struct obd_ops sanosc_obd_ops = {
o_owner: THIS_MODULE,
o_attach: osc_attach,
o_setattr: osc_setattr,
o_open: osc_open,
o_close: osc_close,
-#ifdef __KERNEL__
o_setup: client_sanobd_setup,
o_brw: sanosc_brw,
-#endif
o_punch: osc_punch,
o_enqueue: osc_enqueue,
o_match: osc_match,
o_cancel: osc_cancel,
o_cancel_unused: osc_cancel_unused,
o_iocontrol: osc_iocontrol,
- .o_mark_page_dirty = osc_mark_page_dirty,
- .o_clear_dirty_pages = osc_clear_dirty_pages,
- .o_last_dirty_offset = osc_last_dirty_offset,
+ o_log_cancel: osc_log_cancel,
+ o_log_add: osc_log_add,
+ o_mark_page_dirty: osc_mark_page_dirty,
+ o_clear_dirty_pages: osc_clear_dirty_pages,
+ o_last_dirty_offset: osc_last_dirty_offset,
};
+#endif
+/* Module init: register the OSC obd type and, on kernels older than 2.5.0,
+ * the SAN OSC type as well.  If the SAN OSC registration fails, the OSC
+ * type is unregistered again and the error is returned.  Returns 0 on
+ * success or the class_register_type() error. */
int __init osc_init(void)
{
- struct lprocfs_static_vars lvars;
+ struct lprocfs_static_vars lvars, sanlvars;
int rc;
ENTRY;
LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE);
LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ);
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(osc,&lvars);
+ /* NOTE(review): sanlvars is also initialised from the "osc" variable
+  * set and is only consumed under the pre-2.5 #if below. */
+ lprocfs_init_vars(osc,&sanlvars);
rc = class_register_type(&osc_obd_ops, lvars.module_vars,
LUSTRE_OSC_NAME);
if (rc)
RETURN(rc);
- rc = class_register_type(&sanosc_obd_ops, lvars.module_vars,
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ rc = class_register_type(&sanosc_obd_ops, sanlvars.module_vars,
LUSTRE_SANOSC_NAME);
if (rc)
class_unregister_type(LUSTRE_OSC_NAME);
+#endif
RETURN(rc);
}
-static void __exit osc_exit(void)
+/* Module exit: unregister the obd types registered by osc_init(); the
+ * SAN OSC type is only unregistered on kernels older than 2.5.0, matching
+ * the conditional registration. */
+static void /*__exit*/ osc_exit(void)
{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
class_unregister_type(LUSTRE_SANOSC_NAME);
+#endif
class_unregister_type(LUSTRE_OSC_NAME);
}
Makefile.in
.deps
TAGS
+.*.cmd
#include <linux/lprocfs_status.h>
#ifndef LPROCFS
-struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
-struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
#else
-struct lprocfs_vars lprocfs_obd_vars[] = {
- { "uuid", lprocfs_rd_uuid, 0, 0 },
+static struct lprocfs_vars lprocfs_obd_vars[] = {
+ { "uuid", lprocfs_rd_uuid, 0, 0 },
{ 0 }
};
-struct lprocfs_vars lprocfs_module_vars[] = {
- { "num_refs", lprocfs_rd_numrefs, 0, 0 },
+static struct lprocfs_vars lprocfs_module_vars[] = {
+ { "num_refs", lprocfs_rd_numrefs, 0, 0 },
{ 0 }
};
#endif /* LPROCFS */
-LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars)
+LPROCFS_INIT_VARS(ost, lprocfs_module_vars, lprocfs_obd_vars)
#include <linux/lustre_export.h>
#include <linux/init.h>
#include <linux/lprocfs_status.h>
+#include <linux/lustre_commit_confd.h>
+#include <portals/list.h>
-inline void oti_init(struct obd_trans_info *oti,
- struct ptlrpc_request *req)
+/* Reset a transaction-info structure for a new request.
+ *
+ * A NULL oti is ignored.  Otherwise oti is zeroed and, when the request
+ * has both a reply message and a request message, oti_transno is set from
+ * the reply message's transno. */
+void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req)
{
- if(oti == NULL)
+ if (oti == NULL)
return;
memset(oti, 0, sizeof *oti);
-
if (req->rq_repmsg && req->rq_reqmsg != 0)
oti->oti_transno = req->rq_repmsg->transno;
-
- EXIT;
}
-inline void oti_to_request(struct obd_trans_info *oti,
- struct ptlrpc_request *req)
+void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req)
{
- int i;
struct oti_req_ack_lock *ack_lock;
+ int i;
- if(oti == NULL)
+ if (oti == NULL)
return;
if (req->rq_repmsg)
sizeof(req->rq_ack_locks[i].lock));
req->rq_ack_locks[i].mode = ack_lock->mode;
}
- EXIT;
}
static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti)
int rc, size = sizeof(*body);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
+ if (body->oa.o_valid & OBD_MD_FLCOOKIE)
+ oti->oti_logcookies = obdo_logcookie(&body->oa);
req->rq_status = obd_destroy(conn, &body->oa, NULL, oti);
RETURN(0);
}
int rc, size = sizeof(*body);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_getattr(conn, &repbody->oa, NULL);
RETURN(0);
if (rc)
RETURN(rc);
- osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
- memset(osfs, 0, size);
+ osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*osfs));
- req->rq_status = obd_statfs(req->rq_export, osfs);
+ req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ);
if (req->rq_status != 0)
CERROR("ost: statfs failed: rc %d\n", req->rq_status);
int rc, size = sizeof(*repbody);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- return (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL);
RETURN(0);
int rc, size = sizeof(*repbody);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_close(conn, &repbody->oa, NULL, oti);
RETURN(0);
int rc, size = sizeof(*repbody);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
+ oti->oti_logcookies = obdo_logcookie(&repbody->oa);
req->rq_status = obd_create(conn, &repbody->oa, NULL, oti);
+ //obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0);
RETURN(0);
}
int rc, size = sizeof(*repbody);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size,
repbody->oa.o_blocks, oti);
int rc, size = sizeof(*repbody);
ENTRY;
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
RETURN(rc);
- repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody));
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti);
RETURN(1);
}
-static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo,
- struct niobuf_remote *rnb, int nrnb,
- struct niobuf_remote **pp_rnbp)
+static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo,
+ struct niobuf_remote *rnb, int nrnb,
+ struct niobuf_remote **pp_rnbp)
{
/* Copy a remote niobuf, splitting it into page-sized chunks
* and setting ioo[i].ioo_bufcnt accordingly */
obd_off p0 = offset >> PAGE_SHIFT;
obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT;
- LASSERT (rnbidx < nrnb);
+ LASSERT(rnbidx < nrnb);
npages += (pn + 1 - p0);
if (rnb[rnbidx].len == 0) {
CERROR("zero len BRW: obj %d objid "LPX64
" buf %u\n", i, ioo[i].ioo_id, j);
- return (-EINVAL);
+ return -EINVAL;
}
if (j > 0 &&
rnb[rnbidx].offset <= rnb[rnbidx-1].offset) {
" buf %u offset "LPX64" <= "LPX64"\n",
i, ioo[i].ioo_id, j, rnb[rnbidx].offset,
rnb[rnbidx].offset);
- return (-EINVAL);
+ return -EINVAL;
}
}
- LASSERT (rnbidx == nrnb);
+ LASSERT(rnbidx == nrnb);
if (npages == nrnb) { /* all niobufs are for single pages */
*pp_rnbp = rnb;
- return (npages);
+ return npages;
}
- OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages);
+ OBD_ALLOC(pp_rnb, sizeof(*pp_rnb) * npages);
if (pp_rnb == NULL)
- return (-ENOMEM);
+ return -ENOMEM;
/* now do the actual split */
page = rnbidx = 0;
obd_off off = rnb[rnbidx].offset;
int nob = rnb[rnbidx].len;
- LASSERT (rnbidx < nrnb);
+ LASSERT(rnbidx < nrnb);
do {
obd_off poff = off & (PAGE_SIZE - 1);
int pnob = (poff + nob > PAGE_SIZE) ?
PAGE_SIZE - poff : nob;
- LASSERT (page < npages);
+ LASSERT(page < npages);
pp_rnb[page].len = pnob;
pp_rnb[page].offset = off;
pp_rnb[page].flags = rnb->flags;
- CDEBUG (D_PAGE, " obj %d id "LPX64
- "page %d(%d) "LPX64" for %d\n",
- i, ioo[i].ioo_id, obj_pages, page,
- pp_rnb[page].offset, pp_rnb[page].len);
+ CDEBUG(D_PAGE, " obj %d id "LPX64
+ "page %d(%d) "LPX64" for %d\n",
+ i, ioo[i].ioo_id, obj_pages, page,
+ pp_rnb[page].offset, pp_rnb[page].len);
page++;
obj_pages++;
off += pnob;
nob -= pnob;
} while (nob > 0);
- LASSERT (nob == 0);
+ LASSERT(nob == 0);
}
ioo[i].ioo_bufcnt = obj_pages;
}
- LASSERT (page == npages);
+ LASSERT(page == npages);
*pp_rnbp = pp_rnb;
- return (npages);
+ return npages;
}
static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb,
if (pp_rnb == rnb) /* didn't allocate above */
return;
- OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages);
+ OBD_FREE(pp_rnb, sizeof(*pp_rnb) * npages);
}
#if CHECKSUM_BULK
+/* Compute a checksum over every page attached to a bulk descriptor.
+ *
+ * Each page on desc->bd_page_list is mapped with kmap(), the bp_buflen
+ * bytes starting at bp_pageoffset are folded into the running checksum by
+ * ost_checksum(), and the page is unmapped again.  Returns the accumulated
+ * checksum (0 for an empty page list). */
__u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc)
{
__u64 cksum = 0;
- struct list_head *tmp;
- char *ptr;
+ struct ptlrpc_bulk_page *bp;
- list_for_each (tmp, &desc->bd_page_list) {
- struct ptlrpc_bulk_page *bp;
-
- bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link);
- ptr = kmap (bp->bp_page);
- ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen);
- kunmap (bp->bp_page);
+ list_for_each_entry(bp, &desc->bd_page_list, bp_link) {
+ ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset,
+ bp->bp_buflen);
+ kunmap(bp->bp_page);
}
+
+ /* callers store the result in oa.o_rdev, so it must be returned */
+ return cksum;
}
#endif
struct niobuf_remote *pp_rnb;
struct niobuf_local *local_nb;
struct obd_ioobj *ioo;
- struct ost_body *body;
+ struct ost_body *body, *repbody;
struct l_wait_info lwi;
- void *desc_priv = NULL;
+ struct obd_trans_info oti = { 0 };
int size[1] = { sizeof(*body) };
int comms_error = 0;
int niocount;
body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL) {
- CERROR ("Missing/short ost_body\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short ost_body\n");
+ GOTO(out, rc = -EFAULT);
}
- ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo),
- lustre_swab_obd_ioobj);
+ ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
if (ioo == NULL) {
- CERROR ("Missing/short ioobj\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short ioobj\n");
+ GOTO(out, rc = -EFAULT);
}
niocount = ioo->ioo_bufcnt;
- remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+ remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
lustre_swab_niobuf_remote);
if (remote_nb == NULL) {
- CERROR ("Missing/short niobuf\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short niobuf\n");
+ GOTO(out, rc = -EFAULT);
}
- if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */
+ if (lustre_msg_swabbed(req->rq_reqmsg)) { /* swab remaining niobufs */
for (i = 1; i < niocount; i++)
lustre_swab_niobuf_remote (&remote_nb[i]);
}
+ size[0] = sizeof(*body);
rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
GOTO(out, rc);
+ /* FIXME all niobuf splitting should be done in obdfilter if needed */
/* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
- npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb);
+ npages = get_per_page_niobufs(ioo, 1, remote_nb, niocount, &pp_rnb);
if (npages < 0)
GOTO(out, rc = npages);
if (local_nb == NULL)
GOTO(out_pp_rnb, rc = -ENOMEM);
- desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
+ desc = ptlrpc_prep_bulk_exp(req, BULK_PUT_SOURCE, OST_BULK_PORTAL);
if (desc == NULL)
GOTO(out_local, rc = -ENOMEM);
- rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages,
- pp_rnb, local_nb, &desc_priv, NULL);
+ rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+ ioo, npages, pp_rnb, local_nb, &oti);
if (rc != 0)
GOTO(out_bulk, rc);
break;
}
- LASSERT (page_rc <= pp_rnb[i].len);
+ LASSERT(page_rc <= pp_rnb[i].len);
nob += page_rc;
if (page_rc != 0) { /* some data! */
LASSERT (local_nb[i].page != NULL);
if (page_rc != pp_rnb[i].len) { /* short read */
/* All subsequent pages should be 0 */
- while (++i < npages)
- LASSERT (local_nb[i].rc == 0);
+ while(++i < npages)
+ LASSERT(local_nb[i].rc == 0);
break;
}
}
if (rc) {
LASSERT(rc == -ETIMEDOUT);
CERROR ("timeout waiting for bulk PUT\n");
- ptlrpc_abort_bulk (desc);
+ ptlrpc_abort_bulk(desc);
}
} else {
CERROR("ptlrpc_bulk_put failed RC: %d\n", rc);
}
/* Must commit after prep above in all cases */
- rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages,
- local_nb, desc_priv, NULL);
+ rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+ ioo, npages, local_nb, &oti);
+
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+ memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
#if CHECKSUM_BULK
if (rc == 0) {
- body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
- body->oa.o_rdev = ost_checksum_bulk (desc);
- body->oa.o_valid |= OBD_MD_FLCKSUM;
+ repbody->oa.o_rdev = ost_checksum_bulk(desc);
+ repbody->oa.o_valid |= OBD_MD_FLCKSUM;
}
#endif
out_bulk:
- ptlrpc_free_bulk (desc);
+ ptlrpc_free_bulk(desc);
out_local:
OBD_FREE(local_nb, sizeof(*local_nb) * npages);
out_pp_rnb:
- free_per_page_niobufs (npages, pp_rnb, remote_nb);
+ free_per_page_niobufs(npages, pp_rnb, remote_nb);
out:
- LASSERT (rc <= 0);
+ LASSERT(rc <= 0);
if (rc == 0) {
req->rq_status = nob;
ptlrpc_reply(req);
} else {
if (req->rq_repmsg != NULL) {
/* reply out callback would free */
- OBD_FREE (req->rq_repmsg, req->rq_replen);
+ OBD_FREE(req->rq_repmsg, req->rq_replen);
}
CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n",
req->rq_export->exp_client_uuid.uuid,
struct niobuf_remote *pp_rnb;
struct niobuf_local *local_nb;
struct obd_ioobj *ioo;
- struct ost_body *body;
+ struct ost_body *body, *repbody;
struct l_wait_info lwi;
- void *desc_priv = NULL;
__u32 *rcs;
- int size[2] = { sizeof (*body) };
+ int size[2] = { sizeof(*body) };
int objcount, niocount, npages;
int comms_error = 0;
int rc, rc2, swab, i, j;
GOTO(out, rc = -EIO);
/* pause before transaction has been started */
- OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
+ OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
obd_timeout +1);
- swab = lustre_msg_swabbed (req->rq_reqmsg);
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ swab = lustre_msg_swabbed(req->rq_reqmsg);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL) {
- CERROR ("Missing/short ost_body\n");
+ CERROR("Missing/short ost_body\n");
GOTO(out, rc = -EFAULT);
}
- LASSERT_REQSWAB (req, 1);
+ LASSERT_REQSWAB(req, 1);
objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
if (objcount == 0) {
- CERROR ("Missing/short ioobj\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short ioobj\n");
+ GOTO(out, rc = -EFAULT);
}
- ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo));
+ ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof(*ioo));
LASSERT (ioo != NULL);
for (niocount = i = 0; i < objcount; i++) {
if (swab)
lustre_swab_obd_ioobj (&ioo[i]);
if (ioo[i].ioo_bufcnt == 0) {
- CERROR ("ioo[%d] has zero bufcnt\n", i);
- GOTO (out, rc = -EFAULT);
+ CERROR("ioo[%d] has zero bufcnt\n", i);
+ GOTO(out, rc = -EFAULT);
}
niocount += ioo[i].ioo_bufcnt;
}
- remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+ remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
lustre_swab_niobuf_remote);
if (remote_nb == NULL) {
- CERROR ("Missing/short niobuf\n");
+ CERROR("Missing/short niobuf\n");
GOTO(out, rc = -EFAULT);
}
if (swab) { /* swab the remaining niobufs */
lustre_swab_niobuf_remote (&remote_nb[i]);
}
- size[1] = niocount * sizeof (*rcs);
+ size[1] = niocount * sizeof(*rcs);
rc = lustre_pack_msg(2, size, NULL, &req->rq_replen,
&req->rq_repmsg);
if (rc != 0)
- GOTO (out, rc);
- rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs));
+ GOTO(out, rc);
+ rcs = lustre_msg_buf(req->rq_repmsg, 1, niocount * sizeof(*rcs));
+ /* FIXME all niobuf splitting should be done in obdfilter if needed */
/* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */
npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb);
if (npages < 0)
- GOTO (out, rc = npages);
+ GOTO(out, rc = npages);
OBD_ALLOC(local_nb, sizeof(*local_nb) * npages);
if (local_nb == NULL)
GOTO(out_pp_rnb, rc = -ENOMEM);
- desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL);
+ desc = ptlrpc_prep_bulk_exp(req, BULK_GET_SINK, OST_BULK_PORTAL);
if (desc == NULL)
GOTO(out_local, rc = -ENOMEM);
- rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo,
- npages, pp_rnb, local_nb, &desc_priv, oti);
+ rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
+ ioo, npages, pp_rnb, local_nb, oti);
if (rc != 0)
- GOTO (out_bulk, rc);
+ GOTO(out_bulk, rc);
/* NB Having prepped, we must commit... */
ptlrpc_bulk_complete(desc), &lwi);
if (rc) {
LASSERT(rc == -ETIMEDOUT);
- CERROR ("timeout waiting for bulk GET\n");
- ptlrpc_abort_bulk (desc);
+ CERROR("timeout waiting for bulk GET\n");
+ ptlrpc_abort_bulk(desc);
}
} else {
CERROR("ptlrpc_bulk_get failed RC: %d\n", rc);
comms_error = rc != 0;
}
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+ memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa));
+
#if CHECKSUM_BULK
if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) {
static int cksum_counter;
__u64 client_cksum = body->oa.o_rdev;
- __u64 cksum = ost_checksum_bulk (desc);
+ __u64 cksum = ost_checksum_bulk(desc);
if (client_cksum != cksum) {
CERROR("Bad checksum: client "LPX64", server "LPX64
", client NID "LPX64"\n", client_cksum, cksum,
req->rq_connection->c_peer.peer_nid);
cksum_counter = 1;
+ repbody->oa.o_rdev = cksum;
} else {
cksum_counter++;
if ((cksum_counter & (-cksum_counter)) == cksum_counter)
}
#endif
/* Must commit after prep above in all cases */
- rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo,
- npages, local_nb, desc_priv, oti);
+ rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
+ objcount, ioo, npages, local_nb, oti);
if (rc == 0) {
/* set per-requested niobuf return codes */
rcs[i] = 0;
do {
- LASSERT (j < npages);
+ LASSERT(j < npages);
if (local_nb[j].rc < 0)
rcs[i] = local_nb[j].rc;
nob -= pp_rnb[j].len;
j++;
} while (nob > 0);
- LASSERT (nob == 0);
+ LASSERT(nob == 0);
}
- LASSERT (j == npages);
+ LASSERT(j == npages);
}
if (rc == 0)
rc = rc2;
out_bulk:
- ptlrpc_free_bulk (desc);
+ ptlrpc_free_bulk(desc);
out_local:
OBD_FREE(local_nb, sizeof(*local_nb) * npages);
out_pp_rnb:
- free_per_page_niobufs (npages, pp_rnb, remote_nb);
+ free_per_page_niobufs(npages, pp_rnb, remote_nb);
out:
if (rc == 0) {
oti_to_request(oti, req);
static int ost_san_brw(struct ptlrpc_request *req, int cmd)
{
- struct lustre_handle *conn = &req->rq_reqmsg->handle;
struct niobuf_remote *remote_nb, *res_nb;
struct obd_ioobj *ioo;
- struct ost_body *body;
+ struct ost_body *body, *repbody;
int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
int n;
int swab;
/* XXX not set to use latest protocol */
- swab = lustre_msg_swabbed (req->rq_reqmsg);
- body = lustre_swab_reqbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ swab = lustre_msg_swabbed(req->rq_reqmsg);
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body);
if (body == NULL) {
- CERROR ("Missing/short ost_body\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short ost_body\n");
+ GOTO(out, rc = -EFAULT);
}
- ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo),
- lustre_swab_obd_ioobj);
+ ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj);
if (ioo == NULL) {
- CERROR ("Missing/short ioobj\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short ioobj\n");
+ GOTO(out, rc = -EFAULT);
}
objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo);
niocount = ioo[0].ioo_bufcnt;
niocount += ioo[i].ioo_bufcnt;
}
- remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb),
+ remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb),
lustre_swab_niobuf_remote);
if (remote_nb == NULL) {
- CERROR ("Missing/short niobuf\n");
- GOTO (out, rc = -EFAULT);
+ CERROR("Missing/short niobuf\n");
+ GOTO(out, rc = -EFAULT);
}
if (swab) { /* swab the remaining niobufs */
for (i = 1; i < niocount; i++)
if (rc)
GOTO(out, rc);
- req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo,
- niocount, remote_nb);
+ req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa,
+ objcount, ioo, niocount, remote_nb);
if (req->rq_status)
- GOTO (out, rc = 0);
+ GOTO(out, rc = 0);
+
+ repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody));
+ memcpy(&repbody->oa, &body->oa, sizeof(body->oa));
res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]);
- memcpy (res_nb, remote_nb, size[1]);
+ memcpy(res_nb, remote_nb, size[1]);
rc = 0;
out:
if (rc) {
return rc;
}
+static int ost_log_cancel(struct ptlrpc_request *req) /* OBD_LOG_CANCEL handler: hands the llog cookies in request buffer 0 to obd_log_cancel() */
+{
+ struct lustre_handle *conn;
+ struct llog_cookie *logcookies;
+ int num_cookies, rc = 0;
+ ENTRY;
+
+ logcookies = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*logcookies)); /* buffer 0 = cookie array; last arg is presumably the minimum acceptable length -- TODO confirm */
+ if (logcookies == NULL) {
+ DEBUG_REQ(D_HA, req, "no cookies sent");
+ RETURN(-EFAULT); /* missing cookie buffer: fail before any reply is packed */
+ }
+ num_cookies = req->rq_reqmsg->buflens[0] / sizeof(*logcookies); /* number of whole cookies in buffer 0 */
+
+ /* workaround until we don't need to send replies */
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); /* pack a reply with zero buffers */
+ if (rc)
+ RETURN(rc); /* reply could not be packed: propagate the error */
+ req->rq_repmsg->status = 0;
+ /* end workaround */
+
+ conn = (struct lustre_handle *)&req->rq_reqmsg->handle; /* connection handle is taken from the request message */
+ rc = obd_log_cancel(conn, NULL, num_cookies, logcookies, 0); /* meaning of the NULL and 0 arguments is not visible here -- see obd_log_cancel() */
+
+ RETURN(rc); /* result of obd_log_cancel() is returned to the caller unchanged */
+}
+
+static int ost_set_info(struct ptlrpc_request *req) /* OST_SET_INFO handler: passes the key in request buffer 0 to obd_set_info() */
+{
+ struct lustre_handle *conn;
+ char *key;
+ int keylen, rc = 0;
+ ENTRY;
+
+ key = lustre_msg_buf(req->rq_reqmsg, 0, 1); /* buffer 0 = key; last arg is presumably the minimum acceptable length (1 byte) -- TODO confirm */
+ if (key == NULL) {
+ DEBUG_REQ(D_HA, req, "no set_info key");
+ RETURN(-EFAULT); /* missing key buffer: fail before any reply is packed */
+ }
+ keylen = req->rq_reqmsg->buflens[0]; /* key length is the full length of buffer 0 */
+
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); /* pack a reply with zero buffers */
+ if (rc)
+ RETURN(rc); /* reply could not be packed: propagate the error */
+
+ conn = (struct lustre_handle *)&req->rq_reqmsg->handle; /* connection handle is taken from the request message */
+ rc = obd_set_info(conn, keylen, key, 0, NULL); /* trailing 0/NULL presumably mean "no value payload" -- TODO confirm against obd_set_info() */
+ req->rq_repmsg->status = 0; /* reply status is zeroed regardless of rc; rc itself goes back to the caller */
+ RETURN(rc);
+}
+
static int filter_recovery_request(struct ptlrpc_request *req,
struct obd_device *obd, int *process)
{
case OST_DESTROY:
case OST_OPEN:
case OST_PUNCH:
- case OST_SETATTR:
+ case OST_SETATTR:
case OST_SYNCFS:
case OST_WRITE:
+ case OBD_LOG_CANCEL:
case LDLM_ENQUEUE:
*process = target_queue_recovery_request(req, obd);
RETURN(0);
int abort_recovery, recovering;
if (req->rq_export == NULL) {
- CERROR("lustre_ost: operation %d on unconnected OST\n",
+ CDEBUG(D_HA, "operation %d on unconnected OST\n",
req->rq_reqmsg->opc);
req->rq_status = -ENOTCONN;
GOTO(out, rc = -ENOTCONN);
if (rc || !should_process)
RETURN(rc);
}
- }
+ }
if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
GOTO(out, rc = -EINVAL);
OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0);
rc = ost_syncfs(req);
break;
+ case OST_SET_INFO:
+ DEBUG_REQ(D_INODE, req, "set_info");
+ rc = ost_set_info(req);
case OBD_PING:
DEBUG_REQ(D_INODE, req, "ping");
rc = target_handle_ping(req);
break;
+ case OBD_LOG_CANCEL:
+ CDEBUG(D_INODE, "log cancel\n");
+ OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+ rc = ost_log_cancel(req);
+ break;
case LDLM_ENQUEUE:
CDEBUG(D_INODE, "enqueue\n");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
static int ost_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct ost_obd *ost = &obddev->u.ost;
- int err;
- int i;
+ int err, i;
ENTRY;
+#ifdef ENABLE_ORPHANS
+ err = llog_start_commit_thread();
+ if (err < 0)
+ RETURN(err);
+#endif
+
ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
OST_BUFSIZE, OST_MAXREQSIZE,
OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
ost_handle, "ost", obddev);
if (!ost->ost_service) {
CERROR("failed to start service\n");
- GOTO(error_disc, err = -ENOMEM);
+ RETURN(-ENOMEM);
}
for (i = 0; i < OST_NUM_THREADS; i++) {
err = ptlrpc_start_thread(obddev, ost->ost_service, name);
if (err) {
CERROR("error starting thread #%d: rc %d\n", i, err);
- GOTO(error_disc, err = -EINVAL);
+ RETURN(-EINVAL);
}
}
RETURN(0);
-
-error_disc:
- RETURN(err);
}
-static int ost_cleanup(struct obd_device *obddev, int force, int failover)
+static int ost_cleanup(struct obd_device *obddev, int flags)
{
struct ost_obd *ost = &obddev->u.ost;
int err = 0;
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(ost,&lvars);
return lprocfs_obd_attach(dev, lvars.obd_vars);
}
return lprocfs_obd_detach(dev);
}
-/* I don't think this function is ever used, since nothing
+/* I don't think this function is ever used, since nothing
* connects directly to this module.
*/
static int ost_connect(struct lustre_handle *conn,
struct lprocfs_static_vars lvars;
ENTRY;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(ost,&lvars);
RETURN(class_register_type(&ost_obd_ops, lvars.module_vars,
LUSTRE_OST_NAME));
}
-static void __exit ost_exit(void)
+static void /*__exit*/ ost_exit(void)
{
class_unregister_type(LUSTRE_OST_NAME);
}
config.log
config.status
configure
+.*.o.cmd
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+# portals/utils/debug.c wants <linux/version.h> from userspace. sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
-EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
-include fs/lustre/portals/Kernelenv
+include $(src)/Kernelenv
-obj-y += portals/
+# The ordering of these determines the order in which each subsystem's
+# module_init() functions are called.  If these are changed, make sure
+# they reflect the dependencies between each subsystem's _init functions.
obj-y += libcfs/
-obj-y += knals/
+obj-y += portals/
obj-y += router/
+obj-y += knals/
+obj-y += tests/
+
+obj-m += utils/
AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux)
AC_SUBST(LINUX)
+if test x$enable_inkernel = xyes ; then
+ echo ln -s `pwd` $LINUX/fs/lustre
+ rm $LINUX/fs/lustre
+ ln -s `pwd` $LINUX/fs/lustre
+fi
-# --------- UML? --------------------
+# --------------------
AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...)
if test $host_cpu = "lib" ; then
host_cpu="lib"
MOD_LINK=elf64_ia64
;;
+ x86_64 )
+ AC_MSG_RESULT($host_cpu)
+ KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables'
+ KCPPFLAGS='-D__KERNEL__ -DMODULE'
+ MOD_LINK=elf_x86_64
+;;
+
sparc64 )
AC_MSG_RESULT($host_cpu)
KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs'
AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.)
fi
-# ------------ RELEASE and moduledir ------------------
+# ------------ LINUXRELEASE and moduledir ------------------
AC_MSG_CHECKING(for Linux release)
dnl We need to rid ourselves of the nasty [ ] quotes.
changequote(, )
dnl Get release from version.h
- RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
+ LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`"
changequote([, ])
- moduledir='$(libdir)/modules/'$RELEASE/kernel
+ moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
AC_SUBST(moduledir)
modulefsdir='$(moduledir)/fs/$(PACKAGE)'
AC_SUBST(modulefsdir)
+ AC_MSG_RESULT($LINUXRELEASE)
+ AC_SUBST(LINUXRELEASE)
+
+# ------------ RELEASE --------------------------------
+ AC_MSG_CHECKING(lustre release)
+
+ dnl We need to rid ourselves of the nasty [ ] quotes.
+ changequote(, )
+ dnl Get release from version.h
+ RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`"
+ changequote([, ])
+
AC_MSG_RESULT($RELEASE)
AC_SUBST(RELEASE)
# This needs to run after we've defined the KCPPFLAGS
AC_MSG_CHECKING(for kernel version)
-AC_TRY_LINK([#define __KERNEL__
+AC_TRY_COMPILE([#define __KERNEL__
#include <linux/sched.h>],
[struct task_struct p;
p.sighand = NULL;],
AC_MSG_RESULT(redhat-2.4.20)
CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20"
else
- AC_MSG_RESULT($RELEASE)
+ AC_MSG_RESULT($LINUXRELEASE)
fi
/* portals/include/config.h.in. Generated from configure.in by autoheader. */
+/* Compile with orphan support */
+#undef ENABLE_ORPHANS
+
+/* Use the Pinger */
+#undef ENABLE_PINGER
+
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
#ifndef _KP30_INCLUDED
#define _KP30_INCLUDED
-
#define PORTAL_DEBUG
#ifndef offsetof
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
-#ifndef CONFIG_SMP
-# define smp_processor_id() 0
-#endif
-
/*
* Debugging
*/
extern unsigned int portal_stack;
extern unsigned int portal_debug;
extern unsigned int portal_printk;
-/* Debugging subsystems (8 bit ID)
- *
- * If you add debug subsystem #32, you need to send email to phil, because
- * you're going to break kernel subsystem debug filtering. */
-#define S_UNDEFINED (0 << 24)
-#define S_MDC (1 << 24)
-#define S_MDS (2 << 24)
-#define S_OSC (3 << 24)
-#define S_OST (4 << 24)
-#define S_CLASS (5 << 24)
-#define S_OBDFS (6 << 24) /* obsolete */
-#define S_LLITE (7 << 24)
-#define S_RPC (8 << 24)
-#define S_EXT2OBD (9 << 24) /* obsolete */
-#define S_PORTALS (10 << 24)
-#define S_SOCKNAL (11 << 24)
-#define S_QSWNAL (12 << 24)
-#define S_PINGER (13 << 24)
-#define S_FILTER (14 << 24)
-#define S_TRACE (15 << 24) /* obsolete */
-#define S_ECHO (16 << 24)
-#define S_LDLM (17 << 24)
-#define S_LOV (18 << 24)
-#define S_GMNAL (19 << 24)
-#define S_PTLROUTER (20 << 24)
-#define S_COBD (21 << 24)
-#define S_PTLBD (22 << 24)
-#define S_LOG (23 << 24)
-
-/* If you change these values, please keep portals/linux/utils/debug.c
+/* Debugging subsystems (32 bits, non-overlapping) */
+#define S_UNDEFINED (1 << 0)
+#define S_MDC (1 << 1)
+#define S_MDS (1 << 2)
+#define S_OSC (1 << 3)
+#define S_OST (1 << 4)
+#define S_CLASS (1 << 5)
+#define S_LOG (1 << 6)
+#define S_LLITE (1 << 7)
+#define S_RPC (1 << 8)
+#define S_MGMT (1 << 9)
+#define S_PORTALS (1 << 10)
+#define S_SOCKNAL (1 << 11)
+#define S_QSWNAL (1 << 12)
+#define S_PINGER (1 << 13)
+#define S_FILTER (1 << 14)
+#define S_PTLBD (1 << 15)
+#define S_ECHO (1 << 16)
+#define S_LDLM (1 << 17)
+#define S_LOV (1 << 18)
+#define S_GMNAL (1 << 19)
+#define S_PTLROUTER (1 << 20)
+#define S_COBD (1 << 21)
+
+/* If you change these values, please keep portals/utils/debug.c
* up to date! */
-/* Debugging masks (24 bits, non-overlapping) */
+/* Debugging masks (32 bits, non-overlapping) */
#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */
#define D_INODE (1 << 1)
#define D_SUPER (1 << 2)
#define D_RPCTRACE (1 << 20) /* for distributed debugging */
#define D_VFSTRACE (1 << 21)
-#ifndef __KERNEL__
-#define THREAD_SIZE 8192
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# define THREAD_SIZE 8192
#endif
-#ifdef __ia64__
-#define CDEBUG_STACK() (THREAD_SIZE - \
+
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_dwarf_cfa() & \
(THREAD_SIZE - 1)))
-#else
-#define CDEBUG_STACK() (THREAD_SIZE - \
+# else
+# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_frame_address(0) & \
(THREAD_SIZE - 1)))
-#endif
+# endif
-#ifdef __KERNEL__
#define CHECK_STACK(stack) \
do { \
if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
/*panic("LBUG");*/ \
} \
} while (0)
-#else
+#else /* __KERNEL__ */
#define CHECK_STACK(stack) do { } while(0)
-#endif
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
#if 1
#define CDEBUG(mask, format, a...) \
do { \
- CHECK_STACK(CDEBUG_STACK()); \
+ CHECK_STACK(CDEBUG_STACK); \
if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \
(portal_debug & (mask) && \
- portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \
+ portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
portals_debug_msg(DEBUG_SUBSYSTEM, mask, \
__FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK(), format , ## a); \
+ CDEBUG_STACK, format, ## a); \
} while (0)
#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
#define EXIT do { } while (0)
#endif
-
#ifdef __KERNEL__
# include <linux/vmalloc.h>
# include <linux/time.h>
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
#ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr,char *file,char *func,int line);
+extern void kportal_assertion_failed(char *expr, char *file, const char *func,
+ const int line);
#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
__FUNCTION__, __LINE__))
#else
#endif /* PORTALS_PROFILING */
/* debug.c */
-void portals_run_lbug_upcall(char * file, char *fn, int line);
+void portals_run_lbug_upcall(char * file, const char *fn, const int line);
void portals_debug_dumplog(void);
int portals_debug_init(unsigned long bufsize);
int portals_debug_cleanup(void);
int portals_debug_clear_buffer(void);
int portals_debug_mark_buffer(char *text);
int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
- char *file, unsigned int size);
+ char *file, unsigned int size);
__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
#if (__GNUC__)
/* Use the special GNU C __attribute__ hack to have the compiler check the
# warning printf has been defined as a macro...
# undef printf
#endif
-void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- unsigned long stack, const char *format, ...)
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack,
+ const char *format, ...)
__attribute__ ((format (printf, 7, 8)));
#else
-void portals_debug_msg (int subsys, int mask, char *file, char *fn,
- int line, unsigned long stack,
- const char *format, ...);
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack,
+ const char *format, ...);
#endif /* __GNUC__ */
void portals_debug_set_level(unsigned int debug_level);
# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0);
# define PORTAL_FREE(a, b) do { free(a); } while (0);
# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
- printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
- (subsys) >> 24, (mask), (long)time(0), file, fn, line, \
- getpid() , stack, ## a);
+ printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \
+ (subsys), (mask), (long)time(0), file, fn, line, \
+ getpid() , stack, ## a);
#endif
#ifndef CURRENT_TIME
void kportal_put_ni (int nal);
#ifdef __CYGWIN__
-#ifndef BITS_PER_LONG
-#if (~0UL) == 0xffffffffUL
-#define BITS_PER_LONG 32
-#else
-#define BITS_PER_LONG 64
-#endif
-#endif
+# ifndef BITS_PER_LONG
+# if (~0UL) == 0xffffffffUL
+# define BITS_PER_LONG 32
+# else
+# define BITS_PER_LONG 64
+# endif
+# endif
#endif
#if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _PORTALS_COMPAT_H
+#define _PORTALS_COMPAT_H
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+# define SIGNAL_MASK_ASSERT() \
+ LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+# define SIGNAL_MASK_ASSERT() \
+ LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-# define SIGNAL_MASK_LOCK(task, flags) \
+
+# define SIGNAL_MASK_LOCK(task, flags) \
spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
+# define SIGNAL_MASK_UNLOCK(task, flags) \
spin_unlock_irqrestore(&task->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp, 1)
# define RECALC_SIGPENDING recalc_sigpending()
-#else
-# define SIGNAL_MASK_LOCK(task, flags) \
+# define CURRENT_SECONDS get_seconds()
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags) \
spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags) \
+# define SIGNAL_MASK_UNLOCK(task, flags) \
spin_unlock_irqrestore(&task->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp) \
+ call_usermodehelper(path, argv, envp)
# define RECALC_SIGPENDING recalc_sigpending(current)
+# define CURRENT_SECONDS CURRENT_TIME
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid)
+#else
+# define THREAD_NAME(comm, fmt, a...) \
+ sprintf(comm, fmt, ## a)
#endif
+
+#endif /* _PORTALS_COMPAT_H */
#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-
/*
* Simple doubly linked list implementation.
__list_del(entry->prev, entry->next);
INIT_LIST_HEAD(entry);
}
+#endif
+#ifndef list_for_each_entry
/**
* list_move - delete from one list and add as another's head
* @list: the entry to move
__list_del(list->prev, list->next);
list_add_tail(list, head);
}
+#endif
+#ifndef _LINUX_LIST_H
+#define _LINUX_LIST_H
/**
* list_empty - tests whether a list is empty
* @head: the list to test.
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
*/
#ifndef __LTRACE_H_
#define __LTRACE_H_
argv[0] = "debug_kernel";
argv[1] = fname;
argv[2] = "1";
-
+
fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-
+
return jt_dbg_debug_kernel(3, argv);
}
static inline int ltrace_clear()
{
char* argv[1];
-
+
argv[0] = "clear";
-
+
fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-
+
return jt_dbg_clear_debug_buf(1, argv);
}
{
char* argv[2];
char mark_buf[PATH_MAX];
-
+
snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-
+
argv[0] = "mark";
argv[1] = mark_buf;
return jt_dbg_mark_debug_buf(2, argv);
char* argv[2];
argv[0] = "list";
argv[1] = "applymasks";
-
+
fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-
+
return jt_dbg_list(2, argv);
}
#ifdef PORTALS_DEV_ID
rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
#endif
- ltrace_filter("class");
+ ltrace_filter("class");
ltrace_filter("socknal");
- ltrace_filter("qswnal");
- ltrace_filter("gmnal");
- ltrace_filter("portals");
-
- ltrace_show("all_types");
- ltrace_filter("trace");
- ltrace_filter("malloc");
- ltrace_filter("net");
- ltrace_filter("page");
- ltrace_filter("other");
- ltrace_filter("info");
+ ltrace_filter("qswnal");
+ ltrace_filter("gmnal");
+ ltrace_filter("portals");
+
+ ltrace_show("all_types");
+ ltrace_filter("trace");
+ ltrace_filter("malloc");
+ ltrace_filter("net");
+ ltrace_filter("page");
+ ltrace_filter("other");
+ ltrace_filter("info");
ltrace_applymasks();
return rc;
struct timezone tz;
int nob;
int underuml = !not_uml();
-
+
gettimeofday(&tv, &tz);
nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
"(%s:%d:%s() %d+%lu): ",
"lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
}
-
+
nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
system(cmdbuf);
}
-/*
-*/
-
#ifndef MYRNAL_H
#define MYRNAL_H
-/*
-*/
#ifndef _NAL_H_
#define _NAL_H_
-/*
- */
-
#ifndef _INCppidh_
#define _INCppidh_
/*
-*/
-/*
* stringtab.h
*/
#define _P30_TYPES_H_
#ifdef __linux__
-#include <asm/types.h>
-#include <asm/timex.h>
+# include <asm/types.h>
+# include <asm/timex.h>
#else
-#include <sys/types.h>
+# include <sys/types.h>
typedef u_int32_t __u32;
typedef u_int64_t __u64;
-typedef unsigned long long cycles_t;
-static inline cycles_t get_cycles(void) { return 0; }
+#endif
+
+#ifdef __KERNEL__
+# include <linux/time.h>
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
#endif
typedef __u64 ptl_nid_t;
typedef ptl_handle_any_t ptl_handle_me_t;
#define PTL_HANDLE_NONE \
-((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
+ ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
#define PTL_EQ_NONE PTL_HANDLE_NONE
static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
ptl_handle_me_t unlinked_me;
ptl_md_t mem_desc;
ptl_hdr_data_t hdr_data;
- cycles_t arrival_time;
+ struct timeval arrival_time;
volatile ptl_seq_t sequence;
} ptl_event_t;
-
typedef enum {
PTL_ACK_REQ,
PTL_NOACK_REQ
} ptl_ack_req_t;
-
typedef struct {
volatile ptl_seq_t sequence;
ptl_size_t size;
ptl_eq_t *eq;
} ptl_ni_t;
-
typedef struct {
int max_match_entries; /* max number of match entries */
int max_mem_descriptors; /* max number of memory descriptors */
Makefile
Makefile.in
+.*.o.cmd
-include ../Kernelenv
+include $(obj)/../Kernelenv
obj-y = socknal/
-# more coming...
\ No newline at end of file
+# more coming...
return &kgmnal_api;
}
-static void __exit
+static void /*__exit*/
kgmnal_finalize(void)
{
struct list_head *tmp;
/* Called by kernel at module unload time */
-static void __exit
+static void /*__exit*/
kscimacnal_finalize(void)
{
/* FIXME: How should the shutdown procedure really look? */
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../../Kernelenv
+include $(src)/../../Kernelenv
obj-y += ksocknal.o
ksocknal-objs := socknal.o socknal_cb.o
}
-void __exit
+void /*__exit*/
ktoenal_module_fini (void)
{
CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags);
goto get_fmb; /* => go get a fwd msg buffer */
default:
+ break;
}
/* Not Reached */
LBUG ();
goto out; /* (later) */
default:
+ break;
}
/* Not Reached */
Makefile
Makefile.in
link-stamp
+.*.o.cmd
include fs/lustre/portals/Kernelenv
obj-y += libcfs.o
-licfs-objs := module.o proc.o debug.o
\ No newline at end of file
+libcfs-objs := module.o proc.o debug.o
memset(debug_buf, 0, debug_size);
debug_wrapped = 0;
- printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
- bufsize, debug_buf);
+ //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n",
+ //bufsize, debug_buf);
atomic_set(&debug_off_a, debug_off);
notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
debug_size = bufsize;
if (debug_buf == NULL)
return -EINVAL;
- CDEBUG(0, "*******************************************************************************\n");
+ CDEBUG(0, "********************************************************\n");
CDEBUG(0, "DEBUG MARKER: %s\n", text);
- CDEBUG(0, "*******************************************************************************\n");
+ CDEBUG(0, "********************************************************\n");
return 0;
}
/* FIXME: I'm not very smart; someone smarter should make this better. */
void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- unsigned long stack, const char *format, ...)
+portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+ const int line, unsigned long stack, const char *format, ...)
{
va_list ap;
unsigned long flags;
do_gettimeofday(&tv);
prefix_nob = snprintf(debug_buf + debug_off, max_nob,
- "%02x:%06x:%d:%lu.%06lu ",
- subsys >> 24, mask, smp_processor_id(),
+ "%06x:%06x:%d:%lu.%06lu ",
+ subsys, mask, smp_processor_id(),
tv.tv_sec, tv.tv_usec);
max_nob -= prefix_nob;
va_start(ap, format);
msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob,
- max_nob, format, ap);
+ max_nob, format, ap);
max_nob -= msg_nob;
va_end(ap);
portal_debug = debug_level;
}
-void portals_run_lbug_upcall(char * file, char *fn, int line)
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
{
char *argv[6];
char *envp[3];
argv[0] = portals_upcall;
argv[1] = "LBUG";
argv[2] = file;
- argv[3] = fn;
+ argv[3] = (char *)fn;
argv[4] = buf;
argv[5] = NULL;
struct semaphore nal_cmd_sem;
#ifdef PORTAL_DEBUG
-void
-kportal_assertion_failed (char *expr, char *file, char *func, int line)
+void kportal_assertion_failed(char *expr, char *file, const char *func,
+ const int line)
{
- portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(),
+ portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
"ASSERTION(%s) failed\n", expr);
LBUG_WITH_LOC(file, func, line);
}
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../Kernelenv
+include $(src)/../Kernelenv
obj-y += portals.o
-portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o
+portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \
+ lib-move.o lib-msg.o lib-ni.o lib-pid.o \
+ api-eq.o api-errno.o api-init.o api-me.o api-ni.o \
+ api-wrap.o
#include <portals/api-support.h>
int ptl_init;
-unsigned int portal_subsystem_debug = 0xfff7e3ff;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL);
unsigned int portal_debug = ~0;
unsigned int portal_printk;
unsigned int portal_stack;
msg->send_ack = 0;
msg->md = md;
- msg->ev.arrival_time = get_cycles();
+ do_gettimeofday(&msg->ev.arrival_time);
md->pending++;
if (md->threshold != PTL_MD_THRESH_INF) {
LASSERT (md->threshold > 0);
.deps
Makefile
Makefile.in
+.*.o.cmd
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-include ../Kernelenv
+include $(src)/../Kernelenv
obj-y += kptlrouter.o
kptlrouter-objs := router.o proc.o
#include "router.h"
-struct list_head kpr_routes;
-struct list_head kpr_nals;
+LIST_HEAD(kpr_routes);
+LIST_HEAD(kpr_nals);
unsigned long long kpr_fwd_bytes;
unsigned long kpr_fwd_packets;
*
* Once in a blue moon we register/deregister NALs and add/remove routing
* entries (thread context only)... */
-rwlock_t kpr_rwlock;
+rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED;
kpr_router_interface_t kpr_router_interface = {
kprri_register: kpr_register_nal,
int
kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
kpr_nal_entry_t *ne;
void
kpr_shutdown_nal (void *arg)
{
- long flags;
+ unsigned long flags;
kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid);
void
kpr_deregister_nal (void *arg)
{
- long flags;
+ unsigned long flags;
kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid);
kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid,
ptl_nid_t hi_nid)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
kpr_route_entry_t *re;
int
kpr_del_route (ptl_nid_t nid)
{
- long flags;
+ unsigned long flags;
struct list_head *e;
CDEBUG(D_OTHER, "Del route "LPX64"\n", nid);
return (-ENOENT);
}
-static void __exit
+static void /*__exit*/
kpr_finalise (void)
{
LASSERT (list_empty (&kpr_nals));
CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
atomic_read(&portal_kmemory));
- rwlock_init(&kpr_rwlock);
- INIT_LIST_HEAD(&kpr_routes);
- INIT_LIST_HEAD(&kpr_nals);
-
kpr_proc_init();
PORTAL_SYMBOL_REGISTER(kpr_router_interface);
Makefile
Makefile.in
.deps
+.*.o.cmd
/* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
{
PORTAL_ALLOC (client, sizeof(struct pingcli_data));
if (client == NULL)
} /* pingcli_init() */
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
{
PORTAL_SYMBOL_UNREGISTER (kping_client);
} /* pingcli_cleanup() */
#include <asm/semaphore.h>
#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024*1024)
+#define MAXSIZE (16*1024)
static unsigned ping_head_magic;
static unsigned ping_bulk_magic;
-static int nal = 0; // Your NAL,
+static int nal = SOCKNAL; // Your NAL,
static unsigned long packets_valid = 0; // Valid packets
static int running = 1;
atomic_t pkt;
} /* pingsrv_init() */
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
{
remove_proc_entry ("net/pingsrv", NULL);
/* called by the portals_ioctl for ping requests */
-static int kping_client(struct portal_ioctl_data *args)
+int kping_client(struct portal_ioctl_data *args)
{
PORTAL_ALLOC (client, sizeof(struct pingcli_data));
} /* pingcli_init() */
-static void __exit pingcli_cleanup(void)
+static void /*__exit*/ pingcli_cleanup(void)
{
PORTAL_SYMBOL_UNREGISTER (kping_client);
} /* pingcli_cleanup() */
} /* pingsrv_init() */
-static void __exit pingsrv_cleanup(void)
+static void /*__exit*/ pingsrv_cleanup(void)
{
remove_proc_entry ("net/pingsrv", NULL);
/* FIXME: I'm not very smart; someone smarter should make this better. */
void
-portals_debug_msg (int subsys, int mask, char *file, char *fn, int line,
- const char *format, ...)
+portals_debug_msg (int subsys, int mask, char *file, const char *fn,
+ const int line, const char *format, ...)
{
va_list ap;
unsigned long flags;
ptlctl
.deps
routerstat
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
static char *buf = rawbuf;
static int max = 8192;
//static int g_pfd = -1;
-static int subsystem_array[1 << 8];
+static int subsystem_mask = ~0;
static int debug_mask = ~0;
static const char *portal_debug_subsystems[] =
- {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite",
- "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter",
- "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL};
+ {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite",
+ "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter",
+ "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL};
static const char *portal_debug_masks[] =
{"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl",
"blocks", "net", "warning", "buffs", "other", "dentry", "portals",
- "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL};
+ "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace",
+ NULL};
struct debug_daemon_cmd {
char *cmd;
printf("%s output from subsystem \"%s\"\n",
enable ? "Enabling" : "Disabling",
portal_debug_subsystems[i]);
- subsystem_array[i] = enable;
+ if (enable)
+ subsystem_mask |= (1 << i);
+ else
+ subsystem_mask &= ~(1 << i);
found = 1;
}
}
int dbg_initialize(int argc, char **argv)
{
- memset(subsystem_array, 1, sizeof(subsystem_array));
return 0;
}
for (i = 0; portal_debug_masks[i] != NULL; i++)
printf(", %s", portal_debug_masks[i]);
printf("\n");
- }
- else if (strcasecmp(argv[1], "applymasks") == 0) {
- unsigned int subsystem_mask = 0;
- for (i = 0; portal_debug_subsystems[i] != NULL; i++) {
- if (subsystem_array[i]) subsystem_mask |= (1 << i);
- }
+ } else if (strcasecmp(argv[1], "applymasks") == 0) {
applymask_all(subsystem_mask, debug_mask);
}
return 0;
{
char *p, *z;
unsigned long subsystem, debug, dropped = 0, kept = 0;
- int max_sub, max_type;
-
- for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++)
- ;
- for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++)
- ;
while (size) {
p = memchr(buf, '\n', size);
z++;
/* for some reason %*s isn't working. */
*p = '\0';
- if (subsystem < max_sub &&
- subsystem_array[subsystem] &&
+ if ((subsystem_mask & subsystem) &&
(!debug || (debug_mask & debug))) {
if (raw)
fprintf(fd, "%s\n", buf);
{"mds_ext3", "lustre/mds"},
{"mds_extN", "lustre/mds"},
{"ptlbd", "lustre/ptlbd"},
+ {"mgmt_svc", "lustre/mgmt"},
+ {"mgmt_cli", "lustre/mgmt"},
{NULL, NULL}
};
char *path = "..";
#include <stdio.h>
#include <sys/types.h>
+#include <netdb.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
#include <netdb.h>
return ((e == NULL) ? "???" : e->name);
}
+/*
+ * Resolve a hostname with gethostbyname(), reporting any failure on stderr.
+ *
+ * hname: the name to look up.
+ *
+ * Returns the hostent handed back by gethostbyname(), or NULL when the
+ * lookup failed (a diagnostic has already been printed in that case, so
+ * callers only need to bail out).
+ */
+static struct hostent *
+ptl_gethostbyname(char * hname) {
+        struct hostent *he;
+
+        he = gethostbyname(hname);
+        if (he != NULL)
+                return he;
+
+        switch (h_errno) {
+        case HOST_NOT_FOUND:
+        case NO_ADDRESS:
+                fprintf(stderr, "Unable to resolve hostname: %s\n",
+                        hname);
+                break;
+        default:
+                /* gethostbyname() reports failures through h_errno, not
+                 * errno, so strerror(errno) would describe an unrelated
+                 * (possibly stale) error; hstrerror() decodes h_errno. */
+                fprintf(stderr, "gethostbyname error: %s\n",
+                        hstrerror(h_errno));
+                break;
+        }
+        return NULL;
+}
+
int
ptl_parse_nid (ptl_nid_t *nidp, char *str)
{
if ((('a' <= str[0] && str[0] <= 'z') ||
('A' <= str[0] && str[0] <= 'Z')) &&
- (he = gethostbyname (str)) != NULL)
+ (he = ptl_gethostbyname (str)) != NULL)
{
__u32 addr = *(__u32 *)he->h_addr;
goto usage;
}
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
g_port = atol(argv[2]);
PORTAL_IOC_INIT(data);
if (argc == 2) {
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
PORTAL_IOC_INIT(data);
if (argc == 2) {
- he = gethostbyname(argv[1]);
- if (!he) {
- fprintf(stderr, "gethostbyname error: %s\n",
- strerror(errno));
+ he = ptl_gethostbyname(argv[1]);
+ if (!he)
return -1;
- }
data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */
RETURN(0);
}
-static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover)
+static int ptlbd_cl_cleanup(struct obd_device *obd, int flags)
{
struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
struct obd_import *imp;
/* modelled after ptlrpc_import_connect() */
-int ptlbd_cl_connect(struct lustre_handle *conn,
- struct obd_device *obd,
- struct obd_uuid *target_uuid)
+int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
+ struct obd_uuid *target_uuid)
{
struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
struct obd_import *imp = ptlbd->bd_import;
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(ptlbd,&lvars);
return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars,
OBD_PTLBD_CL_DEVICENAME);
}
RETURN(ret);
}
-static void __exit ptlbd_exit(void)
+static void /*__exit*/ ptlbd_exit(void)
{
ENTRY;
ptlbd_cl_exit();
RETURN(rc);
}
-static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover)
+static int ptlbd_sv_cleanup(struct obd_device *obddev, int flags)
{
struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
ENTRY;
{
struct lprocfs_static_vars lvars;
- lprocfs_init_vars(&lvars);
+ lprocfs_init_vars(ptlbd,&lvars);
return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars,
OBD_PTLBD_SV_DEVICENAME);
}
ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \
client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \
-ptlrpc_internal.h
+ptlrpc_internal.h recov_thread.c
endif
include $(top_srcdir)/Rules
struct ptlrpc_peer peer;
int err;
- err = ptlrpc_uuid_to_peer (uuid, &peer);
+ err = ptlrpc_uuid_to_peer(uuid, &peer);
if (err != 0) {
CERROR("cannot find peer %s!\n", uuid->uuid);
return;
}
- memcpy (&conn->c_peer, &peer, sizeof (peer));
+ memcpy(&conn->c_peer, &peer, sizeof (peer));
return;
}
if (!desc)
return NULL;
- spin_lock_init (&desc->bd_lock);
+ spin_lock_init(&desc->bd_lock);
init_waitqueue_head(&desc->bd_waitq);
INIT_LIST_HEAD(&desc->bd_page_list);
desc->bd_md_h = PTL_HANDLE_NONE;
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
int type, int portal)
{
- struct obd_import *imp = req->rq_import;
+ struct obd_import *imp = req->rq_import;
struct ptlrpc_bulk_desc *desc;
- LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
+ LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
desc = new_bulk();
if (desc == NULL)
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
int type, int portal)
{
- struct obd_export *exp = req->rq_export;
+ struct obd_export *exp = req->rq_export;
struct ptlrpc_bulk_desc *desc;
- LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
+ LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
desc = new_bulk();
if (desc == NULL)
OBD_ALLOC(bulk, sizeof(*bulk));
if (bulk == NULL)
- return (-ENOMEM);
+ return -ENOMEM;
- LASSERT (page != NULL);
- LASSERT (pageoffset >= 0);
- LASSERT (len > 0);
- LASSERT (pageoffset + len <= PAGE_SIZE);
+ LASSERT(page != NULL);
+ LASSERT(pageoffset >= 0);
+ LASSERT(len > 0);
+ LASSERT(pageoffset + len <= PAGE_SIZE);
bulk->bp_page = page;
bulk->bp_pageoffset = pageoffset;
struct list_head *tmp, *next;
ENTRY;
- LASSERT (desc != NULL);
- LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
- LASSERT (!desc->bd_network_rw); /* network hands off or */
+ LASSERT(desc != NULL);
+ LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+ LASSERT(!desc->bd_network_rw); /* network hands off or */
list_for_each_safe(tmp, next, &desc->bd_page_list) {
struct ptlrpc_bulk_page *bulk;
ptlrpc_free_bulk_page(bulk);
}
- LASSERT (desc->bd_page_count == 0);
+ LASSERT(desc->bd_page_count == 0);
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
if (desc->bd_export)
void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
{
- LASSERT (bulk != NULL);
+ LASSERT(bulk != NULL);
list_del(&bulk->bp_link);
bulk->bp_desc->bd_page_count--;
request->rq_connection = ptlrpc_connection_addref(imp->imp_connection);
- spin_lock_init (&request->rq_lock);
+ spin_lock_init(&request->rq_lock);
INIT_LIST_HEAD(&request->rq_list);
init_waitqueue_head(&request->rq_wait_for_rep);
request->rq_xid = ptlrpc_next_xid();
struct ptlrpc_request *req =
list_entry(tmp, struct ptlrpc_request, rq_set_chain);
- LASSERT (req->rq_phase == expected_phase);
+ LASSERT(req->rq_phase == expected_phase);
n++;
}
- LASSERT (set->set_remaining == 0 || set->set_remaining == n);
+ LASSERT(set->set_remaining == 0 || set->set_remaining == n);
list_for_each_safe(tmp, next, &set->set_requests) {
struct ptlrpc_request *req =
list_entry(tmp, struct ptlrpc_request, rq_set_chain);
list_del_init(&req->rq_set_chain);
- LASSERT (req->rq_phase == expected_phase);
+ LASSERT(req->rq_phase == expected_phase);
if (req->rq_phase == RQ_PHASE_NEW) {
/* higher level (i.e. LOV) failed;
* let the sub reqs clean up */
req->rq_status = -EBADR;
- interpreter(req, &req->rq_async_args, req->rq_status);
+ interpreter(req, &req->rq_async_args,
+ req->rq_status);
}
set->set_remaining--;
}
int rc;
ENTRY;
- LASSERT (!req->rq_receiving_reply);
- LASSERT (req->rq_replied);
+ LASSERT(!req->rq_receiving_reply);
+ LASSERT(req->rq_replied);
if (restartp != NULL)
*restartp = 0;
rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
if (rc) {
CERROR("unpack_rep failed: %d\n", rc);
- RETURN (-EPROTO);
+ RETURN(-EPROTO);
}
if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY &&
req->rq_repmsg->type != PTL_RPC_MSG_ERR) {
CERROR("invalid packet type received (type=%u)\n",
req->rq_repmsg->type);
- RETURN (-EPROTO);
+ RETURN(-EPROTO);
}
/* Store transno in reqmsg for replay. */
if (req->rq_err)
RETURN(-EIO);
+ if (req->rq_no_resend)
+ RETURN(rc); /* -ENOTCONN */
+
if (req->rq_resend) {
if (restartp == NULL)
LBUG(); /* async resend not supported yet */
*restartp = 1;
lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
DEBUG_REQ(D_HA, req, "resending: ");
- RETURN (0);
+ RETURN(0);
}
CERROR("request should be err or resend: %p\n", req);
imp->imp_max_transno = req->rq_transno;
/* Replay-enabled imports return commit-status information. */
- if (req->rq_repmsg->last_committed) {
+ if (req->rq_repmsg->last_committed)
imp->imp_peer_committed_transno =
req->rq_repmsg->last_committed;
- }
ptlrpc_free_committed(imp);
spin_unlock_irqrestore(&imp->imp_lock, flags);
}
continue;
if (req->rq_phase == RQ_PHASE_INTERPRET)
- GOTO (interpret, req->rq_status);
-
+ GOTO(interpret, req->rq_status);
+
if (req->rq_err) {
ptlrpc_unregister_reply(req);
if (req->rq_status == 0)
list_del_init(&req->rq_list);
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (interpret, req->rq_status);
+ GOTO(interpret, req->rq_status);
}
if (req->rq_intr) {
list_del_init(&req->rq_list);
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (interpret, req->rq_status);
+ GOTO(interpret, req->rq_status);
}
if (req->rq_phase == RQ_PHASE_RPC) {
list_add_tail(&req->rq_list,
&imp->imp_sending_list);
- if (req->rq_import_generation <
+ if (req->rq_import_generation <
imp->imp_generation) {
req->rq_status = -EIO;
req->rq_phase = RQ_PHASE_INTERPRET;
- spin_unlock_irqrestore(&imp->imp_lock,
+ spin_unlock_irqrestore(&imp->imp_lock,
flags);
- GOTO (interpret, req->rq_status);
+ GOTO(interpret, req->rq_status);
}
spin_unlock_irqrestore(&imp->imp_lock, flags);
req->rq_resend = 0;
spin_unlock_irqrestore(&req->rq_lock,
flags);
+
ptlrpc_unregister_reply(req);
if (req->rq_bulk)
ptlrpc_unregister_bulk(req);
- }
+ }
rc = ptl_send_rpc(req);
if (rc) {
req->rq_status = rc;
req->rq_phase = RQ_PHASE_INTERPRET;
- GOTO (interpret, req->rq_status);
+ GOTO(interpret, req->rq_status);
}
}
*/
if (req->rq_bulk == NULL || req->rq_status != 0) {
req->rq_phase = RQ_PHASE_INTERPRET;
- GOTO (interpret, req->rq_status);
+ GOTO(interpret, req->rq_status);
}
req->rq_phase = RQ_PHASE_BULK;
}
- LASSERT (req->rq_phase == RQ_PHASE_BULK);
+ LASSERT(req->rq_phase == RQ_PHASE_BULK);
if (!ptlrpc_bulk_complete (req->rq_bulk))
continue;
req->rq_phase = RQ_PHASE_INTERPRET;
interpret:
- LASSERT (req->rq_phase == RQ_PHASE_INTERPRET);
- LASSERT (!req->rq_receiving_reply);
+ LASSERT(req->rq_phase == RQ_PHASE_INTERPRET);
+ LASSERT(!req->rq_receiving_reply);
ptlrpc_unregister_reply(req);
if (req->rq_bulk != NULL)
set->set_remaining--;
}
- RETURN (set->set_remaining == 0);
+ RETURN(set->set_remaining == 0);
}
int ptlrpc_expire_one_request(struct ptlrpc_request *req)
time_t now = LTIME_S (CURRENT_TIME);
ENTRY;
- LASSERT (set != NULL);
+ LASSERT(set != NULL);
/* A timeout expired; see which reqs it applies to... */
list_for_each (tmp, &set->set_requests) {
struct list_head *tmp;
unsigned long flags;
- LASSERT (set != NULL);
+ LASSERT(set != NULL);
CERROR("INTERRUPTED SET %p\n", set);
list_for_each(tmp, &set->set_requests) {
int timeout;
ENTRY;
+ SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */
LASSERT(!list_empty(&set->set_requests));
list_for_each(tmp, &set->set_requests) {
req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
- LASSERT (req->rq_level == LUSTRE_CONN_FULL);
- LASSERT (req->rq_phase == RQ_PHASE_NEW);
+ LASSERT(req->rq_level == LUSTRE_CONN_FULL);
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
req->rq_phase = RQ_PHASE_RPC;
imp = req->rq_import;
spin_lock (&req->rq_lock);
req->rq_waiting = 1;
spin_unlock (&req->rq_lock);
- LASSERT (list_empty (&req->rq_list));
+ LASSERT(list_empty (&req->rq_list));
// list_del(&req->rq_list);
list_add_tail(&req->rq_list, &imp->imp_delayed_list);
spin_unlock_irqrestore(&imp->imp_lock, flags);
list_add_tail(&req->rq_list, &imp->imp_sending_list);
spin_unlock_irqrestore(&imp->imp_lock, flags);
+ req->rq_reqmsg->status = current->pid;
CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
" %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
now = LTIME_S (CURRENT_TIME);
timeout = 0;
list_for_each (tmp, &set->set_requests) {
- req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
+ req = list_entry(tmp, struct ptlrpc_request,
+ rq_set_chain);
/* request in-flight? */
if (!((req->rq_phase == RQ_PHASE_RPC &&
expired_set, interrupted_set, set);
rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
- LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
+ LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
/* -EINTR => all requests have been flagged rq_intr so next
* check completes.
* the error cases -eeb. */
} while (rc != 0);
- LASSERT (set->set_remaining == 0);
+ LASSERT(set->set_remaining == 0);
rc = 0;
list_for_each(tmp, &set->set_requests) {
req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
- LASSERT (req->rq_phase == RQ_PHASE_COMPLETE);
+ LASSERT(req->rq_phase == RQ_PHASE_COMPLETE);
if (req->rq_status != 0)
rc = req->rq_status;
}
return;
}
- LASSERT (!request->rq_receiving_reply);
+ LASSERT(!request->rq_receiving_reply);
/* We must take it off the imp_replay_list first. Otherwise, we'll set
* request->rq_reqmsg to NULL while osc_close is dereferencing it. */
if (request == NULL)
RETURN(1);
- if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
+ if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
CERROR("dereferencing freed request (bug 575)\n");
LBUG();
int rc;
ENTRY;
- LASSERT (!in_interrupt ()); /* might sleep */
+ LASSERT(!in_interrupt ()); /* might sleep */
spin_lock_irqsave (&request->rq_lock, flags);
if (!request->rq_receiving_reply) { /* not waiting for a reply */
return;
}
- LASSERT (!request->rq_replied); /* callback hasn't completed */
+ LASSERT(!request->rq_replied); /* callback hasn't completed */
spin_unlock_irqrestore (&request->rq_lock, flags);
rc = PtlMDUnlink (request->rq_reply_md_h);
LBUG ();
case PTL_OK: /* unlinked before completion */
- LASSERT (request->rq_receiving_reply);
- LASSERT (!request->rq_replied);
+ LASSERT(request->rq_receiving_reply);
+ LASSERT(!request->rq_replied);
spin_lock_irqsave (&request->rq_lock, flags);
request->rq_receiving_reply = 0;
spin_unlock_irqrestore (&request->rq_lock, flags);
rc = l_wait_event (request->rq_wait_for_rep,
request->rq_replied, &lwi);
- LASSERT (rc == 0 || rc == -ETIMEDOUT);
+ LASSERT(rc == 0 || rc == -ETIMEDOUT);
if (rc == 0) {
spin_lock_irqsave (&request->rq_lock, flags);
/* Ensure the callback has completed scheduling
/* fall through */
case PTL_INV_MD: /* callback completed */
- LASSERT (!request->rq_receiving_reply);
- LASSERT (request->rq_replied);
+ LASSERT(!request->rq_receiving_reply);
+ LASSERT(request->rq_replied);
EXIT;
return;
}
req = list_entry(tmp, struct ptlrpc_request, rq_list);
/* XXX ok to remove when 1357 resolved - rread 05/29/03 */
- LASSERT (req != last_req);
+ LASSERT(req != last_req);
last_req = req;
if (req->rq_import_generation < imp->imp_generation) {
struct l_wait_info lwi;
struct obd_import *imp = req->rq_import;
struct obd_device *obd = imp->imp_obd;
- struct ptlrpc_connection *conn = imp->imp_connection;
- unsigned int flags;
+ unsigned long flags;
int do_restart = 0;
int timeout = 0;
ENTRY;
- LASSERT (req->rq_set == NULL);
- LASSERT (!req->rq_receiving_reply);
+ LASSERT(req->rq_set == NULL);
+ LASSERT(!req->rq_receiving_reply);
/* for distributed debugging */
req->rq_reqmsg->status = current->pid;
"%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
imp->imp_obd->obd_uuid.uuid,
req->rq_reqmsg->status, req->rq_xid,
- conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+ imp->imp_connection->c_peer.peer_ni->pni_name,
+ imp->imp_connection->c_peer.peer_nid,
req->rq_reqmsg->opc);
/* Mark phase here for a little debug help */
if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (out, rc = -EIO);
+ GOTO(out, rc = -EIO);
}
if (req->rq_import_generation < imp->imp_generation) {
DEBUG_REQ(D_ERROR, req, "req old gen:");
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (out, rc = -EIO);
+ GOTO(out, rc = -EIO);
}
if (req->rq_level > imp->imp_level) {
if (req->rq_no_recov || obd->obd_no_recov ||
imp->imp_dlm_fake) {
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (out, rc = -EWOULDBLOCK);
+ GOTO(out, rc = -EWOULDBLOCK);
}
list_add_tail(&req->rq_list, &imp->imp_delayed_list);
(req->rq_level <= imp->imp_level ||
req->rq_err),
&lwi);
- DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)",
- current->comm, req->rq_level, imp->imp_level);
+ DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d or %d == 1)",
+ current->comm, imp->imp_level, req->rq_level,
+ req->rq_err);
spin_lock_irqsave(&imp->imp_lock, flags);
list_del_init(&req->rq_list);
- if (req->rq_err ||
+ if (req->rq_err ||
req->rq_import_generation < imp->imp_generation)
rc = -EIO;
if (rc) {
spin_unlock_irqrestore(&imp->imp_lock, flags);
- GOTO (out, rc);
+ GOTO(out, rc);
}
- CERROR("process %d resumed\n", current->pid);
+ DEBUG_REQ(D_HA, req, "resumed");
}
/* XXX this is the same as ptlrpc_set_wait */
&reply_ev);
reply_in_callback(&reply_ev);
- LASSERT (reply_ev.mem_desc.user_ptr == (void *)req);
+ LASSERT(reply_ev.mem_desc.user_ptr == (void *)req);
// ptlrpc_check_reply(req);
// not required now it only tests
}
"%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm,
imp->imp_obd->obd_uuid.uuid,
req->rq_reqmsg->status, req->rq_xid,
- conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid,
+ imp->imp_connection->c_peer.peer_ni->pni_name,
+ imp->imp_connection->c_peer.peer_nid,
req->rq_reqmsg->opc);
spin_lock_irqsave(&imp->imp_lock, flags);
ptlrpc_bulk_complete(req->rq_bulk),
&lwi);
if (brc != 0) {
- LASSERT (brc == -ETIMEDOUT);
+ LASSERT(brc == -ETIMEDOUT);
CERROR ("Timed out waiting for bulk\n");
rc = brc;
}
if (rc < 0) {
/* MDS blocks for put ACKs before replying */
/* OSC sets rq_no_resend for the time being */
- LASSERT (req->rq_no_resend);
+ LASSERT(req->rq_no_resend);
ptlrpc_unregister_bulk (req);
}
}
- LASSERT (!req->rq_receiving_reply);
+ LASSERT(!req->rq_receiving_reply);
req->rq_phase = RQ_PHASE_INTERPRET;
- RETURN (rc);
+ RETURN(rc);
}
int ptlrpc_replay_req(struct ptlrpc_request *req)
* state it was left in */
/* Not handling automatic bulk replay yet (or ever?) */
- LASSERT (req->rq_bulk == NULL);
+ LASSERT(req->rq_bulk == NULL);
DEBUG_REQ(D_NET, req, "about to replay");
{ OST_SAN_READ, "ost_san_read" },
{ OST_SAN_WRITE, "ost_san_write" },
{ OST_SYNCFS, "ost_syncfs" },
+ { OST_SET_INFO, "ost_set_info" },
{ MDS_GETATTR, "mds_getattr" },
{ MDS_GETATTR_NAME, "mds_getattr_name" },
{ MDS_CLOSE, "mds_close" },
{ MDS_GETSTATUS, "mds_getstatus" },
{ MDS_STATFS, "mds_statfs" },
{ MDS_GETLOVINFO, "mds_getlovinfo" },
+ { MDS_PIN, "mds_pin" },
+ { MDS_UNPIN, "mds_unpin" },
{ LDLM_ENQUEUE, "ldlm_enqueue" },
{ LDLM_CONVERT, "ldlm_convert" },
{ LDLM_CANCEL, "ldlm_cancel" },
{ PTLBD_FLUSH, "ptlbd_flush" },
{ PTLBD_CONNECT, "ptlbd_connect" },
{ PTLBD_DISCONNECT, "ptlbd_disconnect" },
- { OBD_PING, "obd_ping" }
+ { OBD_PING, "obd_ping" },
+ { OBD_LOG_CANCEL, "obd_log_cancel" },
};
const char* ll_opcode2str(__u32 opcode)
}
lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR,
- svc_counter_config, "req_waittime", "cycles");
+ svc_counter_config, "req_waittime", "usec");
/* Wait for b_eq branch
lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR,
svc_counter_config, "svc_eqdepth", "reqs");
/* no stddev on idletime */
lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR,
(LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX),
- "svc_idletime", "cycles");
+ "svc_idletime", "usec");
for (i = 0; i < LUSTRE_MAX_OPCODES; i++) {
__u32 opcode = ll_rpc_opcode_table[i].opcode;
lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i,
svc_counter_config, ll_opcode2str(opcode),
- "cycles");
+ "usec");
}
rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats);
}
buflen = m->buflens[n];
- if (buflen == 0) {
- CERROR("msg %p buffer[%d] is zero length\n", m, n);
- return NULL;
- }
-
if (buflen < min_size) {
CERROR("msg %p buffer[%d] size %d too small (required %d)\n",
- m, n, buflen, min_size);
+ m, n, buflen, min_size);
return NULL;
}
{
void *ptr;
- LASSERT_REQSWAB (req, index);
+ LASSERT_REQSWAB(req, index);
ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size);
if (ptr == NULL)
- return (NULL);
+ return NULL;
- if (swabber != NULL &&
- lustre_msg_swabbed (req->rq_reqmsg))
+ if (swabber != NULL && lustre_msg_swabbed(req->rq_reqmsg))
((void (*)(void *))swabber)(ptr);
- return (ptr);
+ return ptr;
}
/* Wrap up the normal fixed length case */
{
void *ptr;
- LASSERT_REPSWAB (req, index);
+ LASSERT_REPSWAB(req, index);
- ptr = lustre_msg_buf (req->rq_repmsg, index, min_size);
+ ptr = lustre_msg_buf(req->rq_repmsg, index, min_size);
if (ptr == NULL)
- return (NULL);
+ return NULL;
- if (swabber != NULL &&
- lustre_msg_swabbed (req->rq_repmsg))
+ if (swabber != NULL && lustre_msg_swabbed(req->rq_repmsg))
((void (*)(void *))swabber)(ptr);
- return (ptr);
+ return ptr;
}
/* byte flipping routines for all wire types declared in
LASSERT (REINT_RENAME == 5);
LASSERT (REINT_OPEN == 6);
LASSERT (REINT_MAX == 6);
- LASSERT (IT_INTENT_EXEC == 1);
- LASSERT (IT_OPEN_LOOKUP == 2);
- LASSERT (IT_OPEN_NEG == 4);
- LASSERT (IT_OPEN_POS == 8);
- LASSERT (IT_OPEN_CREATE == 16);
- LASSERT (IT_OPEN_OPEN == 32);
+ LASSERT (DISP_IT_EXECD == 1);
+ LASSERT (DISP_LOOKUP_EXECD == 2);
+ LASSERT (DISP_LOOKUP_NEG == 4);
+ LASSERT (DISP_LOOKUP_POS == 8);
+ LASSERT (DISP_OPEN_CREATE == 16);
+ LASSERT (DISP_OPEN_OPEN == 32);
LASSERT (MDS_STATUS_CONN == 1);
LASSERT (MDS_STATUS_LOV == 2);
LASSERT (MDS_OPEN_HAS_EA == 1);
int ptlrpc_pinger_add_import(struct obd_import *imp)
{
+#ifndef ENABLE_PINGER
+ return 0;
+#else
int rc;
ENTRY;
-#ifndef ENABLE_PINGER
- RETURN(0);
-#else
if (!list_empty(&imp->imp_pinger_chain))
RETURN(-EALREADY);
int ptlrpc_pinger_del_import(struct obd_import *imp)
{
+#ifndef ENABLE_PINGER
+ return 0;
+#else
int rc;
ENTRY;
-#ifndef ENABLE_PINGER
- RETURN(0);
-#else
if (list_empty(&imp->imp_pinger_chain))
RETURN(-ENOENT);
RECALC_SIGPENDING;
SIGNAL_MASK_UNLOCK(current, flags);
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
- sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- sprintf(current->comm, "%s|%d", data->name,
- current->thread.mode.tt.extern_pid);
-#else
- strcpy(current->comm, data->name);
-#endif
+ THREAD_NAME(current->comm, "%s", data->name);
unlock_kernel();
/* Record that the thread is running */
down(&pinger_sem);
list_for_each(iter, &pinger_imports) {
struct obd_import *imp =
- list_entry(iter, struct obd_import, imp_pinger_chain);
+ list_entry(iter, struct obd_import,
+ imp_pinger_chain);
int generation, level;
unsigned long flags;
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (level != LUSTRE_CONN_FULL) {
- CDEBUG(D_HA, "not pinging %s (in recovery)\n",
+ CDEBUG(D_HA,
+ "not pinging %s (in recovery)\n",
imp->imp_target_uuid.uuid);
continue;
}
- req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+ req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL,
+ NULL);
if (!req) {
CERROR("OOM trying to ping\n");
break;
}
+ req->rq_no_resend = 1;
req->rq_replen = lustre_msg_size(0, NULL);
req->rq_level = LUSTRE_CONN_FULL;
req->rq_phase = RQ_PHASE_RPC;
/* ldlm hooks that we need, managed via inter_module_{get,put} */
extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *,
- struct ldlm_res_id *, int);
+ struct ldlm_res_id *, int);
extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *);
int ptlrpc_get_ldlm_hooks(void);
void ptlrpc_daemonize(void);
void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
-void lustre_assert_wire_constants (void);
+void lustre_assert_wire_constants(void);
void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
struct ptlrpc_service *svc);
void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc);
+/* recov_thread.c */
+int llog_init_commit_master(void);
+int llog_cleanup_commit_master(int force);
static inline int opcode_offset(__u32 opc) {
if (opc < OST_LAST_OPC) {
(LDLM_LAST_OPC - LDLM_FIRST_OPC) +
(MDS_LAST_OPC - MDS_FIRST_OPC) +
(OST_LAST_OPC - OST_FIRST_OPC));
- } else if (opc == OBD_PING) {
+ } else if (opc < OBD_LAST_OPC) {
/* OBD Ping */
- return (opc - OBD_PING +
+ return (opc - OBD_FIRST_OPC +
(PTLBD_LAST_OPC - PTLBD_FIRST_OPC) +
(LDLM_LAST_OPC - LDLM_FIRST_OPC) +
(MDS_LAST_OPC - MDS_FIRST_OPC) +
}
}
-#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \
- + (LDLM_LAST_OPC - LDLM_FIRST_OPC) \
- + (MDS_LAST_OPC - MDS_FIRST_OPC) \
- + (OST_LAST_OPC - OST_FIRST_OPC))
+#define LUSTRE_MAX_OPCODES ((PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + \
+ (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \
+ (MDS_LAST_OPC - MDS_FIRST_OPC) + \
+ (OST_LAST_OPC - OST_FIRST_OPC) + \
+ (OBD_LAST_OPC - OBD_FIRST_OPC)) /* sum of the five per-subsystem (LAST - FIRST) opcode range widths */
enum {
PTLRPC_REQWAIT_CNTR = 0,
#ifdef __KERNEL__
# include <linux/module.h>
-#else
+#else
# include <liblustre.h>
#endif
#include <linux/obd.h>
#include <linux/obd_ost.h>
+#include <linux/lustre_mgmt.h>
#include <linux/lustre_net.h>
#include <linux/lustre_dlm.h>
struct obd_import *imp;
struct obd_uuid server_uuid;
int rq_portal, rp_portal, connect_op;
- char *name;
+ char *name = obddev->obd_type->typ_name;
ENTRY;
- if (obddev->obd_type->typ_ops->o_brw) {
+ /* In a more perfect world, we would hang a ptlrpc_client off of
+ * obd_type and just use the values from there. */
+ if (!strcmp(name, LUSTRE_OSC_NAME)) {
rq_portal = OST_REQUEST_PORTAL;
rp_portal = OSC_REPLY_PORTAL;
- name = "osc";
connect_op = OST_CONNECT;
- } else {
+ } else if (!strcmp(name, LUSTRE_MDC_NAME)) {
rq_portal = MDS_REQUEST_PORTAL;
rp_portal = MDC_REPLY_PORTAL;
- name = "mdc";
connect_op = MDS_CONNECT;
+ } else if (!strcmp(name, LUSTRE_MGMTCLI_NAME)) {
+ rq_portal = MGMT_REQUEST_PORTAL;
+ rp_portal = MGMT_REPLY_PORTAL;
+ connect_op = MGMT_CONNECT;
+ } else {
+ CERROR("unknown client OBD type \"%s\", can't setup\n",
+ name);
+ RETURN(-EINVAL);
}
if (data->ioc_inllen1 < 1) {
cli->cl_import = imp;
cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
+ cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie);
cli->cl_sandev = to_kdev_t(0);
+ /* Register with management client if we need to. */
+ if (data->ioc_inllen3 > 0) {
+ char *mgmt_name = data->ioc_inlbuf3;
+ int rc;
+ struct obd_device *mgmt_obd;
+ mgmtcli_register_for_events_t register_f;
+
+ CDEBUG(D_HA, "%s registering with %s for events about %s\n",
+ obddev->obd_name, mgmt_name, server_uuid.uuid);
+
+ mgmt_obd = class_name2obd(mgmt_name);
+ if (!mgmt_obd) {
+ CERROR("can't find mgmtcli %s to register\n",
+ mgmt_name);
+ class_destroy_import(imp);
+ RETURN(-ENOENT);
+ }
+
+ register_f = inter_module_get("mgmtcli_register_for_events");
+ if (!register_f) {
+ CERROR("can't i_m_g mgmtcli_register_for_events\n");
+ class_destroy_import(imp);
+ RETURN(-ENOSYS);
+ }
+
+ rc = register_f(mgmt_obd, obddev, &imp->imp_target_uuid);
+ inter_module_put("mgmtcli_register_for_events");
+
+ if (!rc)
+ cli->cl_mgmtcli_obd = mgmt_obd;
+
+ RETURN(rc);
+ }
+
RETURN(0);
}
-int client_obd_cleanup(struct obd_device *obddev, int force, int failover)
+/*
+ * Tear down the client state of an OBD device: deregister from the
+ * management client when cl_mgmtcli_obd is set, then destroy the import.
+ *
+ * obddev: device whose u.cli state is cleaned up.
+ * flags:  cleanup flags (not consulted here).
+ *
+ * Returns 0 on success, or -EINVAL when the device has no import.
+ */
+int client_obd_cleanup(struct obd_device *obddev, int flags)
{
- struct client_obd *client = &obddev->u.cli;
+        struct client_obd *cli = &obddev->u.cli;
- if (!client->cl_import)
+        if (!cli->cl_import)
RETURN(-EINVAL);
- class_destroy_import(client->cl_import);
- client->cl_import = NULL;
+        if (cli->cl_mgmtcli_obd) {
+                mgmtcli_deregister_for_events_t dereg_f;
+
+                dereg_f = inter_module_get("mgmtcli_deregister_for_events");
+                /* The symbol lookup can come back NULL (the registration
+                 * path checks for exactly that), so never call through an
+                 * unchecked pointer; only drop the reference we took. */
+                if (dereg_f) {
+                        dereg_f(cli->cl_mgmtcli_obd, obddev);
+                        inter_module_put("mgmtcli_deregister_for_events");
+                } else {
+                        CERROR("can't i_m_g mgmtcli_deregister_for_events\n");
+                }
+        }
+        class_destroy_import(cli->cl_import);
+        cli->cl_import = NULL;
RETURN(0);
}
int rc;
ENTRY;
- lustre_assert_wire_constants ();
-
+ lustre_assert_wire_constants();
+
rc = ptlrpc_init_portals();
if (rc)
RETURN(rc);
ptlrpc_init_connection();
+ llog_init_commit_master();
ptlrpc_put_connection_superhack = ptlrpc_put_connection;
ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight;
{
ptlrpc_exit_portals();
ptlrpc_cleanup_connection();
+#ifdef ENABLE_ORPHANS
+ llog_cleanup_commit_master(0);
+#endif
}
/* connection.c */
#define DEBUG_SUBSYSTEM S_RPC
#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
+# include <linux/config.h>
+# include <linux/module.h>
+# include <linux/kmod.h>
#else
-#include <liblustre.h>
+# include <liblustre.h>
#endif
#include <linux/obd_support.h>
struct lustre_handle old_hdl;
__u64 committed_before_reconnect = imp->imp_peer_committed_transno;
- CERROR("reconnect handle "LPX64"\n",
+ CERROR("reconnect handle "LPX64"\n",
imp->imp_dlm_handle.cookie);
req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
GOTO(out_disc, rc = -ENOTCONN);
}
- if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle,
+ if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle,
sizeof(imp->imp_remote_handle))) {
CERROR("%s@%s changed handle from "LPX64" to "LPX64
"; copying, but this may foreshadow disaster\n",
CERROR("reconnected to %s@%s after partition\n",
imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
- } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) {
+ } else if (lustre_msg_get_op_flags(req->rq_repmsg) &
+ MSG_CONNECT_RECOVERING) {
rc = RECON_RESULT_RECOVERING;
} else {
rc = RECON_RESULT_EVICTED;
}
-
+
old_hdl = imp->imp_remote_handle;
imp->imp_remote_handle = req->rq_repmsg->handle;
CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
rc = USERMODEHELPER(argv[0], argv, envp);
if (rc < 0) {
CERROR("Error invoking recovery upcall %s %s %s: %d; check "
- "/proc/sys/lustre/upcall\n",
+ "/proc/sys/lustre/upcall\n",
argv[0], argv[1], argv[2], rc);
-
+
} else {
CERROR("Invoked upcall %s %s %s",
argv[0], argv[1], argv[2]);
rc = USERMODEHELPER(argv[0], argv, envp);
if (rc < 0) {
- CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check "
- "/proc/sys/lustre/lustre_upcall\n",
+ CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; "
+ "check /proc/sys/lustre/lustre_upcall\n",
argv[0], argv[1], argv[2], argv[3], argv[4],rc);
-
+
} else {
CERROR("Invoked upcall %s %s %s %s %s\n",
argv[0], argv[1], argv[2], argv[3], argv[4]);
struct list_head *tmp, *pos;
struct ptlrpc_request *req;
unsigned long flags;
- __u64 committed = imp->imp_peer_committed_transno;
ENTRY;
/* It might have committed some after we last spoke, so make sure we
spin_unlock_irqrestore(&imp->imp_lock, flags);
CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n",
- imp, imp->imp_target_uuid.uuid, committed);
+ imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno);
list_for_each(tmp, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
* than the one we're replaying (it can't be committed until it's
* replayed, and we're doing that here). l_f_e_safe protects against
* problems with the current request being committed, in the unlikely
- * event of that race. So, in conclusion, I think that it's safe to
+ * event of that race. So, in conclusion, I think that it's safe to
* perform this list-walk without the imp_lock held.
*
* But, the {mdc,osc}_replay_open callbacks both iterate
DEBUG_REQ(D_HA, req, "REPLAY:");
rc = ptlrpc_replay_req(req);
-
+
if (rc) {
CERROR("recovery replay error %d for req "LPD64"\n",
rc, req->rq_xid);
ptlrpc_abort_inflight(imp);
}
-
void ptlrpc_handle_failed_import(struct obd_import *imp)
{
ENTRY;
int rc;
struct obd_import *imp= failed_req->rq_import;
unsigned long flags;
- struct ptlrpc_request *req;
ENTRY;
CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
failed_req->rq_err = 1;
spin_unlock_irqrestore (&failed_req->rq_lock, flags);
}
- ptlrpc_req_finished(req);
EXIT;
}
notify_obd = imp->imp_obd->u.cli.cl_containing_lov;
- /* When deactivating, mark import invalid, and
- abort in-flight requests. */
+ /* When deactivating, mark import invalid, and abort in-flight
+ * requests. */
if (!active) {
- CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
spin_lock_irqsave(&imp->imp_lock, flags);
- imp->imp_invalid = 1;
+ /* This is a bit of a hack, but invalidating replayable
+ * imports makes a temporary reconnect failure into a much more
+ * ugly -- and hard to remedy -- situation. */
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA, "setting import %s INVALID\n",
+ imp->imp_target_uuid.uuid);
+ imp->imp_invalid = 1;
+ }
imp->imp_generation++;
spin_unlock_irqrestore(&imp->imp_lock, flags);
ptlrpc_invalidate_import_state(imp);
-// ptlrpc_abort_inflight(imp);
- }
+ //ptlrpc_abort_inflight(imp);
+ }
if (notify_obd == NULL)
GOTO(out, rc = 0);
out:
/* When activating, mark import valid */
- if (active) {
- CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid);
+ if (active && !rc) {
+ CDEBUG(D_HA, "setting import %s VALID\n",
+ imp->imp_target_uuid.uuid);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_invalid = 0;
spin_unlock_irqrestore(&imp->imp_lock, flags);
ENTRY;
LASSERT (!imp->imp_dlm_fake);
-
+
spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_level != LUSTRE_CONN_FULL)
in_recovery = 1;
ENTRY;
spin_lock_irqsave(&imp->imp_lock, flags);
- if (imp->imp_level == LUSTRE_CONN_FULL ||
+ if (imp->imp_level == LUSTRE_CONN_FULL ||
imp->imp_level == LUSTRE_CONN_NOTCONN)
imp->imp_level = LUSTRE_CONN_RECOVER;
else
in_recover = 1;
spin_unlock_irqrestore(&imp->imp_lock, flags);
- if (in_recover == 1)
+ if (in_recover == 1)
RETURN(-EALREADY);
if (new_uuid) {
reparent_to_init();
}
+/* Return the interval from *small to *large (that is, large - small) in
+ * microseconds: the tv_sec difference scaled by 1000000 plus the tv_usec
+ * difference.  The result is negative when *large is the earlier time.
+ *
+ * NOTE(review): the result is returned as a long; if long is 32 bits the
+ * scaled seconds term overflows once the interval exceeds roughly 2147
+ * seconds -- confirm whether callers (e.g. the service idle-time statistic)
+ * can see intervals that long. */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+ return (large->tv_sec - small->tv_sec) * 1000000 +
+ (large->tv_usec - small->tv_usec);
+}
+
static int ptlrpc_main(void *arg)
{
- struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+ struct ptlrpc_svc_data *data = arg;
struct obd_device *obddev = data->dev;
struct ptlrpc_service *svc = data->svc;
struct ptlrpc_thread *thread = data->thread;
struct ptlrpc_request *request;
ptl_event_t *event;
- int rc = 0;
unsigned long flags;
- cycles_t workdone_time = -1;
- cycles_t svc_workcycles = -1;
+ struct timeval start_time, finish_time;
+ long total;
+ int rc = 0;
ENTRY;
lock_kernel();
RECALC_SIGPENDING;
SIGNAL_MASK_UNLOCK(current, flags);
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
- sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- sprintf(current->comm, "%s|%d", data->name,
- current->thread.mode.tt.extern_pid);
-#else
- strcpy(current->comm, data->name);
-#endif
+ THREAD_NAME(current->comm, "%s", data->name);
unlock_kernel();
OBD_ALLOC(event, sizeof(*event));
- if (!event)
+ if (event == NULL)
GOTO(out, rc = -ENOMEM);
OBD_ALLOC(request, sizeof(*request));
- if (!request)
+ if (request == NULL)
GOTO(out_event, rc = -ENOMEM);
/* Record that the thread is running */
/* XXX maintain a list of all managed devices: insert here */
+ do_gettimeofday(&finish_time);
/* And now, loop forever on requests */
while (1) {
struct l_wait_info lwi = { 0 };
l_wait_event(svc->srv_waitq,
ptlrpc_check_event(svc, thread, event), &lwi);
+ spin_lock(&svc->srv_lock);
if (thread->t_flags & SVC_STOPPING) {
- spin_lock(&svc->srv_lock);
thread->t_flags &= ~SVC_STOPPING;
spin_unlock(&svc->srv_lock);
break;
}
- if (thread->t_flags & SVC_EVENT) {
- cycles_t workstart_time;
-
- spin_lock(&svc->srv_lock);
- thread->t_flags &= ~SVC_EVENT;
- /* Update Service Statistics */
- workstart_time = get_cycles();
- if (workdone_time != -1 && svc->svc_stats != NULL) {
- /* Stats for req(n) are updated just before
- * req(n+1) is executed. This avoids need to
- * reacquire svc->srv_lock after
- * call to handling_request().
- */
- int opc;
-
- /* req_waittime */
- lprocfs_counter_add(svc->svc_stats,
- PTLRPC_REQWAIT_CNTR,
- (workstart_time -
- event->arrival_time));
- /* svc_eqdepth */
- /* Wait for b_eq branch
- lprocfs_counter_add(svc->svc_stats,
- PTLRPC_SVCEQDEPTH_CNTR,
- 0);
- */
- /* svc_idletime */
- lprocfs_counter_add(svc->svc_stats,
- PTLRPC_SVCIDLETIME_CNTR,
- (workstart_time -
- workdone_time));
- /* previous request */
- opc = opcode_offset(request->rq_reqmsg->opc);
- if (opc > 0) {
- LASSERT(opc < LUSTRE_MAX_OPCODES);
- lprocfs_counter_add(svc->svc_stats, opc,
- PTLRPC_LAST_CNTR +
- svc_workcycles);
- }
- }
+ if (!(thread->t_flags & SVC_EVENT)) {
+ CERROR("unknown flag in service");
spin_unlock(&svc->srv_lock);
+ LBUG();
+ EXIT;
+ break;
+ }
+
+ thread->t_flags &= ~SVC_EVENT;
+ spin_unlock(&svc->srv_lock);
+
+ do_gettimeofday(&start_time);
+ total = timeval_sub(&start_time, &event->arrival_time);
+ if (svc->svc_stats != NULL) {
+ lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR,
+ total);
+ lprocfs_counter_add(svc->svc_stats,
+ PTLRPC_SVCIDLETIME_CNTR,
+ timeval_sub(&start_time,
+ &finish_time));
+#if 0 /* Wait for b_eq branch */
+ lprocfs_counter_add(svc->svc_stats,
+ PTLRPC_SVCEQDEPTH_CNTR, 0);
+#endif
+ }
+ if (total / 1000000 > (long)obd_timeout) {
+ CERROR("Dropping request from NID "LPX64" because it's "
+ "%ld seconds old.\n", event->initiator.nid,
+ total / 1000000); /* bug 1502 */
+ } else {
+ CDEBUG(D_HA, "request from NID "LPX64" noticed after "
+ "%ldus\n", event->initiator.nid, total);
rc = handle_incoming_request(obddev, svc, event,
request);
- workdone_time = get_cycles();
- svc_workcycles = workdone_time - workstart_time;
- continue;
}
-
- CERROR("unknown break in service");
- LBUG();
- EXIT;
- break;
+ do_gettimeofday(&finish_time);
+ total = timeval_sub(&finish_time, &start_time);
+
+ CDEBUG((total / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA,
+ "request "LPU64" from NID "LPX64" processed in %ldus "
+ "(%ldus total)\n", request->rq_xid, event->initiator.nid,
+ total, timeval_sub(&finish_time, &event->arrival_time));
+
+ if (svc->svc_stats != NULL) {
+ int opc = opcode_offset(request->rq_reqmsg->opc);
+ if (opc > 0) {
+ LASSERT(opc < LUSTRE_MAX_OPCODES);
+ lprocfs_counter_add(svc->svc_stats,
+ opc + PTLRPC_LAST_CNTR,
+ total);
+ }
+ }
}
/* NB should wait for all SENT callbacks to complete before exiting
* here. Unfortunately at this time there is no way to track this
- * state.
- */
+ * state. */
OBD_FREE(request, sizeof(*request));
out_event:
OBD_FREE(event, sizeof(*event));
# lustre.spec
%define version b_devel
-%define kversion @RELEASE@
+%define kversion @LINUXRELEASE@
%define linuxdir @LINUX@
-Release: 0306170928kernel
Summary: Lustre Lite File System
Name: lustre-lite
Version: %{version}
+Release: @RELEASE@
Copyright: GPL
Group: Utilities/System
Requires: lustre-modules, PyXML
-BuildRoot: /var/tmp/lustre-%{version}-root
Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz
+BuildRoot: /var/tmp/lustre-%{version}-root
%description
The Lustre Lite Cluster File System: kernel drivers for file system,
./configure --with-linux='%{linuxdir}'
make
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#./configure --with-lib
-#make
-#%endif
-
%install
cd $RPM_BUILD_DIR/lustre-%{version}
make install prefix=$RPM_BUILD_ROOT
-#%ifarch i386
-#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-#make install prefix=$RPM_BUILD_ROOT
-#%endif
-
%ifarch alpha
# this hurts me
conf_flag=
fi
depmod -ae || exit 0
-grep -q obdclass /etc/modules.conf || \
- echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
+#grep -q obdclass /etc/modules.conf || \
+# echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf
-grep -q '/dev/obd' /etc/modules.conf || \
- echo 'alias /dev/obd obdclass' >> /etc/modules.conf
+#grep -q '/dev/obd' /etc/modules.conf || \
+# echo 'alias /dev/obd obdclass' >> /etc/modules.conf
-grep -q '/dev/lustre' /etc/modules.conf || \
- echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
+#grep -q '/dev/lustre' /etc/modules.conf || \
+# echo 'alias /dev/lustre obdclass' >> /etc/modules.conf
-grep -q portals /etc/modules.conf || \
- echo 'alias char-major-10-240 portals' >> /etc/modules.conf
+#grep -q portals /etc/modules.conf || \
+# echo 'alias char-major-10-240 portals' >> /etc/modules.conf
-grep -q '/dev/portals' /etc/modules.conf || \
- echo 'alias /dev/portals portals' >> /etc/modules.conf
+#grep -q '/dev/portals' /etc/modules.conf || \
+# echo 'alias /dev/portals portals' >> /etc/modules.conf
%postun
depmod -ae || exit 0
cp $tmp $slapd
rm $tmp
fi
+
%clean
#rm -rf $RPM_BUILD_ROOT
openfile
unlinkmany
fchdir_test
+*.cmd
getdents
o_directory
+mkdirdeep
+utime
+small_write
# LDADD := -lreadline -ltermcap # -lefence
EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \
sanity.sh rundbench mcreate
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
+pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh
+pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh
noinst_DATA =
-noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \
- llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \
- runiozone runregression-net.sh runtests sanity.sh rundbench
+noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh
+noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net
+noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests
+noinst_SCRIPTS += sanity.sh rundbench
noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
-noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
+noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy
+noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime
noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
-noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
+noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory
+noinst_PROGRAMS += small_write
# noinst_PROGRAMS += ldaptest
-sbin_PROGRAMS = mcreate mkdirmany
+sbin_PROGRAMS = mcreate munlink mkdirmany
# ldaptest_SOURCES = ldaptest.c
tchmod_SOURCES = tchmod.c
wantedi_SOURCES = wantedi.c
createtest_SOURCES = createtest.c
open_delay_SOURCES = open_delay.c
-opendirunlink_SOURCES=opendirunlink.c
-opendevunlink_SOURCES=opendirunlink.c
-fchdir_test_SOURCES=fchdir_test.c
+opendirunlink_SOURCES = opendirunlink.c
+opendevunlink_SOURCES = opendevunlink.c
+fchdir_test_SOURCES = fchdir_test.c
getdents_SOURCES=getdents.c
o_directory_SOURCES = o_directory.c
-#mkdirdeep_SOURCES= mkdirdeep.c
-#mkdirdeep_LDADD=-L../portals/util -lptlctl
-#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+utime_SOURCES = utime.c
+mkdirdeep_SOURCES = mkdirdeep.c
+mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl
+mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
+small_write_SOURCES = small_write.c
include $(top_srcdir)/Rules
SRCDIR="`dirname $0`"
CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
debug_client_on()
{
debug_client_on
echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT 2 10
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10
echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT 2 10
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT 2 100
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 10 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 --use_mcreate=0
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on"
-perl rename.pl --count=2 $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=10
echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent
debug_client_off
echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 1000
-echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
+# 1000 single-threaded creates using open() rather than mcreate, run with
+# client debugging switched off.
+echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent
debug_client_on
echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT 2 100 &
-perl $CREATE --silent -- $MNT 2 100 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on"
-perl rename.pl --count=2 --silent $MNT 1000 &
-perl rename.pl --count=2 --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --num_threads=2 --silent
debug_client_off
echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT 2 2000 &
-perl $CREATE --silent -- $MNT 2 2000 &
-wait
-echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off"
-perl rename.pl --count=2 --silent $MNT 2000 &
-perl rename.pl --count=2 --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent
debug_client_on
echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
- perl $CREATE --silent -- $MNT 2 100 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
- perl $CREATE --silent --mcreate=0 -- $MNT 2 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 100 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
- perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
debug_client_off
echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent
debug_client_on
echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
- perl $CREATE --silent -- $MNT 2 500 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
- perl $CREATE --silent --mcreate=0 -- $MNT 2 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 500 ops, debug on"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
- perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
debug_client_off
echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl $CREATE --silent -- $MNT 2 2000 &
-done
-wait
-echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
+echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off"
+perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent
echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl rename.pl --count=2 --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent
SRCDIR="`dirname $0`"
CREATE=$SRCDIR/create.pl
+RENAME=$SRCDIR/rename.pl
debug_client_on()
{
debug_client_on
echo "create.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10
echo "create.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on"
-perl $CREATE --mcreate=0 -- $MNT -1 10
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 --use_mcreate=0
echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on"
-perl $CREATE --mcreate=0 --silent -- $MNT -1 100
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on"
-perl rename.pl $MNT 10
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=10
echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on"
-perl rename.pl --silent $MNT 100
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent
debug_client_off
echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 1000
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off"
-perl rename.pl --silent $MNT 1000
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent
debug_client_on
echo "create.pl, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent -- $MNT -1 100 &
-perl $CREATE --silent -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --silent
echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on"
-perl rename.pl --silent $MNT 1000 &
-perl rename.pl --silent $MNT 1000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --num_threads=2 --silent
debug_client_off
echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent -- $MNT -1 2000 &
-perl $CREATE --silent -- $MNT -1 2000 &
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off"
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent
wait
echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off"
-perl rename.pl --silent $MNT 2000 &
-perl rename.pl --silent $MNT 2000 &
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent
debug_client_on
echo "create.pl, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
- perl $CREATE --silent -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --silent
echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on"
-for i in `seq 1 4`; do
- perl $CREATE --silent --mcreate=0 -- $MNT -1 100 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on"
-for i in `seq 1 4`; do
- perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
debug_client_off
echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off"
-for i in `seq 1 4`; do
- perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent
debug_client_on
echo "create.pl, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
- perl $CREATE --silent -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8 --silent
echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on"
-for i in `seq 1 8`; do
- perl $CREATE --silent --mcreate=0 -- $MNT -1 500 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on"
-for i in `seq 1 8`; do
- perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
debug_client_off
echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl $CREATE --silent -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 &
-done
-wait
+perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent
echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off"
-for i in `seq 1 8`; do
- perl rename.pl --silent $MNT 2000 &
-done
-wait
+perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent
+
sh rundbench 1
sh rundbench 2
sh rundbench 4
[ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no
[ "$CONFIGS" ] || CONFIGS="local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=50
+[ "$MAX_THREADS" ] || MAX_THREADS=10
if [ -z "$THREADS" ]; then
KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
THREADS=`expr $KB / 16384`
if [ "$IOZONE_DIR" != "no" ]; then
mount | grep $MNT || sh llmount.sh
SPACE=`df $MNT | tail -1 | awk '{ print $4 }'`
- IOZ_THREADS=`expr $SPACE / $SIZE`
+ IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)`
[ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
$DEBUG_OFF
LMC=${LMC:-../utils/lmc -m $config}
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
-OSTDEV=$TMP/ost1
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
OSTSIZE=200000
rm -f $config
${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11
# configure mds server
-${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30
# configure ost
-${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30
+${LMC} --add ost --node localhost --obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30
${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2
-#!/usr/bin/perl
+#!/usr/bin/perl -w
+use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+use POSIX ":sys_wait_h";
+
+use diagnostics;
use Getopt::Long;
+use vars qw(
+ $MAX_THREADS
+ );
+
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
my $silent = 0;
-my $mcreate = 1; # should we use mcreate or open?
-my $files = 5;
+my $use_mcreate = 1; # should we use mcreate or open?
+my $num_files = 5; # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
+# Get options from the command line.
GetOptions("silent!" => \$silent,
- "mcreate=i" => \$mcreate,
- "files=i" => \$files);
+ "use_mcreate=i" => \$use_mcreate,
+ "num_files=i" => \$num_files,
+ "mountpt=s" => \$mountpt,
+ "num_mounts=i" => \$num_mounts,
+ "iterations=i" => \$iterations,
+ "num_threads=i" => \$num_threads,
+ ) || die &usage;
+
+# Check for mandatory args.
+if (!$mountpt ||
+ !$num_mounts) {
+ die &usage;
+}
+
+if ($num_threads > $MAX_THREADS) {
+ print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+ print "You will have to change this in the source\n";
+ print "if you really want to run with $num_threads threads.\n\n";
+ exit 1;
+}
-my $mtpt = shift || usage();
-my $mount_count = shift || usage();
-my $i = shift || usage();
-my $count = $i;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+for (my $i=1; $i<=$num_threads; $i++) {
+ my $status = &fork_and_create($i);
+ last if ($status != 0);
+}
+
+# Wait for all our threads to finish.  A blocking waitpid() returns -1
+# only once no children remain.  The previous WNOHANG loop busy-spun,
+# stopped as soon as the first child exited, and never terminated when
+# there were no children at all.
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+exit 0;
+
+#########################################################################
+### SUBROUTINES
+# Print the command-line help for this script and exit.
sub usage () {
- print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n";
- print "example: $0 /mnt/lustre 2 50\n";
- print " will test in /mnt/lustre1 and /mnt/lustre2\n";
- print " $0 /mnt/lustre -1 50\n";
- print " will test in /mnt/lustre only\n";
+ print "\nUsage: $0 [--silent] [--use_mcreate=n] [--num_files=n] [--iterations=n] [--num_threads=n] --mountpt=/path/to/lustre/mount --num_mounts=n\n\n";
+ print "\t--silent\tminimal output\n";
+ print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+ print "\t--num_files=n\tnumber of files to create per iteration, default=5\n";
+ print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+ print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+ print "\t--mountpt\tlocation of lustre mount\n";
+ print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+ print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+ print " will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+ print " $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+ print " will perform 50 iterations in /mnt/lustre only\n\n";
exit;
}
-sub do_open($) {
- my $path = shift;
+#########################################################################
+# Fork one worker for thread number $thread_num.
+#
+# The parent returns 0 as soon as fork succeeds.  The child performs
+# $iterations passes; each pass does $num_files create/unlink pairs on
+# randomly chosen "${mountpt}${which}/thread${thread_num}.${d}" paths.
+# It then removes any leftover files and exits 0.  A fork failure whose
+# message matches "No more process" is retried after 5 seconds; any
+# other fork failure dies.
+sub fork_and_create ($) {
+    my ($thread_num) = @_;
+
+    FORK: {
+        if (my $pid = fork) {
+            # parent here
+            # child process pid is available in $pid
+            return 0;
+        } elsif (defined $pid) { # $pid is zero here if defined
+            # rand() was seeded once in the parent before forking, so
+            # every child inherits the same generator state.  Re-seed
+            # with our own pid so the workers do not all pick the same
+            # sequence of mounts and file numbers.
+            srand(time ^ ($$ + ($$ << 15)));
+
+            my $current_iteration=1;
+            while ($current_iteration <= $iterations) {
+                for (my $i=1; $i<=$num_files; $i++) {
+                    my $which = "";
+                    if ($num_mounts > 0) {
+                        $which = int(rand() * $num_mounts) + 1;
+                    }
+                    my $d = int(rand() * $num_files);
+                    do_open("${mountpt}${which}/thread${thread_num}.${d}");
+
+                    if ($num_mounts > 0) {
+                        $which = int(rand() * $num_mounts) + 1;
+                    }
+                    $d = int(rand() * $num_files);
+                    my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+                    print "Thread $thread_num: Unlink $path start [" . $$."]...\n" if !$silent;
+                    if (unlink($path)) {
+                        print "Thread $thread_num: Unlink done [$$] $path: Success\n" if !$silent;
+                    } else {
+                        print "Thread $thread_num: Unlink done [$$] $path: $!\n"if !$silent;
+                    }
+                }
+                if (($current_iteration) % 100 == 0) {
+                    print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+                }
+                $current_iteration++;
+            }
+
+            # Remove whatever this thread left behind.
+            my $which = "";
+            if ($num_mounts > 0) {
+                $which = int(rand() * $num_mounts) + 1;
+            }
+            for (my $d = 0; $d < $num_files; $d++) {
+                my $path = "${mountpt}${which}/thread${thread_num}.${d}";
+                unlink("$path") if (-e $path);
+            }
+
+            print "Thread $thread_num: Done.\n";
+
+            exit 0;
+
+        } elsif ($! =~ /No more process/) {
+            # EAGAIN, supposedly recoverable fork error
+            sleep 5;
+            redo FORK;
+        } else {
+            # weird fork error
+            die "Can't fork: $!\n";
+        }
+    }
+
+}
+
+#########################################################################
+
+# Create $path, either by running ./mcreate (when $use_mcreate is set)
+# or by opening it for writing and closing it again.  Dies if the
+# open or close fails.
+sub do_open ($) {
+ my ($path) = @_;
- if ($mcreate) {
+ if ($use_mcreate) {
my $tmp = `./mcreate $path`;
if ($tmp) {
print "Creating $path [" . $$."]...\n" if !$silent;
}
} else {
print "Opening $path [" . $$."]...\n"if !$silent;
- open(FH, ">$path") || die "open($PATH): $!";
+ open(FH, ">$path") || die "open($path): $!";
print "Open done [$$] $path: Success\n"if !$silent;
close(FH) || die;
}
}
-while ($i--) {
- my $which = "";
- if ($mount_count > 0) {
- $which = int(rand() * $mount_count) + 1;
- }
- $d = int(rand() * $files);
- do_open("$mtpt$which/$d");
-
- if ($mount_count > 0) {
- $which = int(rand() * $mount_count) + 1;
- }
- $d = int(rand() * $files);
- $path = "$mtpt$which/$d";
- print "Unlink $path start [" . $$."]...\n"if !$silent;
- if (unlink($path)) {
- print "Unlink done [$$] $path: Success\n"if !$silent;
- } else {
- print "Unlink done [$$] $path: $!\n"if !$silent;
- }
- if (($count - $i) % 100 == 0) {
- print STDERR ($count - $i) . " operations [" . $$ . "]\n";
- }
-}
-
-my $which = "";
-if ($mount_count > 0) {
- $which = int(rand() * $mount_count) + 1;
-}
-for ($d = 0; $d < $files; $d++) {
- unlink("$mtpt$which/$d");
-}
-
-print "Done.\n";
return 1;
}
- printf("directio on %s for %dx%lu blocks \n", argv[1], blocks,
+ printf("directio on %s for %dx%lu bytes \n", argv[1], blocks,
st.st_blksize);
seek = (off64_t)seek_blocks * (off64_t)st.st_blksize;
return 1;
}
+ printf("PASS\n");
return 0;
}
# FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=10000
+FSTYPE=${FSTYPE:-ext3}
STRIPE_BYTES=65536
STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs
$LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2
if (($LOV)); then
- $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+ $LMC --add mds --node $SERVER --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
$LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
$LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12
$LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13
if (size_by_seek == (off_t)-1)
prterr("save_buffer: lseek eof");
else if (bufferlength > size_by_seek) {
- warn("save_buffer: .fsxgood file too short... will
-save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek,
- (unsigned long long)bufferlength);
+ /* adjacent literals are pasted with no separator: keep the space */
+ warn("save_buffer: .fsxgood file too short... will "
+ "save 0x%llx bytes instead of 0x%llx\n",
+ (unsigned long long)size_by_seek,
+ (unsigned long long)bufferlength);
bufferlength = size_by_seek;
}
}
if (byteswritten == -1)
prterr("save_buffer write");
else
- warn("save_buffer: short write, 0x%x bytes instead
-of 0x%llx\n",
+ warn("save_buffer: short write, 0x%x bytes instead "
+ "of 0x%llx\n",
(unsigned)byteswritten,
(unsigned long long)bufferlength);
}
if (n) {
prt("\t0x%5x\n", n);
if (bad)
- prt("operation# (mod 256) for the bad data
-may be %u\n", ((unsigned)op & 0xff));
+ prt("operation# (mod 256) for the bad data "
+ "may be %u\n", ((unsigned)op & 0xff));
else
- prt("operation# (mod 256) for the bad data
-unknown, check HOLE and EXTEND ops\n");
+ prt("operation# (mod 256) for the bad data "
+ "unknown, check HOLE and EXTEND ops\n");
} else
prt("????????????????\n");
report_failure(110);
usage(void)
{
+ /* Print the option summary to stdout and exit with status 90. */
fprintf(stdout, "usage: %s",
- "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m
-start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t
-truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed]
-fname\n\
- -b opnum: beginning operation number (default 1)\n\
- -c P: 1 in P chance of file close+open at each op (default infinity)\n\
- -d: debug output for all operations [-d -d = more debugging]\n\
- -l flen: the upper bound on file size (default 262144)\n\
- -m startop:endop: monitor (print debug output) specified byte range
-(default 0:infinity)\n\
- -n: no verifications of file size\n\
- -o oplen: the upper bound on operation size (default 65536)\n\
- -p progressinterval: debug output at specified operation interval\n\
- -q: quieter operation\n\
- -r readbdy: 4096 would make reads page aligned (default 1)\n\
- -s style: 1 gives smaller truncates (default 0)\n\
- -t truncbdy: 4096 would make truncates page aligned (default 1)\n\
- -w writebdy: 4096 would make writes page aligned (default 1)\n\
- -D startingop: debug output starting at specified operation\n\
- -L: fsxLite - no file creations & no file size changes\n\
- -N numops: total # operations to do (default infinity)\n\
- -O: use oplen (see -o flag) for every op (default random)\n\
- -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\
- -S seed: for random # generator (default 1) 0 gets timestamp\n\
- -W: mapped write operations DISabled\n\
- -R: read() system calls only (mapped reads disabled)\n\
- fname: this filename is REQUIRED (no default)\n");
+ "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m "
+"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t "
+"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] "
+"fname\n"
+" -b opnum: beginning operation number (default 1)\n"
+" -c P: 1 in P chance of file close+open at each op (default infinity)\n"
+" -d: debug output for all operations [-d -d = more debugging]\n"
+" -l flen: the upper bound on file size (default 262144)\n"
+" -m startop:endop: monitor (print debug output) specified byte range "
+"(default 0:infinity)\n"
+" -n: no verifications of file size\n"
+" -o oplen: the upper bound on operation size (default 65536)\n"
+" -p progressinterval: debug output at specified operation interval\n"
+" -q: quieter operation\n"
+" -r readbdy: 4096 would make reads page aligned (default 1)\n"
+" -s style: 1 gives smaller truncates (default 0)\n"
+" -t truncbdy: 4096 would make truncates page aligned (default 1)\n"
+" -w writebdy: 4096 would make writes page aligned (default 1)\n"
+" -D startingop: debug output starting at specified operation\n"
+" -L: fsxLite - no file creations & no file size changes\n"
+" -N numops: total # operations to do (default infinity)\n"
+" -O: use oplen (see -o flag) for every op (default random)\n"
+" -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n"
+" -S seed: for random # generator (default 1) 0 gets timestamp\n"
+" -W: mapped write operations DISabled\n"
+" -R: read() system calls only (mapped reads disabled)\n"
+" fname: this filename is REQUIRED (no default)\n");
exit(90);
}
case 'b':
simulatedopcount = getnum(optarg, &endp);
if (!quiet)
- fprintf(stdout, "Will begin at operation
-%ld\n",
+ fprintf(stdout, "Will begin at operation "
+ "%ld\n",
simulatedopcount);
if (simulatedopcount == 0)
usage();
prterr(fname);
warn("main: error on write");
} else
- warn("main: short write, 0x%x bytes instead
-of 0x%x\n",
+ warn("main: short write, 0x%x bytes instead "
+ "of 0x%x\n",
(unsigned)written, maxfilelen);
exit(98);
}
my ($line, $memory);
my $debug_line = 0;
+my $total = 0;
+my $max = 0;
+
while ($line = <>) {
$debug_line++;
my ($file, $func, $lno, $name, $size, $addr, $type);
- if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) {
+ if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/) {
$file = $1;
$lno = $2;
$func = $3;
- $type = $5;
- $name = $6;
- $size = $7;
- $addr = $8;
+ $type = $6;
+ $name = $7;
+ $size = $8;
+ $addr = $9;
+ $tot = $10;
# we can't dump the log after portals has exited, so skip "leaks"
# from memory freed in the portals module unloading.
next;
}
- if ($type eq 'malloced') {
+ if (index($type, 'alloced') >= 0) {
+ if (defined($memory->{$addr})) {
+ print STDERR "*** Two allocs with the same address ($size bytes at $addr, $file:$func:$lno)\n";
+ print STDERR " first malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, second at $file:$func:$lno\n";
+ next;
+ }
+
$memory->{$addr}->{name} = $name;
$memory->{$addr}->{size} = $size;
$memory->{$addr}->{file} = $file;
$memory->{$addr}->{func} = $func;
$memory->{$addr}->{lno} = $lno;
$memory->{$addr}->{debug_line} = $debug_line;
+
+ $total += $size;
+ if ($total > $max) {
+ $max = $total;
+ }
} else {
if (!defined($memory->{$addr})) {
print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n";
}
delete $memory->{$addr};
+ $total -= $size;
+ }
+ if ($total != int($tot)) {
+ print "kernel total $tot != my total $total\n";
+ $total = $tot;
}
}
print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n";
}
-print "Done.\n";
+print "maximum used: $max, amount leaked: $total\n";
echo "Storing LKCD module info in $LCMD"
cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do
MOD="../$M"
- MAP=`echo $MOD | sed -e 's/\.o$/.map/'`
- MODNAME=`basename $MOD | sed -e 's/\.o$//'`
+ MODNAME="`basename $MOD .o`"
+ MAP="$TMP/$MODNAME.map"
nm $MOD > $MAP
echo namelist -a $PWD/$MOD | tee -a $LCMD
- echo symtab -a $PWD/$MAP $MODNAME | tee -a $LCMD
+ echo symtab -a $MAP $MODNAME | tee -a $LCMD
done
#!/bin/sh
-LCONF=${LCONF:-../utils/lconf}
+PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
NAME=${NAME:-echo}
config=$NAME.xml
cat <<EOF
run getattr tests as:
-../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
+`dirname $0`/../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000
EOF
verbose="-v"
fi
-${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \
- $verbose $conf_opt || exit 2
+${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} \
+ ${GDB:---gdb} $verbose $conf_opt || exit 2
LMC="${LMC:-lmc} -m $config"
TMP=${TMP:-/tmp}
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
OSTSIZE=${OSTSIZE:-200000}
-FSTYPE=${FSTYPE:-ext3}
rm -f $config
${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11
# configure mds server
-${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30
LMC=${LMC:-lmc}
TMP=${TMP:-/tmp}
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
-OSTDEV1=${OSTDEV1:-$TMP/ost1}
-OSTDEV2=${OSTDEV2:-$TMP/ost2}
-OSTDEV3=${OSTDEV3:-$TMP/ost3}
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
+OSTDEV3=${OSTDEV3:-$TMP/ost3-`hostname`}
OSTSIZE=${OSTSIZE:-100000}
+# 1 to config an echo client instead of llite
+ECHO_CLIENT=${ECHO_CLIENT:-}
STRIPE_BYTES=65536
STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs
${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1
# configure mds server
-${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+${LMC} -m $config --format --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10
# configure ost
${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22
-${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23
-
-# create client config
-${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV3 --size $OSTSIZE || exit 23
+
+if [ -z "$ECHO_CLIENT" ]; then
+ # create client config
+ ${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
+else
+ ${LMC} -m $config --add echo_client --node localhost --ost lov1 || exit 31
+fi
LMC="${LMC:-lmc} -m $config"
TMP=${TMP:-/tmp}
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=${MDSSIZE:-50000}
+FSTYPE=${FSTYPE:-ext3}
-OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`}
OSTSIZE=${OSTSIZE:-200000}
rm -f $config
${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11
# configure mds server
-${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20
+${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20
# configure ost
-${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size $OSTSIZE || exit 30
+${LMC} --add ost --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30
# create client config
${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40
LMC=${LMC-../utils/lmc}
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
-MDSDEV2=$TMP/mds2
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+# mds2 needs its own backing device: default from MDSDEV2, not MDSDEV
+# (MDSDEV is always set by the line above, so mds2 would share mds1's device)
+MDSDEV2=${MDSDEV2:-$TMP/mds2-`hostname`}
MDSSIZE=50000
+FSTYPE=${FSTYPE:-ext3}
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
+OSTDEV1=${OSTDEV1:-$TMP/ost1-`hostname`}
+OSTDEV2=${OSTDEV2:-$TMP/ost2-`hostname`}
OSTSIZE=100000
MDSNODE=uml1
${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3
# configure mds server
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10
-${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10
+${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --fstype $FSTYPE --dev $MDSDEV2 --size $MDSSIZE ||exit 10
# configure ost
${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22
+${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV1 --size $OSTSIZE || exit 21
+${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
# create client config
${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30
-
-
-
-
#include <sys/stat.h>
#include <dirent.h>
#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
int main(int argc, char **argv)
{
fprintf(stderr, "creating special file %s\n", dname1);
rc = mknod(dname1, 0777|S_IFIFO, 0);
if (rc == -1) {
- fprintf(stderr, "creating %s fails: %s\n",
+ fprintf(stderr, "creating %s fails: %s\n",
dname1, strerror(errno));
exit(1);
}
dname1, strerror(errno));
exit(1);
}
-
+
// doesn't matter if the two dirs are the same??
fddev2 = open(dname2, O_RDONLY | O_NONBLOCK);
if (fddev2 == -1) {
dname2, strerror(errno));
exit(1);
}
-
+
// delete the special file
fprintf (stderr, "unlinking %s\n", dname1);
rc = unlink(dname1);
if (rc) {
- fprintf(stderr, "unlink %s error: %s\n",
+ fprintf(stderr, "unlink %s error: %s\n",
dname1, strerror(errno));
exit(1);
}
- if (access(dname2, F_OK) == 0){
+ if (access(dname2, F_OK) == 0) {
fprintf(stderr, "%s still exists\n", dname2);
exit(1);
}
- if (access(dname1, F_OK) == 0){
+ if (access(dname1, F_OK) == 0) {
fprintf(stderr, "%s still exists\n", dname1);
exit(1);
}
// fchmod one special file
rc = fchmod (fddev1, 0777);
- if(rc == -1)
- {
- fprintf(stderr, "fchmod unlinked special file %s fails: %s\n",
+ if (rc == -1) {
+ fprintf(stderr, "fchmod unlinked special file %s fails: %s\n",
dname1, strerror(errno));
exit(1);
}
-
+
// fstat two files to check if they are the same
rc = fstat(fddev1, &st1);
- if(rc == -1)
- {
- fprintf(stderr, "fstat unlinked special file %s fails: %s\n",
+ if (rc == -1) {
+ fprintf(stderr, "fstat unlinked special file %s fails: %s\n",
dname1, strerror(errno));
exit(1);
}
if (st1.st_mode != st2.st_mode) { // can we do this?
fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2);
exit(1);
- }
+ }
fprintf(stderr, "Ok, everything goes well.\n");
return 0;
#include <unistd.h>
typedef struct flag_mapping {
- char string[20];
- int flag;
+ const char *string;
+ const int flag;
} FLAG_MAPPING;
FLAG_MAPPING flag_table[] = {
case 'f': {
char *tmp;
- cloned_flags = (char *)malloc(strlen(optarg));
+ cloned_flags = (char *)malloc(strlen(optarg)+1);
if (cloned_flags == NULL) {
fprintf(stderr, "Insufficient memory.\n");
exit(-1);
}
- strncpy(cloned_flags, optarg, strlen(optarg));
+ strncpy(cloned_flags, optarg, strlen(optarg)+1);
for (tmp = strtok(optarg, ":|"); tmp;
tmp = strtok(NULL, ":|")) {
int i = 0;
#include <string.h>
#include <errno.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
-#define T1 "write before unlink\n"
-#define T2 "write after unlink\n"
+#define T1 "write data before unlink\n"
+#define T2 "write data after unlink\n"
char buf[128];
int main(int argc, char **argv)
{
- char *fname, *fname2;
+ char *fname, *fname2;
+ struct stat st;
int fd, rc;
if (argc < 2 || argc > 3) {
exit(1);
}
- fname = argv[1];
- if (argc == 3)
- fname2 = argv[2];
- else
- fname2 = argv[1];
+ fname = argv[1];
+ if (argc == 3)
+ fname2 = argv[2];
+ else
+ fname2 = argv[1];
fprintf(stderr, "opening\n");
fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644);
fprintf(stderr, "writing\n");
rc = write(fd, T1, strlen(T1) + 1);
if (rc != strlen(T1) + 1) {
- fprintf(stderr, "write (normal) %s\n", strerror(errno));
+ fprintf(stderr, "write (normal) %s (rc %d)\n",
+ strerror(errno), rc);
+ exit(1);
+ }
+
+ if (argc == 3) {
+ fprintf(stderr, "closing %s\n", fname);
+ rc = close(fd);
+ if (rc) {
+ fprintf(stderr, "close (normal) %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fprintf(stderr, "opening %s\n", fname2);
+ fd = open(fname2, O_RDWR);
+ if (fd == -1) {
+ fprintf(stderr, "open (unlink) %s\n", strerror(errno));
+ exit(1);
+ }
+
+ fprintf (stderr, "unlinking %s\n", fname2);
+ rc = unlink(fname2);
+ if (rc) {
+ fprintf(stderr, "unlink %s\n", strerror(errno));
+ exit(1);
+ }
+
+ if (access(fname2, F_OK) == 0) {
+ fprintf(stderr, "%s still exists\n", fname2);
+ exit(1);
+ }
+ } else {
+ fprintf(stderr, "resetting fd offset\n");
+ rc = lseek(fd, 0, SEEK_SET);
+ if (rc) {
+ fprintf(stderr, "seek %s\n", strerror(errno));
+ exit(1);
+ }
+
+ printf("unlink %s and press enter\n", fname);
+ getc(stdin);
+ }
+
+ if (access(fname, F_OK) == 0) {
+ fprintf(stderr, "%s still exists\n", fname);
exit(1);
}
- if (argc == 3) {
- fprintf(stderr, "closing %s\n", fname);
- rc = close(fd);
- if (rc) {
- fprintf(stderr, "close (normal) %s\n", strerror(errno));
- exit(1);
- }
-
- fprintf(stderr, "opening %s\n", fname2);
- fd = open(fname2, O_RDWR);
- if (fd == -1) {
- fprintf(stderr, "open (unlink) %s\n", strerror(errno));
- exit(1);
- }
-
- fprintf (stderr, "unlinking %s\n", fname2);
- rc = unlink(fname2);
- if (rc) {
- fprintf(stderr, "unlink %s\n", strerror(errno));
- exit(1);
- }
-
- if (access(fname2, F_OK) == 0) {
- fprintf(stderr, "%s still exists\n", fname2);
- exit(1);
- }
- } else {
- printf("unlink %s and press enter\n", fname);
- getc(stdin);
- }
-
- if (access(fname, F_OK) == 0) {
- fprintf(stderr, "%s still exists\n", fname);
- exit(1);
- }
+ fprintf(stderr, "fstating\n");
+ rc = fstat(fd, &st);
+ if (rc) {
+ fprintf(stderr, "fstat (unlink) %s\n", strerror(errno));
+ exit(1);
+ }
+ if (st.st_nlink != 0)
+ fprintf(stderr, "st_nlink = %d\n", (int)st.st_nlink);
fprintf(stderr, "reading\n");
rc = read(fd, buf, strlen(T1) + 1);
if (rc != strlen(T1) + 1) {
- fprintf(stderr, "read (unlink) %s rc %d\n",
+ fprintf(stderr, "read (unlink) %s (rc %d)\n",
strerror(errno), rc);
exit(1);
}
fprintf(stderr, "truncating\n");
rc = ftruncate(fd, 0);
- if (rc ) {
+ if (rc) {
fprintf(stderr, "truncate (unlink) %s\n", strerror(errno));
exit(1);
}
fprintf(stderr, "reading again\n");
rc = read(fd, buf, strlen(T2) + 1);
if (rc != strlen(T2) + 1) {
- fprintf(stderr, "read (after unlink rewrite) %s\n",
- strerror(errno));
+ fprintf(stderr, "read (after unlink rewrite) %s (rc %d)\n",
+ strerror(errno), rc);
exit(1);
}
exit(1);
}
- fprintf(stderr, "closing again\n");
+ fprintf(stderr, "closing\n");
rc = close(fd);
if (rc) {
fprintf(stderr, "close (unlink) %s\n", strerror(errno));
NETWORKTYPE=${NETWORKTYPE:-tcp}
MOUNTPT=${MOUNTPT:-/mnt/lustre}
CONFIG=${CONFIG:-recovery-cleanup.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
MDSSIZE=${MDSSIZE:-100000}
+FSTYPE=${FSTYPE:-ext3}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
OSTSIZE=${OSTSIZE:-100000}
do_mds() {
lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \
--nettype $NETWORKTYPE || exit 4
done
- lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \
- --size $MDSSIZE || exit 5
- lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \
- --size $OSTSIZE || exit 6
+ lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \
+ --dev $MDSDEV --size $MDSSIZE || exit 5
+ lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --fstype $FSTYPE \
+ --dev $OSTDEV --size $OSTSIZE || exit 6
lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
--ost ost1 || exit 7
}
NETWORKTYPE=${NETWORKTYPE:-tcp}
MOUNTPT=${MOUNTPT:-/mnt/lustre}
CONFIG=${CONFIG:-recovery-small.xml}
-MDSDEV=${MDSDEV:-/tmp/mds}
-OSTDEV=${OSTDEV:-/tmp/ost}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
OSTSIZE=${OSTSIZE:-100000}
UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh}
FSTYPE=${FSTYPE:-ext3}
-#!/usr/bin/perl
+#!/usr/bin/perl -w
use strict;
+$|++;
+
+$ENV{PATH}="/bin:/usr/bin";
+$ENV{ENV}="";
+$ENV{BASH_ENV}="";
+
use diagnostics;
use Getopt::Long;
+use POSIX ":sys_wait_h";
-sub usage () {
- print "Usage: $0 <mount point prefix> <iterations>\n";
- print "example: $0 --count=2 /mnt/lustre 50\n";
- print " will test in /mnt/lustre1 and /mnt/lustre2\n";
- print " $0 --count=0 /mnt/lustre 50\n";
- print " will test in /mnt/lustre only\n";
- exit;
-}
-my ($j, $k, $d, $f1, $f2, $path, $silent);
-my $count = 0;
-my $create = 10;
+use vars qw(
+ $MAX_THREADS
+ );
+
+# Don't try to run more than this many threads concurrently.
+$MAX_THREADS = 16;
+
+# Initialize variables
+my $silent = 0;
+my $create_files = 1; # should we create files or not?
+my $use_mcreate = 1; # should we use mcreate or open?
+my $num_dirs = 3; # number of directories to create
+my $num_files = 6; # number of files to create
+my $iterations = 1;
+my $num_threads = 1;
+my $mountpt;
+my $num_mounts = -1;
GetOptions("silent!"=> \$silent,
- "count=i" => \$count,
- "create=i" => \$create);
+ "use_mcreate=i" => \$use_mcreate,
+ "create_files=i" => \$create_files,
+ "use_mcreate=i" => \$use_mcreate,
+ "num_files=i" => \$num_files,
+ "num_dirs=i" => \$num_dirs,
+ "mountpt=s" => \$mountpt,
+ "num_mounts=i" => \$num_mounts,
+ "iterations=i" => \$iterations,
+ "num_threads=i" => \$num_threads,
+ ) || die &usage;
-my $mtpt = shift || usage();
-my $i = shift || usage();
-my $total = $i;
-my $files = 6;
-my $dirs = 3;
-my $mcreate = 0; # should we use mcreate or open?
+# Check for mandatory args.
+if (!$mountpt ||
+ !$num_mounts) {
+ die &usage;
+}
-my $which = "";
-if ($count > 0) {
- $which = int(rand() * $count) + 1;
+if ($num_threads > $MAX_THREADS) {
+ print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n";
+ print "You will have to change this in the source\n";
+ print "if you really want to run with $num_threads threads.\n\n";
+ exit 1;
}
-$k = $dirs;
-if ($create == 0) {
- $k = 0;
+# Initialize rand() function.
+srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`);
+
+#########################################################################
+### MAIN
+
+my $which = "";
+if ($num_mounts > 0) {
+ $which = int(rand() * $num_mounts) + 1;
}
-while ($k--) {
- $path = "$mtpt$which/$k";
- my $rc = mkdir $path, 0755;
- print "mkdir $path failed: $!\n" if !$rc;
- $j = $files;
- while ($j--) {
- `./mcreate $path/$j`;
+
+# Create files and directories (if necessary)
+if ($create_files) {
+ for (my $i=1; $i<=$num_threads;$i++) {
+ for (my $j=0; $j<$num_dirs;$j++) {
+ my $path = "${mountpt}${which}/${i}.${j}";
+ # Parenthesize the arguments: "mkdir $path, 0755 || die ..." parses as
+ # mkdir($path, (0755 || die ...)), so a failed mkdir was never reported.
+ mkdir($path, 0755) || die "Can't mkdir $path: $!\n";
+ for (my $k=0; $k<$num_files; $k++) {
+ my $filepath = "${path}/${k}";
+ &create_file($filepath);
+ if (! -e $filepath) {
+ die "Error creating $filepath\n";
+ }
+ }
+ }
}
}
-while ($i--) {
- my $which = "";
- if ($count > 0) {
- $which = int(rand() * $count) + 1;
- }
- $d = int(rand() * $dirs);
- $f1 = int(rand() * $files);
- $f2 = int(rand() * $files);
- print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent;
- my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2";
- print "[$$] done: $rc\n" if !$silent;
- if (($total - $i) % 100 == 0) {
- print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n";
+for (my $i=1; $i<=$num_threads; $i++) {
+ my $status = &fork_and_rename($i);
+ last if ($status != 0);
+}
+
+# Wait for all our threads to finish.  A blocking waitpid() returns -1
+# only once no children remain.  The previous WNOHANG loop stopped after
+# the first child exited, so the cleanup below could unlink files while
+# other workers were still renaming them.
+my $child = 0;
+do {
+    $child = waitpid(-1, 0);
+} until $child == -1;
+sleep 1;
+
+# Unlink files and directories (if necessary)
+if ($create_files) {
+ for (my $i=1; $i<=$num_threads;$i++) {
+ for (my $j=0; $j<$num_dirs;$j++) {
+ my $path = "${mountpt}${which}/${i}.${j}";
+ for (my $k=0; $k<=$num_files; $k++) {
+ my $filepath = "${path}/${k}";
+ unlink("$filepath") if (-e $filepath);
+ }
+ my $rc = rmdir $path;
+ print "rmdir $path failed: $!\n" if !$rc;
+ }
}
}
-$k = $dirs;
-if ($create == 0) {
- $k = 0;
+exit 0;
+
+#########################################################################
+### SUBROUTINES
+
+# Print the command-line help for this script and exit.
+sub usage () {
+ print "\nUsage: $0 [--silent] [--create_files=n] [--use_mcreate=n] [--num_dirs=n] [--num_files=n] [--iterations=n] [--num_threads=n] --num_mounts=n --mountpt=/path/to/lustre/mount\n\n";
+ print "\t--silent\tminimal output\n";
+ print "\t--create_files=n\tcreate files at start, default=1 (yes)\n";
+ print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n";
+ print "\t--num_dirs=n\tnumber of directories to create per iteration, default=3\n";
+ print "\t--num_files=n\tnumber of files to create per directory, default=6\n";
+ print "\t--iterations=n\tnumber of iterations to perform, default=1\n";
+ print "\t--num_threads=n\tnumber of threads to run, default=1\n";
+ print "\t--mountpt\tlocation of lustre mount\n";
+ print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n";
+ print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n";
+ print " will perform 50 iterations in /mnt/lustre1 and /mnt/lustre2\n";
+ print " $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n";
+ print " will perform 50 iterations in /mnt/lustre only\n\n";
+ exit;
}
-while ($k--) {
- $path = "$mtpt$which/$k";
- $j = $files;
- while ($j--) {
- unlink "$path/$j";
+
+
+#########################################################################
+# Create $path, either by running ./mcreate (when $use_mcreate is set)
+# or by opening it for writing.  Dies on failure, returns 0 otherwise.
+sub create_file ($) {
+ my ($path) = @_;
+
+ if ($use_mcreate) {
+ my $tmp = `./mcreate $path`;
+ if ($tmp =~ /.*error: (.*)\n/) {
+ # report the text captured from mcreate's output; $! here is
+ # this process's errno, not mcreate's failure reason
+ die "Error mcreating $path: $1\n";
+ }
+ } else {
+ open(FH, ">$path") || die "Error opening $path: $!\n";
+ close(FH) || die;
}
- my $rc = rmdir $path;
- print "rmdir $path failed: $!\n" if !$rc;
+ return 0;
}
-print "Done.\n";
+#########################################################################
+# Fork one worker for thread number $thread_num.
+#
+# The parent returns 0 as soon as fork succeeds.  The child performs
+# $iterations passes; each pass does $num_files renames between two
+# randomly chosen files in one randomly chosen
+# "${mountpt}${which}/${thread_num}.${d}" directory.  It then exits 0.
+# A fork failure whose message matches "No more process" is retried
+# after 5 seconds; any other fork failure dies.
+sub fork_and_rename ($) {
+    my ($thread_num) = @_;
+
+    FORK: {
+        if (my $pid = fork) {
+            # parent here
+            # child process pid is available in $pid
+            return 0;
+        } elsif (defined $pid) { # $pid is zero here if defined
+
+            # rand() was seeded once in the parent before forking, so
+            # every child inherits the same generator state.  Re-seed
+            # with our own pid so the workers do not all pick the same
+            # sequence of mounts, directories and files.
+            srand(time ^ ($$ + ($$ << 15)));
+
+            my $current_iteration=1;
+            while ($current_iteration <= $iterations) {
+                for (my $i=0; $i<$num_files; $i++) {
+                    my $which = "";
+                    if ($num_mounts > 0) {
+                        $which = int(rand() * $num_mounts) + 1;
+                    }
+
+                    my $d = int(rand() * $num_dirs);
+                    my $f1 = int(rand() * $num_files);
+                    my $f2 = int(rand() * $num_files);
+                    my $path_f1 = "${mountpt}${which}/${thread_num}.${d}/${f1}";
+                    my $path_f2 = "${mountpt}${which}/${thread_num}.${d}/${f2}";
+
+                    print "Thread $thread_num: [$$] $path_f1 $path_f2 ...\n" if !$silent;
+                    my $rc = rename $path_f1, $path_f2;
+                    print "Thread $thread_num: [$$] done: $rc\n" if !$silent;
+                }
+                if (($current_iteration) % 100 == 0) {
+                    print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n";
+
+                }
+                $current_iteration++;
+            }
+
+            print "Thread $thread_num: Done.\n";
+
+            exit 0;
+
+        } elsif ($! =~ /No more process/) {
+            # EAGAIN, supposedly recoverable fork error
+            sleep 5;
+            redo FORK;
+        } else {
+            # weird fork error
+            die "Can't fork: $!\n";
+        }
+    }
+
+}
#include <string.h>
#include <errno.h>
#include <sys/types.h>
+#include <grp.h>
#include <sys/wait.h>
#define DEBUG 0
-void Usage_and_abort(void)
+/* Usage text; its single %s conversion receives the program name. */
+static const char usage[] =
+"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n"
+" -u user_id switch to UID user_id\n"
+" -g grp_id switch to GID grp_id\n"
+" -G clear supplementary groups\n";
+
+/* Print the usage text for program `name` to stderr, then exit(-1). */
+void Usage_and_abort(const char *name)
{
- fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]"
- " command_to_be_run \n");
- exit(-1);
+ fprintf(stderr, usage, name);
+ exit(-1);
}
-// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run
-// return: the return value of "command_to_be_run"
-// NOTE: returning -1 might be the return code of this program itself or
-// the "command_to_be_run"
-
-// ROOT runs "runas" for free
-// Other users run "runas" requires chmod 6755 "command_to_be_run"
-
int main(int argc, char **argv)
{
- char **my_argv;
+ char **my_argv, *name = argv[0];
int status;
int c,i;
int gid_is_set = 0;
int uid_is_set = 0;
+ int clear_supp_groups = 0;
uid_t user_id;
gid_t grp_id;
if (argc == 1)
- Usage_and_abort();
+ Usage_and_abort(name);
// get UID and GID
- while ((c = getopt (argc, argv, "+u:g:h")) != -1) {
+ while ((c = getopt (argc, argv, "+u:g:hG")) != -1) {
switch (c) {
case 'u':
user_id = (uid_t)atoi(optarg);
gid_is_set = 1;
break;
- case 'h':
- Usage_and_abort();
+ case 'G':
+ clear_supp_groups = 1;
break;
default:
- //fprintf(stderr, "Bad parameters.\n");
- //Usage_and_abort ();
+ case 'h':
+ Usage_and_abort(name);
break;
}
}
if (!uid_is_set)
- Usage_and_abort();
+ Usage_and_abort(name);
if (optind == argc) {
- fprintf(stderr, "Bad parameters.\n");
- Usage_and_abort();
+ fputs("Must specify command to run.\n", stderr);
+ Usage_and_abort(name);
}
// assemble the command
exit(-1);
}
+ if (clear_supp_groups) {
+ status = setgroups(0, NULL);
+ if (status == -1) {
+ perror("clearing supplementary groups");
+ exit(-1);
+ }
+ }
+
// set UID
status = setreuid(user_id, user_id );
if(status == -1) {
exit(-1);
}
-
- fprintf(stderr, "running as USER(%d), Grp (%d): ", user_id, grp_id );
+ fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id,
+ clear_supp_groups ? ", cleared groups" : "");
for (i = 0; i < argc - optind; i++)
fprintf(stderr, " [%s]", my_argv[i]);
#!/bin/sh
-
-DIR=${DIR:-/mnt/lustre/`hostname`}
+MNT=${MNT:-/mnt/lustre}
+DIR=${DIR:-$MNT/`hostname`}
#[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug
mkdir -p $DIR
TGT=$DIR/client.txt
#!/bin/sh
PATH=`dirname $0`/../utils:$PATH
-obdstat filter 1 | while read LINE; do
+# Prefix every line printed by "llobdstat.pl $1 1" with a timestamp and echo
+# it; when a second argument is given, also append the line to that file.
+llobdstat.pl $1 1 | while read LINE; do
- echo "`date +s`: $LINE"
+ # "+%s" prints seconds since the epoch; a bare "+s" prints the letter s
+ echo "`date +%s`: $LINE"
- [ "$1" ] && echo "`date +s`: $LINE" >> $1
+ [ "$2" ] && echo "`date +%s`: $LINE" >> $2
done
#!/bin/sh
SRCDIR="`dirname $0`/"
-export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH
+export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH
LOOPS=${LOOPS:-1}
COUNT=${COUNT:-1000000}
shift
done
-OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
-if [ -z "$OSCMT" ]; then
+MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+if [ -z "$MOUNT" ]; then
sh llmount.sh
- OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
- [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1
+ MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`"
+ [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1
I_MOUNTED="yes"
fi
-OSCTMP=`echo $OSCMT | tr "/" "."`
+OSCTMP=`echo $MOUNT | tr "/" "."`
USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
USED=`expr $USED + 16` # Some space for the status file
# let's start slowly here...
-log "touching $OSCMT"
-touch $OSCMT || fail "can't touch $OSCMT" 2
-HOSTS=$OSCMT/hosts.$$
-
-# this will cause the following cp to trigger bug #620096
-log "create an empty file $HOSTS"
-mcreate $HOSTS
-
-log "copying /etc/hosts to $HOSTS"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
-log "comparing /etc/hosts and $HOSTS"
-diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
-log "renaming $HOSTS to $HOSTS.ren"
-mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
-log "copying /etc/hosts to $HOSTS again"
-cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 8
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS" 9
-
-DST=$OSCMT/runtest.$$
+log "touching $MOUNT"
+touch $MOUNT || fail "can't touch $MOUNT" 2
+HOSTS=$MOUNT/hosts.$$
+
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+ # this will cause the following cp to trigger bug #620096
+ log "create an empty file $HOSTS"
+ mcreate $HOSTS
+ log "copying /etc/hosts to $HOSTS"
+ cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3
+ log "comparing /etc/hosts and $HOSTS"
+ diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4
+ log "renaming $HOSTS to $HOSTS.ren"
+ mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5
+ log "copying /etc/hosts to $HOSTS again"
+ cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6
+ log "truncating $HOSTS"
+ > $HOSTS || fail "can't truncate $HOSTS" 8
+ log "removing $HOSTS"
+ rm $HOSTS || fail "can't remove $HOSTS" 9
+fi
+
+DST=$MOUNT/runtest.$$
# let's start slowly here...
log "creating $DST"
mkdir $DST || fail "can't mkdir $DST" 10
sh llmountcleanup.sh || exit 19
sh llrmount.sh || exit 20
-log "renaming $HOSTS.ren to $HOSTS"
-mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
-log "truncating $HOSTS"
-> $HOSTS || fail "can't truncate $HOSTS" 34
-log "removing $HOSTS"
-rm $HOSTS || fail "can't remove $HOSTS again" 36
log "removing $DST"
rm -r $V $DST || fail "can't remove $DST" 37
+if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then
+ log "renaming $HOSTS.ren to $HOSTS"
+ mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32
+ log "truncating $HOSTS"
+ > $HOSTS || fail "can't truncate $HOSTS" 34
+ log "removing $HOSTS"
+ rm $HOSTS || fail "can't remove $HOSTS again" 36
+fi
+
# mkdirmany test (bug 589)
-log "running mkdirmany $OSCMT/base$$ 100"
-$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed"
+log "running mkdirmany $MOUNT/base$$ 100"
+$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed"
log "removing mkdirmany directories"
-rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed"
+rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
log "done"
NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1`
-if [ $NOWUSED -gt $USED ]; then
+if [ `expr $NOWUSED - $USED` -gt 1024 ]; then
echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2
- echo "This is normal on BA OSTs, because of subdirectories." 1>&2
fi
if [ "$I_MOUNTED" = "yes" ]; then
#!/bin/sh
vmstat 1 | while read LINE ; do
LINE="`date +%s`: $LINE"
- echo $LINE
- [ "$1" ] && echo $LINE >> $1
+ echo "$LINE"
+ [ "$1" ] && echo "$LINE" >> $1
done
set -e
ONLY=${ONLY:-"$*"}
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"} # bugs 1365 and 1360 respectively
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"35 32q 37 39"} # bugs 1360, 1504
SRCDIR=`dirname $0`
PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
-CHECKSTAT=${CHECKSTAT:-"./checkstat -v"}
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
CREATETEST=${CREATETEST:-createtest}
LFIND=${LFIND:-lfind}
LSTRIPE=${LSTRIPE:-lstripe}
LCTL=${LCTL:-lctl}
MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
TOEXCL=${TOEXCL:-toexcl}
TRUNCATE=${TRUNCATE:-truncate}
RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
fi
-MOUNT=${MOUNT:-/mnt/lustre}
-DIR=${DIR:-$MOUNT}
-export NAME=$NAME
+export NAME=${NAME:-local}
SAVE_PWD=$PWD
clean() {
- echo -n "cln.."
- sh llmountcleanup.sh > /dev/null || exit 20
+ echo -n "cln.."
+ sh llmountcleanup.sh > /dev/null || exit 20
}
-
CLEAN=${CLEAN:-clean}
+
start() {
- echo -n "mnt.."
- sh llrmount.sh > /dev/null || exit 10
- echo "done"
+ echo -n "mnt.."
+ sh llrmount.sh > /dev/null || exit 10
+ echo "done"
}
START=${START:-start}
}
run_one() {
- if ! mount | grep -q $MOUNT; then
+ if ! mount | grep -q $DIR; then
$START
fi
log "== test $1: $2"
}
error() {
- echo FAIL
- exit 1
+ echo "FAIL: $@"
+ exit 1
}
pass() {
- echo PASS
+ echo PASS
}
-if ! mount | grep $MOUNT; then
+MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+if [ -z "$MOUNT" ]; then
sh llmount.sh
+ MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
+ [ -z "$MOUNT" ] && error "NAME=$NAME not mounted"
I_MOUNTED=yes
fi
+[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once"
+
+DIR=${DIR:-$MOUNT}
+[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99
+
+rm -rf $DIR/[Rdfs][1-9]*
+
echo preparing for tests involving mounts
-EXT2_DEV=/tmp/SANITY.LOOP
-dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null
-mke2fs -F $EXT2_DEV > /dev/null
+EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP}
+touch $EXT2_DEV
+mke2fs -F $EXT2_DEV 1000 > /dev/null
test_0() {
touch $DIR/f
}
run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============"
-test_6() {
- touch $DIR/f6
- chmod 0666 $DIR/f6
- $CHECKSTAT -t file -p 0666 $DIR/f6 || error
+test_6a() {
+ touch $DIR/f6a
+ chmod 0666 $DIR/f6a || error
+ $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6a "touch .../f6a; chmod .../f6a ======================"
+
+test_6b() {
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 6b" && return
+ $RUNAS chmod 0444 $DIR/f6a && error
+ $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error
+}
+run_test 6b "$RUNAS chmod .../f6a (should return error) =="
+
+test_6c() {
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 6c" && return
+ touch $DIR/f6c
+ chown $RUNAS_ID $DIR/f6c || error
+ $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
+}
+run_test 6c "touch .../f6c; chown .../f6c ======================"
+
+test_6d() {
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 6d" && return
+ $RUNAS chown $UID $DIR/f6c && error
+ $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error
}
-run_test 6 "touch .../f6; chmod .../f6 ========================="
+run_test 6d "$RUNAS chown .../f6c (should return error) =="
+
+test_6e() {
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 6e" && return
+ touch $DIR/f6e
+ chgrp $RUNAS_ID $DIR/f6e || error
+ $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6e "touch .../f6e; chgrp .../f6e ======================"
+
+test_6f() {
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 6f" && return
+ $RUNAS chgrp $UID $DIR/f6e && error
+ $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error
+}
+run_test 6f "$RUNAS chgrp .../f6e (should return error) =="
test_7a() {
mkdir $DIR/d7
run_test 23 "O_CREAT|O_EXCL in subdir =========================="
test_24a() {
- echo '============ rename sanity ================================='
+ echo '== rename sanity =============================================='
echo '-- same directory rename'
mkdir $DIR/R1
touch $DIR/R1/f
$CHECKSTAT -t dir $DIR/R9/a || error
$CHECKSTAT -a file $DIR/R9/a/f || error
}
-run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a ====="
+run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a"
test_24j() {
mkdir $DIR/R10
run_test 24j "source does not exist ============================"
test_25a() {
- echo '== symlink sanity ======================================='
+ echo '== symlink sanity ============================================='
mkdir $DIR/d25
ln -s d25 $DIR/s25
touch $DIR/s25/foo || error
run_test 26a "multiple component symlink ======================="
test_26b() {
- ln -s d26/d26-2/foo $DIR/s26-2
+ mkdir -p $DIR/d26b/d26-2
+ ln -s d26b/d26-2/foo $DIR/s26-2
touch $DIR/s26-2 || error
}
run_test 26b "multiple component symlink at end of lookup ======"
run_test 26e "unlink multiple component recursive symlink ======"
test_27a() {
- echo '== stripe sanity ========================================'
+ echo '== stripe sanity =============================================='
mkdir $DIR/d27
$LSTRIPE $DIR/d27/f0 8192 0 1
$CHECKSTAT -t file $DIR/d27/f0
pass
- log "test_27b: write to one stripe file ========================="
+ log "== test_27b: write to one stripe file ========================="
cp /etc/hosts $DIR/d27/f0
}
run_test 27a "one stripe file =================================="
test_27c() {
$LSTRIPE $DIR/d27/f01 8192 0 2
pass
- log "test_27d: write to two stripe file file f01 ================"
+ log "== test_27d: write to two stripe file file f01 ================"
dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4
}
run_test 27c "create two stripe file f01 ======================="
test_27f() {
$LSTRIPE $DIR/d27/fbad 100 1 2 || true
dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4
+ $LFIND $DIR/d27/fbad
}
run_test 27f "lstripe with bad stripe size (should return error on LOV)"
test_27g() {
$MCREATE $DIR/d27/fnone || error
pass
- log "test 27.9: lfind ============================================"
- $LFIND $DIR/d27
+ log "== test 27h: lfind ============================================"
+ $LFIND $DIR/d27/fnone | grep -q "Has no stripe info" || error
}
run_test 27g "mcreate file without objects to test lfind ======="
run_test 30 "run binary from Lustre (execve) ==================="
test_31() {
- ./openunlink $DIR/f31 $DIR/f31 || error
+ $OPENUNLINK $DIR/f31 $DIR/f31 || error
}
run_test 31 "open-unlink file =================================="
ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error
umount $DIR/d32d/ext2-mountpoint || error
}
-run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========="
+run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ========="
test_32e() {
[ -e $DIR/d32e ] && rm -fr $DIR/d32e
$CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error
$CHECKSTAT -t link $DIR/d32e/symlink01 || error
}
-run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ===="
test_32f() {
[ -e $DIR/d32f ] && rm -fr $DIR/d32f
ls $DIR/d32f/tmp/symlink11 || error
ls $DIR/d32f/symlink01 || error
}
-run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====="
+run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ===="
test_32g() {
[ -e $DIR/d32g ] && rm -fr $DIR/d32g
$CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error
umount $DIR/d32i/ext2-mountpoint || error
}
-run_test 32i "stat d32i/ext2-mountpoint/../test_file ============"
+run_test 32i "stat d32i/ext2-mountpoint/../test_file ==========="
test_32j() {
[ -e $DIR/d32j ] && rm -fr $DIR/d32j
cat $DIR/d32j/ext2-mountpoint/../test_file || error
umount $DIR/d32j/ext2-mountpoint || error
}
-run_test 32j "open d32j/ext2-mountpoint/../test_file ============"
+run_test 32j "open d32j/ext2-mountpoint/../test_file ==========="
test_32k() {
- [ -e $DIR/d32k ] && rm -fr $DIR/d32k
+ rm -fr $DIR/d32k
mkdir -p $DIR/d32k/ext2-mountpoint
mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint
mkdir -p $DIR/d32k/d2
$CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error
umount $DIR/d32k/ext2-mountpoint || error
}
-run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========="
+run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========"
test_32l() {
- [ -e $DIR/d32l ] && rm -fr $DIR/d32l
+ rm -fr $DIR/d32l
mkdir -p $DIR/d32l/ext2-mountpoint
mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error
mkdir -p $DIR/d32l/d2
cat $DIR/d32l/ext2-mountpoint/../d2/test_file || error
umount $DIR/d32l/ext2-mountpoint || error
}
-run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========="
+run_test 32l "open d32l/ext2-mountpoint/../d2/test_file ========"
test_32m() {
- [ -e $DIR/d32m ] && rm -fr $DIR/d32m
+ rm -fr $DIR/d32m
mkdir -p $DIR/d32m/tmp
TMP_DIR=$DIR/d32m/tmp
ln -s $DIR $TMP_DIR/symlink11
$CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error
$CHECKSTAT -t link $DIR/d32m/symlink01 || error
}
-run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======="
+run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======"
test_32n() {
- [ -e $DIR/d32n ] && rm -fr $DIR/d32n
+ rm -fr $DIR/d32n
mkdir -p $DIR/d32n/tmp
TMP_DIR=$DIR/d32n/tmp
ln -s $DIR $TMP_DIR/symlink11
ls -l $DIR/d32n/tmp/symlink11 || error
ls -l $DIR/d32n/symlink01 || error
}
-run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======="
+run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======"
test_32o() {
- [ -e $DIR/d32o ] && rm -fr $DIR/d32o
- [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+ rm -fr $DIR/d32o
+ rm -f $DIR/test_file
touch $DIR/test_file
mkdir -p $DIR/d32o/tmp
TMP_DIR=$DIR/d32o/tmp
run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file"
test_32p() {
- [ -e $DIR/d32p ] && rm -fr $DIR/d32p
- [ -e $DIR/test_file ] && rm -fr $DIR/test_file
+ rm -fr $DIR/d32p
+ rm -f $DIR/test_file
touch $DIR/test_file
mkdir -p $DIR/d32p/tmp
TMP_DIR=$DIR/d32p/tmp
}
run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file"
+test_32q() {
+ [ -e $DIR/d32q ] && rm -fr $DIR/d32q
+ mkdir -p $DIR/d32q
+ mount -t ext2 -o loop $EXT2_DEV $DIR/d32q
+ ls $DIR/d32q || error
+ umount $DIR/d32q || error
+}
+run_test 32q "ls a mounted file system ========================="
+
# chmod 444 /mnt/lustre/somefile
# open(/mnt/lustre/somefile, O_RDWR)
# Should return -1
test_33() {
- [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file
+ rm -f $DIR/test_33_file
touch $DIR/test_33_file
chmod 444 $DIR/test_33_file
chown $RUNAS_ID $DIR/test_33_file
- $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true
+ $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true
}
run_test 33 "write file with mode 444 (should return error) ===="
-test_34() {
- $MCREATE $DIR/f
- $TRUNCATE $DIR/f 100
- rm $DIR/f
+TEST_34_SIZE=${TEST_34_SIZE:-2000000000000}
+test_34a() {
+ rm -f $DIR/test_34_file
+ $MCREATE $DIR/test_34_file || error
+ $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+ $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error
+ $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34a "truncate file that has not been opened ==========="
+
+test_34b() {
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ $OPENFILE -f O_RDONLY $DIR/test_34_file
+ $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
}
-run_test 34 "truncate file that has not been opened ============"
+run_test 34b "O_RDONLY opening file doesn't create objects ====="
+
+test_34c() {
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ $OPENFILE -f O_RDWR $DIR/test_34_file
+ $LFIND $DIR/test_34_file | grep -q "Has no stripe information" && error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+}
+run_test 34c "O_RDWR opening file-with-size works =============="
+
+test_34d() {
+ dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error
+ $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error
+ rm $DIR/test_34_file
+}
+run_test 34d "write to sparse file ============================="
+
+test_34e() {
+ rm -f $DIR/test_34_file
+ $MCREATE $DIR/test_34_file || error
+ $TRUNCATE $DIR/test_34_file 1000 || error
+ $CHECKSTAT -s 1000 $DIR/test_34_file || error
+ $OPENFILE -f O_RDWR $DIR/test_34_file
+ $CHECKSTAT -s 1000 $DIR/test_34_file || error
+}
+run_test 34e "create objects, some with size and some without =="
test_35() {
- [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file
cp /bin/sh $DIR/test_35_file
chmod 444 $DIR/test_35_file
chown $RUNAS_ID $DIR/test_35_file
- $DIR/test_35_file && error
- return 0
+ $DIR/test_35_file && error || true
+ rm $DIR/test_35_file
}
run_test 35 "exec file with mode 444 (should return error) ====="
test_36a() {
- log 36 "cvs operations ===================================="
- mkdir -p $DIR/cvsroot
- chown $RUNAS_ID $DIR/cvsroot
- $RUNAS cvs -d $DIR/cvsroot init
+ sleep 1 # we need a rest, or UMLs clock becomes skewed
+ rm -f $DIR/test_36_file
+ utime $DIR/test_36_file || error
}
-run_test 36a "cvs init ========================================="
+run_test 36a "MDS utime check (mknod, utime) ==================="
test_36b() {
- # on the LLNL clusters, runas will still pick up root's $TMP settings,
- # which will not be writable for the runas user, and then you get a CVS
- # error message with a corrupt path string (CVS bug) and panic.
- # We're not using much space, so just stick it in /tmp, which is
- # safe.
- OLDTMPDIR=$TMPDIR
- OLDTMP=$TMP
- TMPDIR=/tmp
- TMP=/tmp
-
- cd /etc/init.d
- $RUNAS cvs -d $DIR/cvsroot import -m "nomesg" reposname vtag rtag
-
- TMPDIR=$OLDTMPDIR
- TMP=$OLDTMP
+ sleep 1
+ echo "" > $DIR/test_36_file
+ utime $DIR/test_36_file || error
}
-run_test 36b "cvs import ======================================="
+run_test 36b "OST utime check (open, utime) ===================="
test_36c() {
- cd $DIR
- mkdir -p $DIR/reposname
- chown $RUNAS_ID $DIR/reposname
- $RUNAS cvs -d $DIR/cvsroot co reposname
+ sleep 1
+ rm -f $DIR/d36/test_36_file
+ mkdir $DIR/d36
+ chown $RUNAS_ID $DIR/d36
+ $RUNAS utime $DIR/d36/test_36_file || error
}
-run_test 36c "cvs checkout ====================================="
+run_test 36c "non-root MDS utime check (mknod, utime) =========="
test_36d() {
- cd $DIR/reposname
- $RUNAS touch foo36
- $RUNAS cvs add -m 'addmsg' foo36
+ sleep 1
+ echo "" > $DIR/d36/test_36_file
+ $RUNAS utime $DIR/d36/test_36_file || error
}
-run_test 36d "cvs add =========================================="
+run_test 36d "non-root OST utime check (open, utime) ==========="
test_36e() {
- cd $DIR/reposname
- $RUNAS cvs update
-}
-run_test 36e "cvs update ======================================="
-
-# XXX change this: use a non root user
-test_36f() {
- cd $DIR/reposname
- $RUNAS cvs commit -m 'nomsg' foo36
+ sleep 1
+ [ $RUNAS_ID -eq $UID ] && return
+ touch $DIR/d36/test_36_file2
+ $RUNAS utime $DIR/d36/test_36_file2 && error || true
}
-run_test 36f "cvs commit ======================================="
+run_test 36e "utime on non-owned file (should return error) ===="
test_37() {
mkdir -p $DIR/dextra
echo f > $DIR/dextra/fbugfile
- mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra
- ls $DIR/dextra |grep "\<fbugfile\>" && error
- umount /$EXT2_DEV
- rm -f DIR/dextra/fbugfile
+ mount -t ext2 -o loop $EXT2_DEV $DIR/dextra
+ ls $DIR/dextra | grep "\<fbugfile\>" && error
+ umount $DIR/dextra || error
+ rm -f $DIR/dextra/fbugfile || error
}
-run_test 37 "ls a mounted file system to check the old contents ====="
+run_test 37 "ls a mounted file system to check old content ====="
# open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
test_38() {
- o_directory $DIR/test38
+ o_directory $DIR/test38
}
run_test 38 "open a regular file with O_DIRECTORY =============="
-
+
+test_39() {
+ touch $DIR/test_39_file
+ touch $DIR/test_39_file2
+# ls -l $DIR/test_39_file $DIR/test_39_file2
+# ls -lu $DIR/test_39_file $DIR/test_39_file2
+# ls -lc $DIR/test_39_file $DIR/test_39_file2
+ sleep 2
+ $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2
+# ls -l $DIR/test_39_file $DIR/test_39_file2
+# ls -lu $DIR/test_39_file $DIR/test_39_file2
+# ls -lc $DIR/test_39_file $DIR/test_39_file2
+ [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error
+}
+run_test 39 "mtime changed on create ==========================="
+
+# A failed open(O_WRONLY|O_TRUNC) by a user without write permission must
+# leave the file size untouched.
+test_40() {
+ # when RUNAS_ID equals UID the open runs as the file's owner and would
+ # succeed, so the check is meaningless (same guard as tests 6b-6f)
+ [ $RUNAS_ID -eq $UID ] && echo "skipping test 40" && return
+ dd if=/dev/zero of=$DIR/f40 bs=4096 count=1
+ $RUNAS $OPENFILE -f O_WRONLY:O_TRUNC $DIR/f40 && error
+ $CHECKSTAT -t file -s 4096 $DIR/f40 || error
+}
+run_test 40 "failed open(O_TRUNC) doesn't truncate ============="
+
+test_41() {
+ # bug 1553
+ small_write $DIR/f41 18
+}
+run_test 41 "test small file write + fstat ====================="
+
+# on the LLNL clusters, runas will still pick up root's $TMP settings,
+# which will not be writable for the runas user, and then you get a CVS
+# error message with a corrupt path string (CVS bug) and panic.
+# We're not using much space, so just stick it in /tmp, which is safe.
+OLDTMPDIR=$TMPDIR
+OLDTMP=$TMP
+TMPDIR=/tmp
+TMP=/tmp
+OLDHOME=$HOME
+[ $RUNAS_ID -ne $UID ] && HOME=/tmp
+
+test_99a() {
+ echo 99 "cvs operations ===================================="
+ mkdir -p $DIR/d99cvsroot
+ chown $RUNAS_ID $DIR/d99cvsroot
+ $RUNAS cvs -d $DIR/d99cvsroot init || error
+}
+run_test 99a "cvs init ========================================="
+
+test_99b() {
+ cd /etc/init.d
+ $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag
+}
+run_test 99b "cvs import ======================================="
+
+test_99c() {
+ cd $DIR
+ mkdir -p $DIR/d99reposname
+ chown $RUNAS_ID $DIR/d99reposname
+ $RUNAS cvs -d $DIR/d99cvsroot co d99reposname
+}
+run_test 99c "cvs checkout ====================================="
+
+test_99d() {
+ cd $DIR/d99reposname
+ $RUNAS touch foo99
+ $RUNAS cvs add -m 'addmsg' foo99
+}
+run_test 99d "cvs add =========================================="
+
+test_99e() {
+ cd $DIR/d99reposname
+ $RUNAS cvs update
+}
+run_test 99e "cvs update ======================================="
+
+test_99f() {
+ cd $DIR/d99reposname
+ $RUNAS cvs commit -m 'nomsg' foo99
+}
+run_test 99f "cvs commit ======================================="
+
+TMPDIR=$OLDTMPDIR
+TMP=$OLDTMP
+HOME=$OLDHOME
log "cleanup: ======================================================"
-rm -r $DIR/[Rdfs][1-9]*
+rm -rf $DIR/[Rdfs][1-9]*
if [ "$I_MOUNTED" = "yes" ]; then
sh llmountcleanup.sh || error
fi
set -e
-PATH=$PATH:.
+ONLY=${ONLY:-"$*"}
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"8"} # bug 1557
+
+SRCDIR=`dirname $0`
+PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
-MOUNT1=${MOUNT1:-/mnt/lustre1}
-MOUNT2=${MOUNT2:-/mnt/lustre2}
+CREATETEST=${CREATETEST:-createtest}
+LFIND=${LFIND:-lfind}
+LSTRIPE=${LSTRIPE:-lstripe}
+LCTL=${LCTL:-lctl}
+MCREATE=${MCREATE:-mcreate}
+OPENFILE=${OPENFILE:-openfile}
+OPENUNLINK=${OPENUNLINK:-openunlink}
+TOEXCL=${TOEXCL:-toexcl}
+TRUNCATE=${TRUNCATE:-truncate}
+
+if [ $UID -ne 0 ]; then
+ RUNAS_ID="$UID"
+ RUNAS=""
+else
+ RUNAS_ID=${RUNAS_ID:-500}
+ RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
+fi
+
export NAME=${NAME:-mount2}
+SAVE_PWD=$PWD
+
clean() {
- echo -n "cln.."
- sh llmountcleanup.sh > /dev/null
+ echo -n "cln.."
+ sh llmountcleanup.sh > /dev/null || exit 20
}
-
CLEAN=${CLEAN:-clean}
+
start() {
- echo -n "mnt.."
- sh llrmount.sh > /dev/null
- echo -n "done"
+ echo -n "mnt.."
+ sh llrmount.sh > /dev/null || exit 10
+ echo "done"
}
START=${START:-start}
-error () {
- echo FAIL
- exit 1
-}
-
-pass() {
- echo PASS
-}
-
-mkdir -p $MOUNT2
-mount | grep $MOUNT1 || sh llmount.sh
-
-echo -n "test 1: check create on 2 mtpt's..."
-touch $MOUNT1/f1
-[ -f $MOUNT2/f1 ] || error
-pass
-
-echo "test 2: check attribute updates on 2 mtpt's..."
-chmod 777 $MOUNT2/f1
-$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error
-pass
-
-echo "test 2b: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2b
-ls -l $MOUNT2/f2b
-chmod 777 $MOUNT2/f2b
-$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error
-pass
-
-echo "test 2c: check cached attribute updates on 2 mtpt's..."
-touch $MOUNT1/f2c
-ls -l $MOUNT2/f2c
-chmod 777 $MOUNT1/f2c
-$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error
-pass
-
-echo "test 3: check after remount attribute updates on 2 mtpt's..."
-chmod a-x $MOUNT2/f1
-$CLEAN
-$START
-$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error
-pass
-
-echo "test 4: unlink on one mountpoint removes file on other..."
-rm $MOUNT2/f1
-$CHECKSTAT -a $MOUNT1/f1 || error
-pass
-
-echo -n "test 5: symlink on one mtpt, readlink on another..."
-( cd $MOUNT1 ; ln -s this/is/good lnk )
-
-[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error
-pass
-
-echo -n "test 6: fstat validation on multiple mount points..."
-./multifstat $MOUNT1/f6 $MOUNT2/f6
-pass
-
-if [ -n "$BUG_1365" ]; then
-echo -n "test 7: create a file on one mount, truncate it on the other..."
-mcreate $MOUNT1/f1
-truncate $MOUNT2/f1 100
-rm $MOUNT1/f1
-pass
-else
-echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)."
-fi
+# Echo the message and also pass it to "lctl mark"; a failing lctl is
+# ignored so that logging never aborts the script under "set -e".
+log() {
+ echo "$*"
+ lctl mark "$*" || true
+}
+
+# Run a single test: $1 is the test id (function test_$1), $2 its description.
+# Runs $START first when $DIR1 does not show up in the mount table, calls
+# error (which exits) if the test function fails, then returns to the saved
+# working directory and runs $CLEAN.
+run_one() {
+ if ! mount | grep -q $DIR1; then
+ $START
+ fi
+ log "== test $1: $2"
+ test_$1 || error
+ pass
+ cd $SAVE_PWD
+ $CLEAN
+}
+
+# Decide whether test $1 (description $2) runs, based on ONLY, EXCEPT and
+# ALWAYS_EXCEPT.  An entry N matches test id N followed by any run of
+# lowercase letters (so "1" selects 1a, 1b, ...).
+run_test() {
+ # ONLY is checked first: a match runs the test even if it is also excluded
+ for O in $ONLY; do
+ if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then
+ echo ""
+ run_one $1 "$2"
+ return $?
+ else
+ echo -n "."
+ fi
+ done
+ for X in $EXCEPT $ALWAYS_EXCEPT; do
+ if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then
+ echo "skipping excluded test $1"
+ return 0
+ fi
+ done
+ # no ONLY list given: every test that was not excluded runs
+ if [ -z "$ONLY" ]; then
+ run_one $1 "$2"
+ return $?
+ fi
+}
+
+error () {
+ echo "FAIL: $@"
+ exit 1
+}
+
+pass() {
+ echo PASS
+}
+
+MOUNT1=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| head -1`
+MOUNT2=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| tail -1`
+[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once"
+[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice"
+[ `mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| wc -l` -ne 2 ] && \
+ error "NAME=$NAME mounted more than twice"
+
+DIR1=${DIR1:-$MOUNT1}
+DIR2=${DIR2:-$MOUNT2}
+[ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96
+[ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95
+
+# Remove leftovers of earlier runs.  -r is needed because the d[0-9]* pattern
+# matches directories; plain "rm -f" fails on them and "set -e" would then
+# abort the whole script.
+rm -rf $DIR1/[df][0-9]* $DIR1/lnk
+
+test_1a() {
+ touch $DIR1/f1
+ [ -f $DIR2/f1 ] || error
+}
+run_test 1a "check create on 2 mtpt's =========================="
+
+test_1b() {
+ chmod 777 $DIR2/f1
+ $CHECKSTAT -t file -p 0777 $DIR1/f1 || error
+ chmod a-x $DIR2/f1
+}
+run_test 1b "check attribute updates on 2 mtpt's ==============="
+
+test_1c() {
+ $CHECKSTAT -t file -p 0666 $DIR1/f1 || error
+}
+run_test 1c "check after remount attribute updates on 2 mtpt's ="
+
+test_1d() {
+ rm $DIR2/f1
+ $CHECKSTAT -a $DIR1/f1 || error
+}
+run_test 1d "unlink on one mountpoint removes file on other ===="
+
+test_2a() {
+ touch $DIR1/f2a
+ ls -l $DIR2/f2a
+ chmod 777 $DIR2/f2a
+ $CHECKSTAT -t file -p 0777 $DIR1/f2a || error
+}
+run_test 2a "check cached attribute updates on 2 mtpt's ========"
+
+test_2b() {
+ touch $DIR1/f2b
+ ls -l $DIR2/f2b
+ chmod 777 $DIR1/f2b
+ $CHECKSTAT -t file -p 0777 $DIR2/f2b || error
+}
+run_test 2b "check cached attribute updates on 2 mtpt's ========"
+
+test_3() {
+ ( cd $DIR1 ; ln -s this/is/good lnk )
+ [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \
+ error
+}
+run_test 3 "symlink on one mtpt, readlink on another ==========="
+
+test_4() {
+ ./multifstat $DIR1/f6 $DIR2/f6
+}
+run_test 4 "fstat validation on multiple mount points =========="
+
+# Create an object-less file through one mount point and truncate it through
+# the other.  The helpers are taken from $MCREATE/$TRUNCATE so overrides are
+# honoured, and each step is checked so a failed truncate cannot pass
+# unnoticed.
+test_5() {
+ $MCREATE $DIR1/f5 || error
+ $TRUNCATE $DIR2/f5 100 || error
+ rm $DIR1/f5
+}
+run_test 5 "create a file on one mount, truncate it on the other"
+
+# Unlink through one mount point a file that is held open through the other.
+test_6() {
+ # use the $OPENUNLINK setting (found via PATH) rather than a path that
+ # only works when the current directory is the tests directory
+ $OPENUNLINK $DIR1/f6 $DIR2/f6 || error
+}
+run_test 6 "remove of open file on other node =================="
+
+test_7() {
+ ./opendirunlink $DIR1/d7 $DIR2/d7 || error
+}
+run_test 7 "remove of open directory on other node ============="
+
+test_8() {
+ ./opendevunlink $DIR1/dev8 $DIR2/dev8 || error
+}
+run_test 8 "remove of open special file on other node =========="
+
+test_9() {
+ MTPT=1
+ > $DIR2/f9
+ for C in a b c d e f g h i j k l; do
+ DIR=`eval echo \\$DIR$MTPT`
+ echo -n $C >> $DIR/f9
+ [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+ done
+ [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || error
+}
+run_test 9 "append of file with sub-page size on multiple mounts"
+
+test_10() {
+ MTPT=1
+ OFFSET=0
+ > $DIR2/f10
+ for C in a b c d e f g h i j k l; do
+ DIR=`eval echo \\$DIR$MTPT`
+ echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1
+ [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
+ OFFSET=`expr $OFFSET + 1`
+ done
+ [ "`cat $DIR1/f10`" = "abcdefghijkl" ] || error
+}
+run_test 10 "write of file with sub-page size on multiple mounts "
-echo "test 9: remove of open file on other node..."
-./openunlink $MOUNT1/f9 $MOUNT2/f9 || error
-pass
-
-echo "test 9b: remove of open directory on other node..."
-./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error
-pass
-
-#echo "test 9c: remove of open special file on other node..."
-#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error
-#pass
-
-echo -n "test 10: append of file with sub-page size on multiple mounts..."
-MTPT=1
-> $MOUNT2/f10
-for C in a b c d e f g h i j k l; do
- MOUNT=`eval echo \\$MOUNT$MTPT`
- echo -n $C >> $MOUNT/f10
- [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
-done
-[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error
-
-echo -n "test 11: write of file with sub-page size on multiple mounts..."
-MTPT=1
-OFFSET=0
-> $MOUNT2/f11
-for C in a b c d e f g h i j k l; do
- MOUNT=`eval echo \\$MOUNT$MTPT`
- echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1
- [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1
- OFFSET=`expr $OFFSET + 1`
-done
-[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error
-
-rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk
-
-$CLEAN
-
-exit
+rm -f $DIR1/f[0-9]* $DIR1/lnk
LMC=${LMC:-lmc}
TMP=${TMP:-/tmp}
-MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
MDSSIZE=${MDSSIZE:-50000}
OSTDEVBASE=$TMP/ost
FSTYPE=${FSTYPE:-ext3}
NETTYPE=${NETTYPE:-tcp}
+# Suffix of the h2<type> helper used to turn a node name into a NID.
+# Defaults to the network type; NOTE(review): the original default was
+# $NODETYPE, which is not set anywhere in the visible script - confirm.
+NIDTYPE=${NIDTYPE:-$NETTYPE}
# NOTE - You can't have different MDS/OST nodes and also have clients on the
# MDS/OST nodes without using --endlevel and --startlevel during lconf.
rm -f $config
+# NID helper for NIDTYPE=localhost: prints "localhost" whatever the argument.
+h2localhost () {
+ echo localhost
+}
+
h2tcp () {
case $1 in
client) echo '\*' ;;
echo -n "adding NET for:"
for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
echo -n " $NODE"
- ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1
+ ${LMC} -m $config --add net --node $NODE --nid `h2$NIDTYPE $NODE` --nettype $NETTYPE || exit 1
done
# configure mds server
for NODE in $OSTNODES; do
eval OSTDEV=\$OSTDEV$COUNT
echo -n " $NODE"
- OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT}
+ OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT-`hostname`}
${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21
COUNT=`expr $COUNT + 1`
done
if (argc != 2)
usage(argv[0]);
- before_mknod = time(0);
+ /* Adjust the before time back one second, because the kernel's
+ * CURRENT_TIME (lockless clock reading, used to set inode times)
+ * may drift against the do_gettimeofday() time (TSC-corrected and
+ * locked clock reading, used to return timestamps to user space).
+ * This means that the mknod time could be a second older than the
+ * before time, even for a local filesystem such as ext3.
+ */
+ before_mknod = time(0) - 1;
rc = mknod(filename, 0700, S_IFREG);
after_mknod = time(0);
if (rc && errno != EEXIST) {
return 4;
}
- printf("%s: good mknod times %lu <= %lu <= %lu\n",
- prog, before_mknod, st.st_mtime, after_mknod);
+ printf("%s: good mknod times %lu%s <= %lu <= %lu\n",
+ prog, before_mknod, before_mknod == st.st_mtime ? "*":"",
+ st.st_mtime, after_mknod);
sleep(5);
}
- before_utime = time(0);
+ /* See above */
+ before_utime = time(0) - 1;
rc = utime(filename, NULL);
after_utime = time(0);
if (rc) {
return 7;
}
- printf("%s: good utime times %lu <= %lu <= %lu\n",
- prog, before_utime, st.st_mtime, after_utime);
+ printf("%s: good utime times %lu%s <= %lu <= %lu\n",
+ prog, before_utime, before_utime == st.st_mtime ? "*" : "",
+ st.st_mtime, after_utime);
return 0;
}
obdio
obdbarrier
lload
-wirecheck
\ No newline at end of file
+wirecheck
+.*.cmd
+.*.d
from error import LconfError, OptionError
from cmdline import Options
-CONFIG_VERSION="2003060501"
+CONFIG_VERSION="2003070801"
import sys, getopt, types
import string, os
import ldap
+from stat import S_IROTH, S_IRGRP
PYMOD_DIR = "/usr/lib/lustre/python"
def development_mode():
sys.path.append(PYMOD_DIR)
import Lustre
+PARAM = Lustre.Options.PARAM
lactive_options = [
- ('ldapurl',"LDAP server URL", Lustre.Options.PARAM,
- "ldap://localhost"),
- ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM),
- ('group', "The group of devices to update", Lustre.Options.PARAM),
- ('active', "The active node name", Lustre.Options.PARAM),
+ ('ldapurl',"LDAP server URL", PARAM, "ldap://localhost"),
+ ('config', "Cluster config name used for LDAP query", PARAM),
+ ('group', "The group of devices to update", PARAM),
+ ('active', "The active node name", PARAM),
+ ('pwfile', "File containing password", PARAM),
]
def fatal(*args):
print "! " + msg
sys.exit(1)
-
cl = Lustre.Options("lactive","", lactive_options)
config, args = cl.parse(sys.argv[1:])
if not config.config:
fatal("Missing config")
-
+
+if config.pwfile:
+ try:
+ pwperm = os.stat(config.pwfile)[0]
+ pwreadable = pwperm & (S_IRGRP | S_IROTH)
+ if pwreadable:
+ if pwreadable == (S_IRGRP | S_IROTH):
+ readable_by = "group and others"
+ elif pwreadable == S_IRGRP:
+ readable_by = "group"
+ else:
+ readable_by = "others"
+ print "WARNING: Password file %s is readable by %s" % (
+ config.pwfile, readable_by)
+
+ pwfile = open(config.pwfile, "r")
+ pw = string.strip(pwfile.readline())
+ pwfile.close()
+ except Exception, e:
+ fatal("Can't read secret from pwfile %s: %s" % (config.pwfile, e))
+else:
+ print "no pwfile specified, binding anonymously"
+ pw = ""
+
base = "config=%s,fs=lustre" % (config.config,)
-db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret",
- url = config.ldapurl)
+db = Lustre.LustreDB_LDAP('', {}, base=base, pw = pw, url = config.ldapurl)
active_node = db.lookup_name(config.active)
if not active_node:
#!/usr/bin/env python
#
-# Copyright (C) 2002 Cluster File Systems, Inc.
-# Author: Robert Read <rread@clusterfs.com>
+# Copyright (C) 2002-2003 Cluster File Systems, Inc.
+# Authors: Robert Read <rread@clusterfs.com>
+# Mike Shaver <shaver@clusterfs.com>
# This file is part of Lustre, http://www.lustre.org.
#
# Lustre is free software; you can redistribute it and/or
import sys, getopt, types
import string, os, stat, popen2, socket, time, random, fcntl, select
-import re, exceptions, signal
+import re, exceptions, signal, traceback
import xml.dom.minidom
if sys.version[0] == '1':
PORTALS_DIR = 'portals'
-# Please keep these uptodate with the values in portals/kp30.h
+# Please keep these in sync with the values in portals/kp30.h
ptldebug_names = {
"trace" : (1 << 0),
"inode" : (1 << 1),
"ptlrouter" : (20 << 24),
"cobd" : (21 << 24),
"ptlbd" : (22 << 24),
+ "log" : (23 << 24),
+ "mgmt" : (24 << 24),
}
add_route %s %s %s
quit """ % (net,
gw, lo, hi)
- self.run(cmds)
-
+ try:
+ self.run(cmds)
+ except CommandError, e:
+ log ("ignore: ")
+ e.dump()
def del_route(self, net, gw, lo, hi):
cmds = """
quit """ % (net,
uuid, tgt, net,
gw, tgt)
- self.run(cmds)
+ try:
+ self.run(cmds)
+ except CommandError, e:
+ log ("ignore: ")
+ e.dump()
# add a route to a range
def del_route_host(self, net, uuid, gw, tgt):
local=string.rstrip(local[0])
return local
-
# XXX: instead of device_list, ask for $name and see what we get
def is_prepared(name):
self_nid = self.nid
if gw_nid < self_nid:
try:
- lctl.disconnect(router.net_type, router.nid, router.port,
- router.uuid)
+ lctl.disconnect(gw.net_type, gw.nid, gw.port,
+ gw.uuid)
except CommandError, e:
print "disconnectAll failed: ", self.name
e.dump()
e.dump()
cleanup_error(e.rc)
+class Management(Module):
+ def __init__(self, db):
+ Module.__init__(self, 'MGMT', db)
+ self.add_lustre_module('obdclass', 'obdclass')
+ self.add_lustre_module('ptlrpc', 'ptlrpc')
+ self.add_lustre_module('ldlm', 'ldlm')
+ self.add_lustre_module('mgmt', 'mgmt_svc')
+
+ def prepare(self):
+ if is_prepared(self.name):
+ return
+ self.info()
+ lctl.newdev(attach="mgmt %s %s" % (self.name, self.uuid))
+
+ def safe_to_clean(self):
+ return 1
+
+ def cleanup(self):
+ if is_prepared(self.name):
+ Module.cleanup(self)
+
class LDLM(Module):
def __init__(self,db):
Module.__init__(self, 'LDLM', db)
Module.cleanup(self)
class LOV(Module):
- def __init__(self, db, uuid):
+ def __init__(self, db, uuid, fs_name):
Module.__init__(self, 'LOV', db)
self.add_lustre_module('mdc', 'mdc')
self.add_lustre_module('lov', 'lov')
self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
self.osclist = []
self.client_uuid = generate_client_uuid(self.name)
+ self.fs_name = fs_name
self.mdc_name = ''
- self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid)
+ self.mdc = get_mdc(db, self.client_uuid, fs_name, self.mds_uuid)
for obd_uuid in self.devlist:
obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd, self.client_uuid, self.name)
+ osc = get_osc(obd, self.client_uuid, fs_name)
if osc:
self.osclist.append(osc)
else:
# isn't implemented here yet.
osc.prepare(ignore_connect_failure=0)
except CommandError, e:
- print "Error preparing OSC %s (inactive)\n" % osc.uuid
+ print "Error preparing OSC %s\n" % osc.uuid
raise e
self.mdc.prepare()
self.mdc_name = self.mdc.name
Module.cleanup(self)
for osc in self.osclist:
osc.cleanup()
- mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid)
+ mdc = get_mdc(self.db, self.client_uuid, self.fs_name, self.mds_uuid)
mdc.cleanup()
def load_module(self):
break
class LOVConfig(Module):
- def __init__(self,db):
+ def __init__(self, db):
Module.__init__(self, 'LOVConfig', db)
self.lov_uuid = self.db.get_first_ref('lov')
l = self.db.lookup(self.lov_uuid)
- self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
+ self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID", '')
def prepare(self):
lov = self.lov
if not self.osdtype == 'obdecho':
clean_loop(self.devpath)
+def mgmt_uuid_for_fs(mtpt_name):
+ if not mtpt_name:
+ return ''
+ mtpt_db = toplevel.lookup_name(mtpt_name)
+ fs_uuid = mtpt_db.get_first_ref('filesystem')
+ fs = toplevel.lookup(fs_uuid)
+ if not fs:
+ return ''
+ return fs.get_first_ref('mgmt')
+
# Generic client module, used by OSC and MDC
class Client(Module):
- def __init__(self, tgtdb, uuid, module, owner):
+ def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
+ module_dir=None):
self.target_name = tgtdb.getName()
self.target_uuid = tgtdb.getUUID()
self.db = tgtdb
self.module = module
self.module_name = string.upper(module)
- self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
- self.target_name, owner)
+ if not self_name:
+ self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
+ self.target_name, fs_name)
+ else:
+ self.name = self_name
self.uuid = uuid
self.lookup_server(self.tgt_dev_uuid)
- self.add_lustre_module(module, module)
+ mgmt_uuid = mgmt_uuid_for_fs(fs_name)
+ if mgmt_uuid:
+ self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
+ else:
+ self.mgmt_name = ''
+ self.fs_name = fs_name
+ if not module_dir:
+ module_dir = module
+ self.add_lustre_module(module_dir, module)
def lookup_server(self, srv_uuid):
""" Lookup a server's network information """
raise e
if srv:
lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid),
- setup ="%s %s" %(self.target_uuid, srv.uuid))
+ setup ="%s %s %s" % (self.target_uuid, srv.uuid,
+ self.mgmt_name))
def cleanup(self):
if is_prepared(self.name):
else:
srv, r = find_route(self.get_servers())
if srv:
- lctl.del_route_host(r[0], srv.uuid, r[1], r[2])
+ lctl.del_route_host(r[0], srv.uuid, r[1], r[3])
except CommandError, e:
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
class MDC(Client):
- def __init__(self, db, uuid, owner):
- Client.__init__(self, db, uuid, 'mdc', owner)
+ def __init__(self, db, uuid, fs_name):
+ Client.__init__(self, db, uuid, 'mdc', fs_name)
+
class OSC(Client):
- def __init__(self, db, uuid, owner):
- Client.__init__(self, db, uuid, 'osc', owner)
+ def __init__(self, db, uuid, fs_name):
+ Client.__init__(self, db, uuid, 'osc', fs_name)
+def mgmtcli_name_for_uuid(uuid):
+ return 'MGMTCLI_%s' % uuid
+
+class ManagementClient(Client):
+ def __init__(self, db, uuid):
+ Client.__init__(self, db, uuid, 'mgmt_cli', '',
+ self_name = mgmtcli_name_for_uuid(db.getUUID()),
+ module_dir = 'mgmt')
class COBD(Module):
def __init__(self, db):
# virtual interface for OSC and LOV
class VOSC(Module):
- def __init__(self, db, uuid, owner):
+ def __init__(self, db, uuid, fs_name):
Module.__init__(self, 'VOSC', db)
if db.get_class() == 'lov':
- self.osc = LOV(db, uuid)
+ self.osc = LOV(db, uuid, fs_name)
else:
- self.osc = get_osc(db, uuid, owner)
+ self.osc = get_osc(db, uuid, fs_name)
def get_uuid(self):
return self.osc.uuid
def get_name(self):
def load_module(self):
self.osc.load_module()
Module.load_module(self)
+
def cleanup_module(self):
Module.cleanup_module(self)
self.osc.cleanup_module()
+
def generate_client_uuid(name):
client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
name,
int(random.random() * 1048576))
return client_uuid[:36]
+
class Mountpoint(Module):
def __init__(self,db):
Module.__init__(self, 'MTPT', db)
fs = self.db.lookup(self.fs_uuid)
self.mds_uuid = fs.get_first_ref('mds')
self.obd_uuid = fs.get_first_ref('obd')
+ self.mgmt_uuid = fs.get_first_ref('mgmt')
obd = self.db.lookup(self.obd_uuid)
client_uuid = generate_client_uuid(self.name)
self.vosc = VOSC(obd, client_uuid, self.name)
self.add_lustre_module('mdc', 'mdc')
self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
self.add_lustre_module('llite', 'llite')
-
+ if self.mgmt_uuid:
+ self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
+ client_uuid)
+ else:
+ self.mgmtcli = None
def prepare(self):
if fs_is_mounted(self.path):
log(self.path, "already mounted.")
return
+ if self.mgmtcli:
+ self.mgmtcli.prepare()
self.vosc.prepare()
if self.vosc.need_mdc():
self.mdc.prepare()
self.vosc.cleanup()
if self.vosc.need_mdc():
self.mdc.cleanup()
+ if self.mgmtcli:
+ self.mgmtcli.cleanup()
def load_module(self):
+ if self.mgmtcli:
+ self.mgmtcli.load_module()
self.vosc.load_module()
Module.load_module(self)
+
def cleanup_module(self):
Module.cleanup_module(self)
self.vosc.cleanup_module()
+ if self.mgmtcli:
+ self.mgmtcli.cleanup_module()
# ============================================================
ret = 6
elif type in ('ldlm',):
ret = 20
+ elif type in ('mgmt',):
+ ret = 25
elif type in ('osd', 'cobd'):
ret = 30
elif type in ('mdsdev',):
#
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
-def get_osc(ost_db, uuid, owner):
- osc = OSC(ost_db, uuid, owner)
+def get_osc(ost_db, uuid, fs_name):
+ osc = OSC(ost_db, uuid, fs_name)
return osc
-def get_mdc(db, uuid, owner, mds_uuid):
+def get_mdc(db, uuid, fs_name, mds_uuid):
mds_db = db.lookup(mds_uuid);
if not mds_db:
panic("no mds:", mds_uuid)
- mdc = MDC(mds_db, uuid, owner)
+ mdc = MDC(mds_db, uuid, fs_name)
return mdc
############################################################
n = Mountpoint(db)
elif type == 'echoclient':
n = ECHO_CLIENT(db)
+ elif type == 'mgmt':
+ n = Management(db)
else:
panic ("unknown service type:", type)
return n
def sys_set_subsystem():
if config.subsystem != None:
try:
- val = eval(config.ptldebug, ptldebug_names)
+ val = eval(config.subsystem, subsystem_names)
val = "0x%x" % (val,)
sysctl('portals/subsystem_debug', val)
except NameError, e:
]
def main():
- global lctl, config
+ global lctl, config, toplevel
# in the upcall this is set to SIG_IGN
signal.signal(signal.SIGCHLD, signal.SIG_DFL)
dn = "config=%s,fs=lustre" % (config.config)
db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl)
else:
- cl.usage()
+ print 'Missing config file or ldap URL.'
+ print 'see lconf --help for command summary'
sys.exit(1)
+ toplevel = db
+
ver = db.get_version()
if not ver:
panic("No version found in config data, please recreate.")
main()
except Lustre.LconfError, e:
print e
+# traceback.print_exc(file=sys.stdout)
+ sys.exit(1)
except CommandError, e:
e.dump()
sys.exit(e.rc)
{"debug_kernel", jt_dbg_debug_kernel, 0,
"get debug buffer and dump to a file"
"usage: debug_kernel [file] [raw]"},
+ {"dk", jt_dbg_debug_kernel, 0,
+ "get debug buffer and dump to a file"
+ "usage: dk [file] [raw]"},
{"debug_file", jt_dbg_debug_file, 0,
"read debug buffer from input and dump to output"
"usage: debug_file <input> [output] [raw]"},
#define MAX_LOV_UUID_COUNT 1000
#define OBD_NOT_FOUND (-1)
-char * cmd;
-struct option longOpts[] = {
+char *cmd;
+struct option longOpts[] = {
{"help", 0, 0, 'h'},
{"obd", 1, 0, 'o'},
{"query", 0, 0, 'q'},
{"verbose", 0, 0, 'v'},
{0, 0, 0, 0}
- };
-int query;
-int verbose;
-char * shortOpts = "ho:qv";
-char * usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ...";
-
-int max_ost_count = MAX_LOV_UUID_COUNT;
-struct obd_uuid * obduuid;
-char * buf;
-int buflen;
-struct obd_uuid * uuids;
+ };
+int query;
+int verbose;
+char shortOpts[] = "ho:qv";
+char usageMsg[] = "[ --obd <obd uuid> | --query ] <dir|file> ...";
+
+int max_ost_count = MAX_LOV_UUID_COUNT;
+struct obd_uuid *obduuid;
+char *buf;
+int buflen;
+struct obd_uuid *uuids;
struct obd_ioctl_data data;
-struct lov_desc desc;
-int uuidslen;
-int cfglen;
+struct lov_desc desc;
+int uuidslen;
+int cfglen;
struct lov_mds_md *lmm;
-int lmmlen;
+int lmmlen;
+int printed_UUIDs;
void init();
void usage(FILE *stream);
void errMsg(char *fmt, ...);
-void processPath(const char *path);
+void processPath(char *path);
-int
-main (int argc, char **argv) {
+int main (int argc, char **argv) {
int c;
cmd = basename(argv[0]);
switch (c) {
case 'o':
if (obduuid) {
- errMsg("obd '%s' already specified: '%s'.",
- obduuid, optarg);
+ printf("obd '%s' already specified: '%s'\n",
+ obduuid->uuid, optarg);
exit(1);
}
usage(stderr);
exit(1);
default:
- errMsg("Internal error. Valid '%s' unrecognized.",
+ printf("Internal error. Valid '%s' unrecognized\n",
argv[optind - 1]);
usage(stderr);
exit(1);
exit (0);
}
-void
-init()
+void init()
{
int datalen, desclen;
}
if ((buf = malloc(buflen)) == NULL) {
- errMsg("Unable to allocate %d bytes of memory for ioctl's.",
- buflen);
+ errMsg("Unable to allocate %d bytes of memory for ioctl's", buflen); /* %d needs its buflen argument */
exit(1);
}
uuids = (struct obd_uuid *)buf;
}
-void
-usage(FILE *stream)
+void usage(FILE *stream)
{
fprintf(stream, "usage: %s %s\n", cmd, usageMsg);
}
-void
-errMsg(char *fmt, ...)
+void errMsg(char *fmt, ...)
{
va_list args;
+ int tmp_errno = errno;
fprintf(stderr, "%s: ", cmd);
va_start(args, fmt);
vfprintf(stderr, fmt, args);
va_end(args);
- fprintf(stderr, "\n");
+ fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
}
-void
-processPath(const char *path)
+void processPath(char *path)
{
int fd;
int rc;
int i;
- int obdindex;
+ int obdindex = OBD_NOT_FOUND;
int obdcount;
struct obd_uuid *uuidp;
+ char *fname, *dirname;
- if (query || verbose && !obduuid) {
+ if ((query || verbose) && !obduuid) {
printf("%s\n", path);
}
- if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
- errMsg("open \"%.20s\" failed.", path);
- perror("open");
+ fname = strrchr(path, '/');
+ if (fname != NULL && fname[1] != '\0') {
+ *fname = '\0';
+ fname++;
+ dirname = path;
+ } else if (fname != NULL && fname[1] == '\0') {
+ printf("need getdents support\n");
return;
+ } else {
+ dirname = ".";
+ fname = path;
}
- memset(&data, 0, sizeof(data));
- data.ioc_inllen1 = sizeof(desc);
- data.ioc_inlbuf1 = (char *)&desc;
- data.ioc_inllen2 = uuidslen;
- data.ioc_inlbuf2 = (char *)uuids;
+ if ((fd = open(dirname, O_RDONLY)) < 0) {
+ errMsg("open \"%.20s\" failed", dirname);
+ return;
+ }
- memset(&desc, 0, sizeof(desc));
- desc.ld_tgt_count = max_ost_count;
+ if (!printed_UUIDs) {
+ memset(&data, 0, sizeof(data));
+ data.ioc_inllen1 = sizeof(desc);
+ data.ioc_inlbuf1 = (char *)&desc;
+ data.ioc_inllen2 = uuidslen;
+ data.ioc_inlbuf2 = (char *)uuids;
- if (obd_ioctl_pack(&data, &buf, buflen)) {
- errMsg("internal buffering error.");
- exit(1);
- }
+ memset(&desc, 0, sizeof(desc));
+ desc.ld_tgt_count = max_ost_count;
+
+ if (obd_ioctl_pack(&data, &buf, buflen)) {
+ errMsg("internal buffering error");
+ exit(1);
+ }
- rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
- if (rc) {
- if (errno == ENOTTY) {
- if (!obduuid) {
- printf("Not a regular file or not Lustre file.\n\n");
+ rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+ if (rc) {
+ if (errno == ENOTTY) {
+ if (!obduuid) {
+ errMsg("error getting LOV config");
+ }
+ return;
}
- return;
+ errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed"); /* errMsg() appends strerror(errno) itself */
+ exit(1);
}
- errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
- perror("ioctl");
- exit(1);
- }
- if (obd_ioctl_unpack(&data, buf, buflen)) {
- errMsg("Invalid reply from ioctl.");
- exit(1);
- }
+ if (obd_ioctl_unpack(&data, buf, buflen)) {
+ errMsg("Invalid reply from ioctl");
+ exit(1);
+ }
- obdcount = desc.ld_tgt_count;
- if (obdcount == 0)
- return;
+ obdcount = desc.ld_tgt_count;
+ if (obdcount == 0)
+ return;
- obdindex = OBD_NOT_FOUND;
+ obdindex = OBD_NOT_FOUND;
- if (obduuid) {
- for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
- if (strncmp((const char *)obduuid, (const char *)uuidp,
- sizeof(*uuidp)) == 0) {
- obdindex = i;
+ if (obduuid) {
+ for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
+ if (strncmp((char *)obduuid, (char *)uuidp,
+ sizeof(*uuidp)) == 0) {
+ obdindex = i;
+ }
}
- }
- if (obdindex == OBD_NOT_FOUND)
- return;
- } else if (query || verbose) {
- printf("OBDS:\n");
- for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
- printf("%4d: %s\n", i, (char *)uuidp);
+ if (obdindex == OBD_NOT_FOUND)
+ return;
+ } else if (query || verbose) {
+ printf("OBDS:\n");
+ for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
+ printf("%4d: %s\n", i, (char *)uuidp);
+ }
+ printed_UUIDs = 1;
}
- memset((void *)buf, 0, buflen);
- lmm->lmm_magic = LOV_MAGIC;
- lmm->lmm_ost_count = max_ost_count;
-
- rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm);
+ strcpy((char *)lmm, fname);
+ rc = ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lmm);
if (rc) {
if (errno == ENODATA) {
- if(!obduuid) {
- printf("Has no stripe information.\n\n");
- }
+ if (!obduuid)
+ printf("Has no stripe information.\n");
}
else {
- errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. %d", errno);
- perror("ioctl");
+ errMsg("IOC_MDC_GETSTRIPE ioctl failed");
}
return;
}
#
"""
-lmc - lustre configurtion data manager
+lmc - lustre configuration data manager
- See lustre book for documentation for lmc.
+ See Lustre book (http://www.lustre.org/docs/lustre.pdf) for documentation on lmc.
"""
--path /mnt/point
--mds mds_name
--ost ost_name OR --lov lov_name
+
+--add mgmt - Management/monitoring service
+ --node node_name
+ --mgmt mgmt_service_name
"""
PARAM = Lustre.Options.PARAM
# network
('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
- ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"),
- ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT),
- ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0),
- ('irq_affinity', "Optional arguement.", PARAM, 0),
+ ('tcpbuf', "Optional argument to specify the TCP buffer size.", PARAM, "0"),
+ ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT),
+ ('nid_exchange', "Optional argument to indicate if nid exchange should be done.", PARAM, 0),
+ ('irq_affinity', "Optional argument.", PARAM, 0),
('hostaddr', "", PARAM,""),
('cluster_id', "Specify the cluster ID", PARAM, "0"),
('mds', "Specify MDS name.", PARAM),
('ost', "Specify the OST name.", PARAM,""),
('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
- ('failover', ""),
+ ('failover', "Enable failover support on OSTs or MDS?"),
('group', "", PARAM),
('dev', "Path of the device on local system.", PARAM,""),
('size', "Specify the size of the device if needed.", PARAM,"0"),
('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
- ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"),
+ ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"),
('ostuuid', "", PARAM,""),
('nspath', "Local mount point of server namespace.", PARAM,""),
('format', ""),
# cobd
('real_obd', "", PARAM),
('cache_obd', "", PARAM),
+
+ ('mgmt', "Specify management/monitoring service name.", PARAM, ""),
]
def error(*args):
mdd.appendChild(self.ref("target", mds_uuid))
return mdd
+ def mgmt(self, mgmt_name, mgmt_uuid, node_uuid):
+ mgmt = self.newService("mgmt", mgmt_name, mgmt_uuid)
+ mgmt.appendChild(self.ref("node", node_uuid))
+ # Placeholder until mgmt-service failover.
+ mgmt.appendChild(self.ref("active", mgmt_uuid))
+ return mgmt
+
def mountpoint(self, name, uuid, fs_uuid, path):
mtpt = self.newService("mountpoint", name, uuid)
mtpt.appendChild(self.ref("filesystem", fs_uuid))
self.addElement(mtpt, "path", path)
return mtpt
- def filesystem(self, name, uuid, mds_uuid, obd_uuid):
+ def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid):
fs = self.newService("filesystem", name, uuid)
fs.appendChild(self.ref("mds", mds_uuid))
fs.appendChild(self.ref("obd", obd_uuid))
+ if mgmt_uuid:
+ fs.appendChild(self.ref("mgmt", mgmt_uuid))
return fs
def echo_client(self, name, uuid, osc_uuid):
lustre.appendChild(mdd)
+def add_mgmt(gen, lustre, options):
+ node_name = get_option(options, 'node')
+ node_uuid = name2uuid(lustre, node_name)
+ mgmt_name = get_option(options, 'mgmt')
+ if not mgmt_name:
+ mgmt_name = new_name('MGMT_' + node_name)
+ mgmt_uuid = name2uuid(lustre, mgmt_name, fatal=0)
+ if not mgmt_uuid:
+ mgmt_uuid = new_uuid(mgmt_name)
+ mgmt = gen.mgmt(mgmt_name, mgmt_uuid, node_uuid)
+ lustre.appendChild(mgmt)
+ else:
+ mgmt = lookup(lustre, mgmt_uuid)
+
+ node = findByName(lustre, node_name, "node")
+ node_add_profile(gen, node, 'mgmt', mgmt_uuid)
+
def add_ost(gen, lustre, options):
node_name = get_option(options, 'node')
lovname = get_option(options, 'lov')
lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
lustre.appendChild(lovconfig)
-def new_filesystem(gen, lustre, mds_uuid, obd_uuid):
+def new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid):
fs_name = new_name("FS_fsname")
fs_uuid = new_uuid(fs_name)
mds = lookup(lustre, mds_uuid)
mds.appendChild(gen.ref("filesystem", fs_uuid))
- fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid)
+ fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid, mgmt_uuid)
lustre.appendChild(fs)
return fs_uuid
-def get_fs_uuid(gen, lustre, mds_name, obd_name):
+def get_fs_uuid(gen, lustre, mds_name, obd_name, mgmt_name):
mds_uuid = name2uuid(lustre, mds_name, tag='mds')
obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
if not obd_uuid:
obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1)
+ if mgmt_name:
+ mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1)
+ else:
+ mgmt_uuid = ''
fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid)
if not fs_uuid:
- fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid)
+ fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid)
return fs_uuid
def add_mtpt(gen, lustre, options):
lov_name = get_option(options, 'ost')
if lov_name == '':
error("--add mtpt requires either --filesystem or --mds with an --lov lov_name or --ost ost_name")
- fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name)
+ mgmt_name = get_option(options, 'mgmt')
+ fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name, mgmt_name)
else:
fs_uuid = name2uuid(lustre, fs_name, tag='filesystem')
add_echo_client(gen, lustre, options)
elif devtype == 'cobd':
add_cobd(gen, lustre, options)
+ elif devtype == 'mgmt':
+ add_mgmt(gen, lustre, options)
else:
error("unknown device type:", devtype)
}
}
- free (b);
+ free(b);
- obdio_disconnect (conn);
+ obdio_disconnect(conn, 0);
return (rc == 0 ? 0 : 1);
}
if (conn == NULL)
return (1);
- rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked,
- oid, base_offset, size);
+ rc = obdio_test_fixed_extent(conn, myhid, mypid, reps, locked,
+ oid, base_offset, size);
- obdio_disconnect (conn);
+ obdio_disconnect(conn, 0);
return (rc == 0 ? 0 : 1);
}
}
void
-obdio_disconnect (struct obdio_conn *conn)
+obdio_disconnect (struct obdio_conn *conn, int flags)
{
close (conn->oc_fd);
/* obdclass will automatically close on last ref */
};
extern struct obdio_conn * obdio_connect (int device);
-extern void obdio_disconnect (struct obdio_conn *conn);
-extern int obdio_open (struct obdio_conn *conn, uint64_t oid,
+extern void obdio_disconnect(struct obdio_conn *conn, int flags);
+extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
+ struct lustre_handle *fh);
+extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
struct lustre_handle *fh);
-extern int obdio_close (struct obdio_conn *conn, uint64_t oid,
- struct lustre_handle *fh);
-extern int obdio_pread (struct obdio_conn *conn, uint64_t oid,
+extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
+ char *buffer, uint32_t count, uint64_t offset);
+extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
- char *buffer, uint32_t count, uint64_t offset);
-extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid,
- int mode, uint64_t offset, uint32_t count,
- struct lustre_handle *lh);
-extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh);
-extern void *obdio_alloc_aligned_buffer (void **spacep, int size);
-extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ;
-extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
-extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b);
+extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
+ int mode, uint64_t offset, uint32_t count,
+ struct lustre_handle *lh);
+extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh);
+extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
+extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
+ int npeers);
+extern int obdio_setup_barrier(struct obdio_conn *conn,
+ struct obdio_barrier *b);
+extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
#endif
CHECK_VALUE (REINT_OPEN);
CHECK_VALUE (REINT_MAX);
- CHECK_VALUE (IT_INTENT_EXEC);
- CHECK_VALUE (IT_OPEN_LOOKUP);
- CHECK_VALUE (IT_OPEN_NEG);
- CHECK_VALUE (IT_OPEN_POS);
- CHECK_VALUE (IT_OPEN_CREATE);
- CHECK_VALUE (IT_OPEN_OPEN);
+ CHECK_VALUE (DISP_IT_EXECD);
+ CHECK_VALUE (DISP_LOOKUP_EXECD);
+ CHECK_VALUE (DISP_LOOKUP_NEG);
+ CHECK_VALUE (DISP_LOOKUP_POS);
+ CHECK_VALUE (DISP_OPEN_CREATE);
+ CHECK_VALUE (DISP_OPEN_OPEN);
CHECK_VALUE (MDS_STATUS_CONN);
CHECK_VALUE (MDS_STATUS_LOV);