From: phil Date: Fri, 25 Jul 2003 17:58:07 +0000 (+0000) Subject: merge b_devel into HEAD, which will become 0.7.3 X-Git-Tag: 0.8.0 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=a2a0746305449dbd925879b14dc2c0d6040bb8bf;p=fs%2Flustre-release.git merge b_devel into HEAD, which will become 0.7.3 - dozens and dozens of fixes for working with 2.6 kernels - new 2.4 kernel APIs - uncountable bug fixes --- diff --git a/lnet/.cvsignore b/lnet/.cvsignore index 99ac885..c1a9bdf 100644 --- a/lnet/.cvsignore +++ b/lnet/.cvsignore @@ -6,3 +6,4 @@ autom4te.cache config.log config.status configure +.*.o.cmd diff --git a/lnet/Kernelenv.in b/lnet/Kernelenv.in index 29a713f..7a48c58 100644 --- a/lnet/Kernelenv.in +++ b/lnet/Kernelenv.in @@ -1 +1,6 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include +# portals/utils/debug.c wants from userspace. sigh. +HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) +LIBREADLINE := @LIBREADLINE@ +# 2.5's makefiles aren't nice to cross dir libraries in host programs +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/Kernelenv.mk b/lnet/Kernelenv.mk index 29a713f..7c66dfa 100644 --- a/lnet/Kernelenv.mk +++ b/lnet/Kernelenv.mk @@ -1 +1,4 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include +HOSTCFLAGS := $(EXTRA_CFLAGS) +# the kernel doesn't want us to build archives for host binaries :/ +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/Makefile.mk b/lnet/Makefile.mk index be0e51a..73a19df 100644 --- a/lnet/Makefile.mk +++ b/lnet/Makefile.mk @@ -1,6 +1,12 @@ -include fs/lustre/portals/Kernelenv +include $(src)/Kernelenv -obj-y += portals/ +# The ordering of these determines the order that each subsystem's +# module_init() functions are called in. 
if these are changed make sure +# they reflect the dependencies between each subsystem's _init functions. obj-y += libcfs/ -obj-y += knals/ +obj-y += portals/ obj-y += router/ +obj-y += knals/ +obj-y += tests/ + +obj-m += utils/ diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 7a4e05c..1a7741bc 100644 --- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib") AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux) AC_SUBST(LINUX) +if test x$enable_inkernel = xyes ; then + echo ln -s `pwd` $LINUX/fs/lustre + rm $LINUX/fs/lustre + ln -s `pwd` $LINUX/fs/lustre +fi -# --------- UML? -------------------- +# -------------------- AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...) if test $host_cpu = "lib" ; then host_cpu="lib" @@ -111,6 +116,13 @@ case ${host_cpu} in MOD_LINK=elf64_ia64 ;; + x86_64 ) + AC_MSG_RESULT($host_cpu) + KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables' + KCPPFLAGS='-D__KERNEL__ -DMODULE' + MOD_LINK=elf_x86_64 +;; + sparc64 ) AC_MSG_RESULT($host_cpu) KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs' @@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.) fi -# ------------ RELEASE and moduledir ------------------ +# ------------ LINUXRELEASE and moduledir ------------------ AC_MSG_CHECKING(for Linux release) dnl We need to rid ourselves of the nasty [ ] quotes. 
changequote(, ) dnl Get release from version.h - RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" + LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" changequote([, ]) - moduledir='$(libdir)/modules/'$RELEASE/kernel + moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel AC_SUBST(moduledir) modulefsdir='$(moduledir)/fs/$(PACKAGE)' AC_SUBST(modulefsdir) + AC_MSG_RESULT($LINUXRELEASE) + AC_SUBST(LINUXRELEASE) + +# ------------ RELEASE -------------------------------- + AC_MSG_CHECKING(lustre release) + + dnl We need to rid ourselves of the nasty [ ] quotes. + changequote(, ) + dnl Get release from version.h + RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`" + changequote([, ]) + AC_MSG_RESULT($RELEASE) AC_SUBST(RELEASE) @@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) # This needs to run after we've defined the KCPPFLAGS AC_MSG_CHECKING(for kernel version) -AC_TRY_LINK([#define __KERNEL__ +AC_TRY_COMPILE([#define __KERNEL__ #include ], [struct task_struct p; p.sighand = NULL;], @@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then AC_MSG_RESULT(redhat-2.4.20) CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20" else - AC_MSG_RESULT($RELEASE) + AC_MSG_RESULT($LINUXRELEASE) fi diff --git a/lnet/include/config.h.in b/lnet/include/config.h.in index 3aa6909..f9605ab1 100644 --- a/lnet/include/config.h.in +++ b/lnet/include/config.h.in @@ -1,5 +1,11 @@ /* portals/include/config.h.in. Generated from configure.in by autoheader. */ +/* Compile with orphan support */ +#undef ENABLE_ORPHANS + +/* Use the Pinger */ +#undef ENABLE_PINGER + /* Define to 1 if you have the header file. 
*/ #undef HAVE_INTTYPES_H diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index ee3b9fc..2133391 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -4,7 +4,6 @@ #ifndef _KP30_INCLUDED #define _KP30_INCLUDED - #define PORTAL_DEBUG #ifndef offsetof @@ -13,10 +12,6 @@ #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) -#ifndef CONFIG_SMP -# define smp_processor_id() 0 -#endif - /* * Debugging */ @@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug; extern unsigned int portal_stack; extern unsigned int portal_debug; extern unsigned int portal_printk; -/* Debugging subsystems (8 bit ID) - * - * If you add debug subsystem #32, you need to send email to phil, because - * you're going to break kernel subsystem debug filtering. */ -#define S_UNDEFINED (0 << 24) -#define S_MDC (1 << 24) -#define S_MDS (2 << 24) -#define S_OSC (3 << 24) -#define S_OST (4 << 24) -#define S_CLASS (5 << 24) -#define S_OBDFS (6 << 24) /* obsolete */ -#define S_LLITE (7 << 24) -#define S_RPC (8 << 24) -#define S_EXT2OBD (9 << 24) /* obsolete */ -#define S_PORTALS (10 << 24) -#define S_SOCKNAL (11 << 24) -#define S_QSWNAL (12 << 24) -#define S_PINGER (13 << 24) -#define S_FILTER (14 << 24) -#define S_TRACE (15 << 24) /* obsolete */ -#define S_ECHO (16 << 24) -#define S_LDLM (17 << 24) -#define S_LOV (18 << 24) -#define S_GMNAL (19 << 24) -#define S_PTLROUTER (20 << 24) -#define S_COBD (21 << 24) -#define S_PTLBD (22 << 24) -#define S_LOG (23 << 24) - -/* If you change these values, please keep portals/linux/utils/debug.c +/* Debugging subsystems (32 bits, non-overlapping) */ +#define S_UNDEFINED (1 << 0) +#define S_MDC (1 << 1) +#define S_MDS (1 << 2) +#define S_OSC (1 << 3) +#define S_OST (1 << 4) +#define S_CLASS (1 << 5) +#define S_LOG (1 << 6) +#define S_LLITE (1 << 7) +#define S_RPC (1 << 8) +#define S_MGMT (1 << 9) +#define S_PORTALS (1 << 10) +#define S_SOCKNAL (1 << 11) +#define S_QSWNAL (1 << 12) +#define S_PINGER (1 << 13) +#define S_FILTER (1 
<< 14) +#define S_PTLBD (1 << 15) +#define S_ECHO (1 << 16) +#define S_LDLM (1 << 17) +#define S_LOV (1 << 18) +#define S_GMNAL (1 << 19) +#define S_PTLROUTER (1 << 20) +#define S_COBD (1 << 21) + +/* If you change these values, please keep portals/utils/debug.c * up to date! */ -/* Debugging masks (24 bits, non-overlapping) */ +/* Debugging masks (32 bits, non-overlapping) */ #define D_TRACE (1 << 0) /* ENTRY/EXIT markers */ #define D_INODE (1 << 1) #define D_SUPER (1 << 2) @@ -80,20 +70,23 @@ extern unsigned int portal_printk; #define D_RPCTRACE (1 << 20) /* for distributed debugging */ #define D_VFSTRACE (1 << 21) -#ifndef __KERNEL__ -#define THREAD_SIZE 8192 +#ifdef __KERNEL__ +# include /* THREAD_SIZE */ +#else +# define THREAD_SIZE 8192 #endif -#ifdef __ia64__ -#define CDEBUG_STACK() (THREAD_SIZE - \ + +#ifdef __KERNEL__ +# ifdef __ia64__ +# define CDEBUG_STACK (THREAD_SIZE - \ ((unsigned long)__builtin_dwarf_cfa() & \ (THREAD_SIZE - 1))) -#else -#define CDEBUG_STACK() (THREAD_SIZE - \ +# else +# define CDEBUG_STACK (THREAD_SIZE - \ ((unsigned long)__builtin_frame_address(0) & \ (THREAD_SIZE - 1))) -#endif +# endif -#ifdef __KERNEL__ #define CHECK_STACK(stack) \ do { \ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ @@ -105,20 +98,21 @@ extern unsigned int portal_printk; /*panic("LBUG");*/ \ } \ } while (0) -#else +#else /* __KERNEL__ */ #define CHECK_STACK(stack) do { } while(0) -#endif +#define CDEBUG_STACK (0L) +#endif /* __KERNEL__ */ #if 1 #define CDEBUG(mask, format, a...) \ do { \ - CHECK_STACK(CDEBUG_STACK()); \ + CHECK_STACK(CDEBUG_STACK); \ if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ (portal_debug & (mask) && \ - portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \ + portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK(), format , ## a); \ + CDEBUG_STACK, format, ## a); \ } while (0) #define CWARN(format, a...) 
CDEBUG(D_WARNING, format, ## a) @@ -162,7 +156,6 @@ do { \ #define EXIT do { } while (0) #endif - #ifdef __KERNEL__ # include # include @@ -210,7 +203,8 @@ static inline void our_cond_resched(void) #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ #ifdef PORTAL_DEBUG -extern void kportal_assertion_failed(char *expr,char *file,char *func,int line); +extern void kportal_assertion_failed(char *expr, char *file, const char *func, + const int line); #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ __FUNCTION__, __LINE__)) #else @@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -void portals_run_lbug_upcall(char * file, char *fn, int line); +void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); int portals_debug_init(unsigned long bufsize); int portals_debug_cleanup(void); int portals_debug_clear_buffer(void); int portals_debug_mark_buffer(char *text); int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *file, unsigned int size); + char *file, unsigned int size); __s32 portals_debug_copy_to_user(char *buf, unsigned long len); #if (__GNUC__) /* Use the special GNU C __attribute__ hack to have the compiler check the @@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len); # warning printf has been defined as a macro... # undef printf #endif -void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) +void portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + const char *format, ...) 
__attribute__ ((format (printf, 7, 8))); #else -void portals_debug_msg (int subsys, int mask, char *file, char *fn, - int line, unsigned long stack, - const char *format, ...); +void portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + const char *format, ...); #endif /* __GNUC__ */ void portals_debug_set_level(unsigned int debug_level); @@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void); # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); # define PORTAL_FREE(a, b) do { free(a); } while (0); # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ - printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ - (subsys) >> 24, (mask), (long)time(0), file, fn, line, \ - getpid() , stack, ## a); + printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ + (subsys), (mask), (long)time(0), file, fn, line, \ + getpid() , stack, ## a); #endif #ifndef CURRENT_TIME @@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal); void kportal_put_ni (int nal); #ifdef __CYGWIN__ -#ifndef BITS_PER_LONG -#if (~0UL) == 0xffffffffUL -#define BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif -#endif +# ifndef BITS_PER_LONG +# if (~0UL) == 0xffffffffUL +# define BITS_PER_LONG 32 +# else +# define BITS_PER_LONG 64 +# endif +# endif #endif #if (BITS_PER_LONG == 32 || __WORDSIZE == 32) diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/linux/portals_compat25.h index e28fbac..a7cb4d1 100644 --- a/lnet/include/linux/portals_compat25.h +++ b/lnet/include/linux/portals_compat25.h @@ -1,13 +1,56 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _PORTALS_COMPAT_H +#define _PORTALS_COMPAT_H + +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved +#if SPINLOCK_DEBUG +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) +# define 
SIGNAL_MASK_ASSERT() \ + LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) +# else +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) +# endif +#else +# define SIGNAL_MASK_ASSERT() +#endif +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_LOCK(task, flags) \ + +# define SIGNAL_MASK_LOCK(task, flags) \ spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ +# define SIGNAL_MASK_UNLOCK(task, flags) \ spin_unlock_irqrestore(&task->sighand->siglock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp, 1) # define RECALC_SIGPENDING recalc_sigpending() -#else -# define SIGNAL_MASK_LOCK(task, flags) \ +# define CURRENT_SECONDS get_seconds() + +#else /* 2.4.x */ + +# define SIGNAL_MASK_LOCK(task, flags) \ spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ +# define SIGNAL_MASK_UNLOCK(task, flags) \ spin_unlock_irqrestore(&task->sigmask_lock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending(current) +# define CURRENT_SECONDS CURRENT_TIME + +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) +# define THREAD_NAME(comm, fmt, a...) \ + sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid) +#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define THREAD_NAME(comm, fmt, a...) \ + sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid) +#else +# define THREAD_NAME(comm, fmt, a...) 
\ + sprintf(comm, fmt, ## a) #endif + +#endif /* _PORTALS_COMPAT_H */ diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h index d78cad4..a70b465 100644 --- a/lnet/include/lnet/internal.h +++ b/lnet/include/lnet/internal.h @@ -1,5 +1,3 @@ -/* -*/ #ifndef _P30_INTERNAL_H_ #define _P30_INTERNAL_H_ diff --git a/lnet/include/lnet/list.h b/lnet/include/lnet/list.h index 2b63312..78a1e2d 100644 --- a/lnet/include/lnet/list.h +++ b/lnet/include/lnet/list.h @@ -1,6 +1,4 @@ #ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - /* * Simple doubly linked list implementation. @@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry) __list_del(entry->prev, entry->next); INIT_LIST_HEAD(entry); } +#endif +#ifndef list_for_each_entry /** * list_move - delete from one list and add as another's head * @list: the entry to move @@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list, __list_del(list->prev, list->next); list_add_tail(list, head); } +#endif +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H /** * list_empty - tests whether a list is empty * @head: the list to test. 
diff --git a/lnet/include/lnet/lltrace.h b/lnet/include/lnet/lltrace.h index 7d1b304..d389aab 100644 --- a/lnet/include/lnet/lltrace.h +++ b/lnet/include/lnet/lltrace.h @@ -2,7 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl + * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl */ #ifndef __LTRACE_H_ #define __LTRACE_H_ @@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname) argv[0] = "debug_kernel"; argv[1] = fname; argv[2] = "1"; - + fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - + return jt_dbg_debug_kernel(3, argv); } static inline int ltrace_clear() { char* argv[1]; - + argv[0] = "clear"; - + fprintf(stderr, "[ptlctl] %s\n", argv[0]); - + return jt_dbg_clear_debug_buf(1, argv); } @@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text) { char* argv[2]; char mark_buf[PATH_MAX]; - + snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); - + argv[0] = "mark"; argv[1] = mark_buf; return jt_dbg_mark_debug_buf(2, argv); @@ -65,9 +65,9 @@ static inline int ltrace_applymasks() char* argv[2]; argv[0] = "list"; argv[1] = "applymasks"; - + fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - + return jt_dbg_list(2, argv); } @@ -95,19 +95,19 @@ static inline int ltrace_start() #ifdef PORTALS_DEV_ID rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); #endif - ltrace_filter("class"); + ltrace_filter("class"); ltrace_filter("socknal"); - ltrace_filter("qswnal"); - ltrace_filter("gmnal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); + ltrace_filter("qswnal"); + ltrace_filter("gmnal"); + ltrace_filter("portals"); + + ltrace_show("all_types"); + ltrace_filter("trace"); + ltrace_filter("malloc"); + ltrace_filter("net"); 
+ ltrace_filter("page"); + ltrace_filter("other"); + ltrace_filter("info"); ltrace_applymasks(); return rc; @@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname) struct timezone tz; int nob; int underuml = !not_uml(); - + gettimeofday(&tv, &tz); nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); @@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname) "(%s:%d:%s() %d+%lu): ", "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); } - + nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); system(cmdbuf); } diff --git a/lnet/include/lnet/myrnal.h b/lnet/include/lnet/myrnal.h index 12b1925..13790f7 100644 --- a/lnet/include/lnet/myrnal.h +++ b/lnet/include/lnet/myrnal.h @@ -1,6 +1,3 @@ -/* -*/ - #ifndef MYRNAL_H #define MYRNAL_H diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h index 88be63c..7cb3ab7 100644 --- a/lnet/include/lnet/nal.h +++ b/lnet/include/lnet/nal.h @@ -1,5 +1,3 @@ -/* -*/ #ifndef _NAL_H_ #define _NAL_H_ diff --git a/lnet/include/lnet/ppid.h b/lnet/include/lnet/ppid.h index 4727599..760f465 100644 --- a/lnet/include/lnet/ppid.h +++ b/lnet/include/lnet/ppid.h @@ -1,6 +1,3 @@ -/* - */ - #ifndef _INCppidh_ #define _INCppidh_ diff --git a/lnet/include/lnet/stringtab.h b/lnet/include/lnet/stringtab.h index c9683f7..33e4375 100644 --- a/lnet/include/lnet/stringtab.h +++ b/lnet/include/lnet/stringtab.h @@ -1,5 +1,3 @@ /* -*/ -/* * stringtab.h */ diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index d4038b6..0269290 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -2,14 +2,19 @@ #define _P30_TYPES_H_ #ifdef __linux__ -#include -#include +# include +# include #else -#include +# include typedef u_int32_t __u32; typedef u_int64_t __u64; -typedef unsigned long long cycles_t; -static inline cycles_t get_cycles(void) { return 0; } +#endif + +#ifdef __KERNEL__ +# include +#else +# include +# define do_gettimeofday(tv) gettimeofday(tv, NULL) 
#endif typedef __u64 ptl_nid_t; @@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t; typedef ptl_handle_any_t ptl_handle_me_t; #define PTL_HANDLE_NONE \ -((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) + ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) #define PTL_EQ_NONE PTL_HANDLE_NONE static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) @@ -108,17 +113,15 @@ typedef struct { ptl_handle_me_t unlinked_me; ptl_md_t mem_desc; ptl_hdr_data_t hdr_data; - cycles_t arrival_time; + struct timeval arrival_time; volatile ptl_seq_t sequence; } ptl_event_t; - typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; - typedef struct { volatile ptl_seq_t sequence; ptl_size_t size; @@ -130,7 +133,6 @@ typedef struct { ptl_eq_t *eq; } ptl_ni_t; - typedef struct { int max_match_entries; /* max number of match entries */ int max_mem_descriptors; /* max number of memory descriptors */ diff --git a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore index 282522d..89a4aa6 100644 --- a/lnet/klnds/.cvsignore +++ b/lnet/klnds/.cvsignore @@ -1,2 +1,3 @@ Makefile Makefile.in +.*.o.cmd diff --git a/lnet/klnds/Makefile.mk b/lnet/klnds/Makefile.mk index ce40a60..cd5d9d6 100644 --- a/lnet/klnds/Makefile.mk +++ b/lnet/klnds/Makefile.mk @@ -1,4 +1,4 @@ -include ../Kernelenv +include $(obj)/../Kernelenv obj-y = socknal/ -# more coming... \ No newline at end of file +# more coming... 
diff --git a/lnet/klnds/gmlnd/gmnal.c b/lnet/klnds/gmlnd/gmnal.c index ceeea2a..0cffc158 100644 --- a/lnet/klnds/gmlnd/gmnal.c +++ b/lnet/klnds/gmlnd/gmnal.c @@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, return &kgmnal_api; } -static void __exit +static void /*__exit*/ kgmnal_finalize(void) { struct list_head *tmp; diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c index 1066d69..479cc2c 100644 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ b/lnet/klnds/scimaclnd/scimacnal.c @@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, /* Called by kernel at module unload time */ -static void __exit +static void /*__exit*/ kscimacnal_finalize(void) { /* FIXME: How should the shutdown procedure really look? */ diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore index e995588..95973d6 100644 --- a/lnet/klnds/socklnd/.cvsignore +++ b/lnet/klnds/socklnd/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.o.cmd diff --git a/lnet/klnds/socklnd/Makefile.mk b/lnet/klnds/socklnd/Makefile.mk index 46edf01..5c1b366 100644 --- a/lnet/klnds/socklnd/Makefile.mk +++ b/lnet/klnds/socklnd/Makefile.mk @@ -3,7 +3,7 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution -include ../../Kernelenv +include $(src)/../../Kernelenv obj-y += ksocknal.o ksocknal-objs := socknal.o socknal_cb.o diff --git a/lnet/klnds/toelnd/toenal.c b/lnet/klnds/toelnd/toenal.c index 1f5dc38..77ee473 100644 --- a/lnet/klnds/toelnd/toenal.c +++ b/lnet/klnds/toelnd/toenal.c @@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private) } -void __exit +void /*__exit*/ ktoenal_module_fini (void) { CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", diff --git a/lnet/klnds/toelnd/toenal_cb.c b/lnet/klnds/toelnd/toenal_cb.c index ec37f6f..abd0731 100644 --- a/lnet/klnds/toelnd/toenal_cb.c +++ b/lnet/klnds/toelnd/toenal_cb.c @@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); goto get_fmb; /* => go get a fwd msg buffer */ default: + break; } /* Not Reached */ LBUG (); @@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) goto out; /* (later) */ default: + break; } /* Not Reached */ diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore index 67d1a3d..7fa686f 100644 --- a/lnet/libcfs/.cvsignore +++ b/lnet/libcfs/.cvsignore @@ -2,3 +2,4 @@ Makefile Makefile.in link-stamp +.*.o.cmd diff --git a/lnet/libcfs/Makefile.mk b/lnet/libcfs/Makefile.mk index 3196ea2..9aa838f 100644 --- a/lnet/libcfs/Makefile.mk +++ b/lnet/libcfs/Makefile.mk @@ -6,4 +6,4 @@ include fs/lustre/portals/Kernelenv obj-y += libcfs.o -licfs-objs := module.o proc.o debug.o \ No newline at end of file +libcfs-objs := module.o proc.o debug.o diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 8d26dbb..f37cd96 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize) memset(debug_buf, 0, debug_size); debug_wrapped = 0; - printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - bufsize, debug_buf); + //printk(KERN_INFO "Portals: 
allocated %lu byte debug buffer at %p.\n", + //bufsize, debug_buf); atomic_set(&debug_off_a, debug_off); notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); debug_size = bufsize; @@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text) if (debug_buf == NULL) return -EINVAL; - CDEBUG(0, "*******************************************************************************\n"); + CDEBUG(0, "********************************************************\n"); CDEBUG(0, "DEBUG MARKER: %s\n", text); - CDEBUG(0, "*******************************************************************************\n"); + CDEBUG(0, "********************************************************\n"); return 0; } @@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. */ void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) +portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, const char *format, ...) 
{ va_list ap; unsigned long flags; @@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, do_gettimeofday(&tv); prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id(), + "%06x:%06x:%d:%lu.%06lu ", + subsys, mask, smp_processor_id(), tv.tv_sec, tv.tv_usec); max_nob -= prefix_nob; @@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, va_start(ap, format); msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); + max_nob, format, ap); max_nob -= msg_nob; va_end(ap); @@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -void portals_run_lbug_upcall(char * file, char *fn, int line) +void portals_run_lbug_upcall(char *file, const char *fn, const int line) { char *argv[6]; char *envp[3]; @@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line) argv[0] = portals_upcall; argv[1] = "LBUG"; argv[2] = file; - argv[3] = fn; + argv[3] = (char *)fn; argv[4] = buf; argv[5] = NULL; diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 14cc325..e8eb290 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; struct semaphore nal_cmd_sem; #ifdef PORTAL_DEBUG -void -kportal_assertion_failed (char *expr, char *file, char *func, int line) +void kportal_assertion_failed(char *expr, char *file, const char *func, + const int line) { - portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(), + portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK, "ASSERTION(%s) failed\n", expr); LBUG_WITH_LOC(file, func, line); } diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore index e995588..95973d6 100644 --- a/lnet/lnet/.cvsignore +++ b/lnet/lnet/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.o.cmd diff --git 
a/lnet/lnet/Makefile.mk b/lnet/lnet/Makefile.mk index 5627ef7..7822846 100644 --- a/lnet/lnet/Makefile.mk +++ b/lnet/lnet/Makefile.mk @@ -3,7 +3,10 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include ../Kernelenv +include $(src)/../Kernelenv obj-y += portals.o -portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o +portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \ + lib-move.o lib-msg.o lib-ni.o lib-pid.o \ + api-eq.o api-errno.o api-init.o api-me.o api-ni.o \ + api-wrap.o diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c index e59c922..dc1fead 100644 --- a/lnet/lnet/api-init.c +++ b/lnet/lnet/api-init.c @@ -26,7 +26,7 @@ #include int ptl_init; -unsigned int portal_subsystem_debug = 0xfff7e3ff; +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL); unsigned int portal_debug = ~0; unsigned int portal_printk; unsigned int portal_stack; diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index fde4f16..02f8b60 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md) msg->send_ack = 0; msg->md = md; - msg->ev.arrival_time = get_cycles(); + do_gettimeofday(&msg->ev.arrival_time); md->pending++; if (md->threshold != PTL_MD_THRESH_INF) { LASSERT (md->threshold > 0); diff --git a/lnet/router/.cvsignore b/lnet/router/.cvsignore index e995588..95973d6 100644 --- a/lnet/router/.cvsignore +++ b/lnet/router/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.o.cmd diff --git a/lnet/router/Makefile.mk b/lnet/router/Makefile.mk index 64bd09b..9b02c03 100644 --- a/lnet/router/Makefile.mk +++ b/lnet/router/Makefile.mk @@ -3,7 +3,7 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution -include ../Kernelenv +include $(src)/../Kernelenv obj-y += kptlrouter.o kptlrouter-objs := router.o proc.o diff --git a/lnet/router/router.c b/lnet/router/router.c index 6074c3c..27a7fba 100644 --- a/lnet/router/router.c +++ b/lnet/router/router.c @@ -23,8 +23,8 @@ #include "router.h" -struct list_head kpr_routes; -struct list_head kpr_nals; +LIST_HEAD(kpr_routes); +LIST_HEAD(kpr_nals); unsigned long long kpr_fwd_bytes; unsigned long kpr_fwd_packets; @@ -35,7 +35,7 @@ atomic_t kpr_queue_depth; * * Once in a blue moon we register/deregister NALs and add/remove routing * entries (thread context only)... */ -rwlock_t kpr_rwlock; +rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED; kpr_router_interface_t kpr_router_interface = { kprri_register: kpr_register_nal, @@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = { int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { - long flags; + unsigned long flags; struct list_head *e; kpr_nal_entry_t *ne; @@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) void kpr_shutdown_nal (void *arg) { - long flags; + unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); @@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg) void kpr_deregister_nal (void *arg) { - long flags; + unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); @@ -296,7 +296,7 @@ int kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, ptl_nid_t hi_nid) { - long flags; + unsigned long flags; struct list_head *e; kpr_route_entry_t *re; @@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, int kpr_del_route (ptl_nid_t nid) { - long flags; + unsigned long flags; struct list_head *e; CDEBUG(D_OTHER, "Del route "LPX64"\n", nid); @@ -398,7 +398,7 @@ 
kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, return (-ENOENT); } -static void __exit +static void /*__exit*/ kpr_finalise (void) { LASSERT (list_empty (&kpr_nals)); @@ -427,10 +427,6 @@ kpr_initialise (void) CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); - rwlock_init(&kpr_rwlock); - INIT_LIST_HEAD(&kpr_routes); - INIT_LIST_HEAD(&kpr_nals); - kpr_proc_init(); PORTAL_SYMBOL_REGISTER(kpr_router_interface); diff --git a/lnet/tests/.cvsignore b/lnet/tests/.cvsignore index 051d1bd..d0c4c88 100644 --- a/lnet/tests/.cvsignore +++ b/lnet/tests/.cvsignore @@ -1,3 +1,4 @@ Makefile Makefile.in .deps +.*.o.cmd diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index 389ffbb..4d04ffb 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args) /* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) +int kping_client(struct portal_ioctl_data *args) { PORTAL_ALLOC (client, sizeof(struct pingcli_data)); if (client == NULL) @@ -282,7 +282,7 @@ static int __init pingcli_init(void) } /* pingcli_init() */ -static void __exit pingcli_cleanup(void) +static void /*__exit*/ pingcli_cleanup(void) { PORTAL_SYMBOL_UNREGISTER (kping_client); } /* pingcli_cleanup() */ diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index 1037d09..873e11c 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -47,11 +47,11 @@ #include #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) -#define MAXSIZE (16*1024*1024) +#define MAXSIZE (16*1024) static unsigned ping_head_magic; static unsigned ping_bulk_magic; -static int nal = 0; // Your NAL, +static int nal = SOCKNAL; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -282,7 +282,7 @@ static int __init pingsrv_init(void) } /* pingsrv_init() */ -static void __exit pingsrv_cleanup(void) 
+static void /*__exit*/ pingsrv_cleanup(void) { remove_proc_entry ("net/pingsrv", NULL); diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index 4cef08b..35e114b 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args) /* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) +int kping_client(struct portal_ioctl_data *args) { PORTAL_ALLOC (client, sizeof(struct pingcli_data)); @@ -258,7 +258,7 @@ static int __init pingcli_init(void) } /* pingcli_init() */ -static void __exit pingcli_cleanup(void) +static void /*__exit*/ pingcli_cleanup(void) { PORTAL_SYMBOL_UNREGISTER (kping_client); } /* pingcli_cleanup() */ diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index a18ea35..2b45a46 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -269,7 +269,7 @@ static int __init pingsrv_init(void) } /* pingsrv_init() */ -static void __exit pingsrv_cleanup(void) +static void /*__exit*/ pingsrv_cleanup(void) { remove_proc_entry ("net/pingsrv", NULL); diff --git a/lnet/ulnds/debug.c b/lnet/ulnds/debug.c index 529bb2d..b73f042 100644 --- a/lnet/ulnds/debug.c +++ b/lnet/ulnds/debug.c @@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. */ void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) +portals_debug_msg (int subsys, int mask, char *file, const char *fn, + const int line, const char *format, ...) { va_list ap; unsigned long flags; diff --git a/lnet/ulnds/socklnd/debug.c b/lnet/ulnds/socklnd/debug.c index 529bb2d..b73f042 100644 --- a/lnet/ulnds/socklnd/debug.c +++ b/lnet/ulnds/socklnd/debug.c @@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. 
*/ void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) +portals_debug_msg (int subsys, int mask, char *file, const char *fn, + const int line, const char *format, ...) { va_list ap; unsigned long flags; diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore index 148310a..8e474ad 100644 --- a/lnet/utils/.cvsignore +++ b/lnet/utils/.cvsignore @@ -5,4 +5,5 @@ debugctl ptlctl .deps routerstat -wirecheck \ No newline at end of file +wirecheck +.*.cmd diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 9ab1c73d..0a009d2 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -53,17 +53,18 @@ static char rawbuf[8192]; static char *buf = rawbuf; static int max = 8192; //static int g_pfd = -1; -static int subsystem_array[1 << 8]; +static int subsystem_mask = ~0; static int debug_mask = ~0; static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite", - "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter", - "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL}; + {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", + "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter", + "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL}; static const char *portal_debug_masks[] = {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL}; + "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", + NULL}; struct debug_daemon_cmd { char *cmd; @@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable) printf("%s output from subsystem \"%s\"\n", enable ? 
"Enabling" : "Disabling", portal_debug_subsystems[i]); - subsystem_array[i] = enable; + if (enable) + subsystem_mask |= (1 << i); + else + subsystem_mask &= ~(1 << i); found = 1; } } @@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable) int dbg_initialize(int argc, char **argv) { - memset(subsystem_array, 1, sizeof(subsystem_array)); return 0; } @@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv) for (i = 0; portal_debug_masks[i] != NULL; i++) printf(", %s", portal_debug_masks[i]); printf("\n"); - } - else if (strcasecmp(argv[1], "applymasks") == 0) { - unsigned int subsystem_mask = 0; - for (i = 0; portal_debug_subsystems[i] != NULL; i++) { - if (subsystem_array[i]) subsystem_mask |= (1 << i); - } + } else if (strcasecmp(argv[1], "applymasks") == 0) { applymask_all(subsystem_mask, debug_mask); } return 0; @@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw) { char *p, *z; unsigned long subsystem, debug, dropped = 0, kept = 0; - int max_sub, max_type; - - for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++) - ; - for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++) - ; while (size) { p = memchr(buf, '\n', size); @@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw) z++; /* for some reason %*s isn't working. 
*/ *p = '\0'; - if (subsystem < max_sub && - subsystem_array[subsystem] && + if ((subsystem_mask & subsystem) && (!debug || (debug_mask & debug))) { if (raw) fprintf(fd, "%s\n", buf); @@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv) {"mds_ext3", "lustre/mds"}, {"mds_extN", "lustre/mds"}, {"ptlbd", "lustre/ptlbd"}, + {"mgmt_svc", "lustre/mgmt"}, + {"mgmt_cli", "lustre/mgmt"}, {NULL, NULL} }; char *path = ".."; diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 90d66f5..a89f4f7 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -106,6 +107,27 @@ nal2name (int nal) return ((e == NULL) ? "???" : e->name); } +static struct hostent * +ptl_gethostbyname(char * hname) { + struct hostent *he; + he = gethostbyname(hname); + if (!he) { + switch(h_errno) { + case HOST_NOT_FOUND: + case NO_ADDRESS: + fprintf(stderr, "Unable to resolve hostname: %s\n", + hname); + break; + default: + fprintf(stderr, "gethostbyname error: %s\n", + strerror(errno)); + break; + } + return NULL; + } + return he; +} + int ptl_parse_nid (ptl_nid_t *nidp, char *str) { @@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) if ((('a' <= str[0] && str[0] <= 'z') || ('A' <= str[0] && str[0] <= 'Z')) && - (he = gethostbyname (str)) != NULL) + (he = ptl_gethostbyname (str)) != NULL) { __u32 addr = *(__u32 *)he->h_addr; @@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv) goto usage; } - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return -1; - } g_port = atol(argv[2]); @@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv) PORTAL_IOC_INIT(data); if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return -1; - } data.ioc_nid = ntohl (*(__u32 
*)he->h_addr); /* HOST byte order */ @@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv) PORTAL_IOC_INIT(data); if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return -1; - } data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ diff --git a/lustre/.cvsignore b/lustre/.cvsignore index 776ef36..a8a5356 100644 --- a/lustre/.cvsignore +++ b/lustre/.cvsignore @@ -15,4 +15,4 @@ cscope.files cscope.out autom4te-2.53.cache autom4te.cache - +.*.o.cmd diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 89eaef7..17c08c6 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,4 +1,14 @@ tbd + * version v0_8 + * bug fixes + - orphans are moved into the PENDING directory for possible recovery + - replayed opens now open by fid for orphan/rename safety (1042) + - last close of an orphan inode generates a transno (683) + - chdir() and mount() now pin the directory entry (1020) + - avoid CERROR in normal ll_setattr_raw() error case (1500) + - discard very old requests without processing them (1502) + +2003-06-15 Phil Schwan * version v0_7 * bug fixes - imports and exports cleanup too early, need refcounts (349, 879, 1045) diff --git a/lustre/Makefile.mk b/lustre/Makefile.mk index e540148..59178a4 100644 --- a/lustre/Makefile.mk +++ b/lustre/Makefile.mk @@ -1,4 +1,22 @@ -include fs/lustre/portals/Kernelenv +include $(src)/portals/Kernelenv + +# for scripts/version_tag.pl +LINUX = @LINUX@ obj-y += portals/ +# obdclass has to come before anything that does class_register.. 
+obj-y += obdclass/ +obj-y += ptlrpc/ +obj-y += ldlm/ +obj-y += obdfilter/ +obj-y += mdc/ obj-y += mds/ +obj-y += obdecho/ +obj-y += osc/ +obj-y += ost/ +obj-y += lov/ +obj-y += llite/ + +# portals needs to be before utils/, which pulls in ptlctl objects +obj-m += utils/ +obj-m += tests/ diff --git a/lustre/cobd/cache_obd.c b/lustre/cobd/cache_obd.c index 5efb545..2d3549b 100644 --- a/lustre/cobd/cache_obd.c +++ b/lustre/cobd/cache_obd.c @@ -36,13 +36,13 @@ static int cobd_attach(struct obd_device *dev, obd_count len, void *data) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); + lprocfs_init_vars(cobd, &lvars); + return lprocfs_obd_attach(dev, lvars.obd_vars); } static int cobd_detach(struct obd_device *dev) { - return lprocfs_obd_detach(dev); + return lprocfs_obd_detach(dev); } static int @@ -82,24 +82,23 @@ cobd_setup (struct obd_device *dev, obd_count len, void *buf) return (0); fail_0: - obd_disconnect (&cobd->cobd_target, 0 ); + obd_disconnect(&cobd->cobd_target, 0); return (rc); } -static int -cobd_cleanup (struct obd_device *dev, int force, int failover) +static int cobd_cleanup(struct obd_device *dev, int flags) { struct cache_obd *cobd = &dev->u.cobd; int rc; - if (!list_empty (&dev->obd_exports)) + if (!list_empty(&dev->obd_exports)) return (-EBUSY); - rc = obd_disconnect (&cobd->cobd_cache, failover); + rc = obd_disconnect(&cobd->cobd_cache, flags); if (rc != 0) CERROR ("error %d disconnecting cache\n", rc); - rc = obd_disconnect (&cobd->cobd_target, failover); + rc = obd_disconnect(&cobd->cobd_target, flags); if (rc != 0) CERROR ("error %d disconnecting target\n", rc); @@ -116,13 +115,12 @@ cobd_connect (struct lustre_handle *conn, struct obd_device *obd, return (rc); } -static int -cobd_disconnect (struct lustre_handle *conn, int failover) +static int cobd_disconnect(struct lustre_handle *conn, int flags) { - int rc = class_disconnect (conn, failover); + int rc = class_disconnect(conn, 
flags); CERROR ("rc %d\n", rc); - return (rc); + return (rc); } static int @@ -144,23 +142,15 @@ cobd_get_info(struct lustre_handle *conn, obd_count keylen, return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val); } -static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs) +static int cobd_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { - struct obd_export *cobd_exp; - int rc; - - if (exp->exp_obd == NULL) - return -EINVAL; - - cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_statfs(cobd_exp, osfs); - class_export_put(cobd_exp); - return rc; + return obd_statfs(class_conn2obd(&obd->u.cobd.cobd_target), osfs, + max_age); } -static int -cobd_getattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm) +static int cobd_getattr(struct lustre_handle *conn, struct obdo *oa, + struct lov_stripe_md *lsm) { struct obd_device *obd = class_conn2obd(conn); struct cache_obd *cobd; @@ -207,11 +197,10 @@ cobd_close(struct lustre_handle *conn, struct obdo *oa, return (obd_close (&cobd->cobd_target, oa, lsm, oti)); } -static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, +static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, - struct niobuf_local *res, void **desc_private, - struct obd_trans_info *oti) + struct niobuf_local *res, struct obd_trans_info *oti) { struct obd_export *cobd_exp; int rc; @@ -223,16 +212,17 @@ static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, return -EOPNOTSUPP; cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res, - desc_private, oti); + rc = obd_preprw(cmd, cobd_exp, oa, objcount, obj, niocount, nb, res, + oti); class_export_put(cobd_exp); + return rc; } -static int cobd_commitrw(int cmd, struct obd_export *exp, +static int 
cobd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti) + struct obd_trans_info *oti) { struct obd_export *cobd_exp; int rc; @@ -244,16 +234,14 @@ static int cobd_commitrw(int cmd, struct obd_export *exp, return -EOPNOTSUPP; cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local, - desc_private, oti); + rc = obd_commitrw(cmd, cobd_exp, oa, objcount, obj,niocount,local,oti); class_export_put(cobd_exp); return rc; } -static inline int -cobd_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) +static int cobd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa, + struct lov_stripe_md *lsm, obd_count oa_bufs, + struct brw_page *pga, struct obd_trans_info *oti) { struct obd_device *obd = class_conn2obd(conn); struct cache_obd *cobd; @@ -267,13 +255,11 @@ cobd_brw(int cmd, struct lustre_handle *conn, return -EOPNOTSUPP; cobd = &obd->u.cobd; - return (obd_brw (cmd, &cobd->cobd_target, - lsm, oa_bufs, pga, oti)); + return (obd_brw(cmd, &cobd->cobd_target, oa, lsm, oa_bufs, pga, oti)); } -static int -cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) +static int cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, + void *karg, void *uarg) { struct obd_device *obd = class_conn2obd(conn); struct cache_obd *cobd; @@ -286,7 +272,7 @@ cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, /* intercept? 
*/ cobd = &obd->u.cobd; - return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg)); + return (obd_iocontrol(cmd, &cobd->cobd_target, len, karg, uarg)); } static struct obd_ops cobd_ops = { @@ -317,16 +303,16 @@ static int __init cobd_init(void) struct lprocfs_static_vars lvars; ENTRY; - printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n"); + printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n"); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(cobd, &lvars); RETURN(class_register_type(&cobd_ops, lvars.module_vars, OBD_CACHE_DEVICENAME)); } -static void __exit cobd_exit(void) +static void /*__exit*/ cobd_exit(void) { - class_unregister_type(OBD_CACHE_DEVICENAME); + class_unregister_type(OBD_CACHE_DEVICENAME); } MODULE_AUTHOR("Cluster File Systems, Inc. "); diff --git a/lustre/cobd/lproc_cache.c b/lustre/cobd/lproc_cache.c index fd7474b..ba9b9cf 100644 --- a/lustre/cobd/lproc_cache.c +++ b/lustre/cobd/lproc_cache.c @@ -25,67 +25,59 @@ #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else /* Common STATUS namespace */ -static int rd_target(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int cobd_rd_target(char *page, char **start, off_t off, int count, + int *eof, void *data) { - struct obd_device *dev = (struct obd_device*)data; - struct lustre_handle *conn; - struct obd_export *exp; - int rc; + struct obd_device *cobd = (struct obd_device *)data; + int rc; - LASSERT(dev != NULL); - conn = &dev->u.cobd.cobd_target; + LASSERT(cobd != NULL); - if (!dev->obd_set_up) { - rc = snprintf (page, count, "not set up\n"); - } else { - exp = class_conn2export(conn); - LASSERT(exp != NULL); - rc = snprintf(page, count, "%s\n", - exp->exp_obd->obd_uuid.uuid); - class_export_put(exp); - } - return (rc); + if 
(!cobd->obd_set_up) { + rc = snprintf(page, count, "not set up\n"); + } else { + struct obd_device *tgt = + class_conn2obd(&cobd->u.cobd.cobd_target); + LASSERT(tgt != NULL); + rc = snprintf(page, count, "%s\n", tgt->obd_uuid.uuid); + } + return rc; } -static int rd_cache(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int cobd_rd_cache(char *page, char **start, off_t off, int count, + int *eof, void *data) { - struct obd_device *dev = (struct obd_device*)data; - struct lustre_handle *conn; - struct obd_export *exp; - int rc; + struct obd_device *cobd = (struct obd_device*)data; + int rc; - LASSERT(dev != NULL); - conn = &dev->u.cobd.cobd_cache; + LASSERT(cobd != NULL); - if (!dev->obd_set_up) { - rc = snprintf (page, count, "not set up\n"); + if (!cobd->obd_set_up) { + rc = snprintf(page, count, "not set up\n"); } else { - exp = class_conn2export(conn); - LASSERT (exp != NULL); - rc = snprintf(page, count, "%s\n", - exp->exp_obd->obd_uuid.uuid); - class_export_put(exp); - } - return (rc); + struct obd_device *cache = + class_conn2obd(&cobd->u.cobd.cobd_cache); + LASSERT(cache != NULL); + rc = snprintf(page, count, "%s\n", cache->obd_uuid.uuid); + } + return rc; } -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "target_uuid", rd_target, 0, 0 }, - { "cache_uuid", rd_cache, 0, 0 }, +static struct lprocfs_vars lprocfs_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "target_uuid", cobd_rd_target, 0, 0 }, + { "cache_uuid", cobd_rd_cache, 0, 0 }, { 0 } }; struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(cobd, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd index 51d1d1a..de4d653 100644 --- a/lustre/conf/lustre.dtd +++ b/lustre/conf/lustre.dtd @@ -33,10 
+33,11 @@ + echoclient_ref | mountpoint_ref | mgmt_ref)*> - + @@ -45,6 +46,9 @@ + + + @@ -57,6 +61,11 @@ + + + @@ -110,16 +119,20 @@ - - - - + + + + + + + + diff --git a/lustre/configure.in b/lustre/configure.in index 8e12135..50f82c8 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -14,6 +14,18 @@ AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*v AC_ARG_ENABLE(extN, [ --enable-extN use extN instead of ext3 for lustre backend]) AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) +# the pinger is temporary, until we have the recovery node in place +AC_ARG_ENABLE(pinger, [ --enable-pinger recovery pinger support]) +if test x$enable_pinger = xyes ; then + AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger) +fi + +# very experimental orphan support +AC_ARG_ENABLE(orphans, [ --enable-orphans very experimental orphan recovery support]) +if test x$enable_orphans = xyes ; then + AC_DEFINE(ENABLE_ORPHANS, 1, Compile with orphan support) +fi + AC_ARG_WITH(obd-buffer-size, [ --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192) AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size]) @@ -21,15 +33,8 @@ sinclude(portals/build.m4) sinclude(portals/archdep.m4) if test x$enable_inkernel = xyes ; then -cp Makefile.mk Makefile.in -cp mds/Makefile.mk mds/Makefile.in -cp portals/Kernelenv.mk portals/Kernelenv.in -cp portals/Makefile.mk portals/Makefile.in -cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in -cp portals/portals/Makefile.mk portals/portals/Makefile.in -cp portals/knals/Makefile.mk portals/knals/Makefile.in -cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in -cp portals/router/Makefile.mk portals/router/Makefile.in + find . 
-name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \ + sh -e -x -c '(cp -f $0.mk $0.in)' fi AM_CONFIG_HEADER(portals/include/config.h) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 202a761..6b94901ef 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -363,16 +363,16 @@ struct page { #define kmap(page) (page)->addr #define kunmap(a) do { int foo = 1; foo++; } while (0) -static inline struct page *alloc_pages(int mask, unsigned long foo) +static inline struct page *alloc_pages(int mask, unsigned long order) { struct page *pg = malloc(sizeof(*pg)); if (!pg) return NULL; #ifdef MAP_ANONYMOUS - pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); #else - pg->addr = malloc(PAGE_SIZE); + pg->addr = malloc(PAGE_SIZE << order); #endif if (!pg->addr) { @@ -407,26 +407,27 @@ static inline struct page* __grab_cache_page(int index) /* arithmetic */ #define do_div(a,b) \ ({ \ - unsigned long ret; \ - ret = (a)%(b); \ - (a) = (a)/(b); \ - (ret); \ + unsigned long remainder;\ + remainder = (a) % (b); \ + (a) = (a) / (b); \ + (remainder); \ }) /* VFS stuff */ -#define ATTR_MODE 1 -#define ATTR_UID 2 -#define ATTR_GID 4 -#define ATTR_SIZE 8 -#define ATTR_ATIME 16 -#define ATTR_MTIME 32 -#define ATTR_CTIME 64 -#define ATTR_ATIME_SET 128 -#define ATTR_MTIME_SET 256 -#define ATTR_FORCE 512 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 1024 -#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ +#define ATTR_MODE 0x0001 +#define ATTR_UID 0x0002 +#define ATTR_GID 0x0004 +#define ATTR_SIZE 0x0008 +#define ATTR_ATIME 0x0010 +#define ATTR_MTIME 0x0020 +#define ATTR_CTIME 0x0040 +#define ATTR_ATIME_SET 0x0080 +#define ATTR_MTIME_SET 0x0100 +#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 0x0400 
+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ +#define ATTR_CTIME_SET 0x2000 struct iattr { unsigned int ia_valid; diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index fb96bde..e6678f8 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -24,14 +24,25 @@ #ifndef _LPROCFS_SNMP_H #define _LPROCFS_SNMP_H + #ifdef __KERNEL__ #include #include #include +#include #include +#include + +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# else +# define kstatfs statfs +# endif + +#else +# define kstatfs statfs #endif -#include #ifndef LPROCFS #ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */ @@ -116,9 +127,8 @@ struct lprocfs_stats { /* class_obd.c */ extern struct proc_dir_entry *proc_lustre_root; -/* lproc_lov.c */ -extern struct file_operations ll_proc_target_fops; struct obd_device; +struct file; #ifdef LPROCFS @@ -184,14 +194,18 @@ void lprocfs_init_multi_vars(unsigned int idx, \ x->obd_vars = glob[idx].obd_vars; \ } \ -#define LPROCFS_INIT_VARS(vclass, vinstance) \ -void lprocfs_init_vars(struct lprocfs_static_vars *x) \ +#define LPROCFS_INIT_VARS(name, vclass, vinstance) \ +void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \ { \ x->module_vars = vclass; \ x->obd_vars = vinstance; \ } \ -extern void lprocfs_init_vars(struct lprocfs_static_vars *var); +#define lprocfs_init_vars(NAME, VAR) \ +do { \ + extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \ + lprocfs_##NAME##_init_vars(VAR); \ +} while (0) extern void lprocfs_init_multi_vars(unsigned int idx, struct lprocfs_static_vars *var); /* lprocfs_status.c */ @@ -220,6 +234,8 @@ extern int lprocfs_rd_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_name(char *page, char **start, off_t off, int count, int *eof, void *data); +extern int 
lprocfs_rd_fstype(char *page, char **start, off_t off, + int count, int *eof, void *data); extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, @@ -228,38 +244,24 @@ extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data); /* Statfs helpers */ -struct statfs; extern int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); extern int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); + int count, int *eof, void *data); /* lprocfs_status.c: counter read/write functions */ -struct file; extern int lprocfs_counter_read(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_counter_write(struct file *file, const char *buffer, unsigned long count, void *data); - -#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) \ -{ \ - struct statfs sfs; \ - int rc = get_statfs_fct((struct obd_device*)data, &sfs); \ - return (rc == 0 ? 
\ - lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \ - rc); \ -} - #else /* LPROCFS is not defined */ static inline void lprocfs_counter_add(struct lprocfs_stats *stats, @@ -289,18 +291,17 @@ static inline void lprocfs_free_obd_stats(struct obd_device *obddev) static inline struct proc_dir_entry * lprocfs_register(const char *name, struct proc_dir_entry *parent, struct lprocfs_vars *list, void *data) { return NULL; } -#define LPROCFS_INIT_MULTI_VARS(array, size) +#define LPROCFS_INIT_MULTI_VARS(array, size) do {} while (0) static inline void lprocfs_init_multi_vars(unsigned int idx, struct lprocfs_static_vars *x) { return; } -#define LPROCFS_INIT_VARS(vclass, vinstance) -static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; } +#define LPROCFS_INIT_VARS(name, vclass, vinstance) do {} while (0) +#define lprocfs_init_vars(...) do {} while (0) static inline int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *var, void *data) { return 0; } static inline void lprocfs_remove(struct proc_dir_entry *root) {}; static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, const char *name) {return 0;} -struct obd_device; static inline int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list) { return 0; } static inline int lprocfs_obd_detach(struct obd_device *dev) { return 0; } @@ -318,37 +319,30 @@ static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } /* Statfs helpers */ -struct statfs; static inline int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - 
int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } + int count, int *eof, void *data) { return 0; } static inline int lprocfs_counter_read(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } -struct file; static inline int lprocfs_counter_write(struct file *file, const char *buffer, unsigned long count, void *data) { return 0; } - -#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) { *eof = 1; return 0; } - #endif /* LPROCFS */ #endif /* LPROCFS_SNMP_H */ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 4275a10..3609d52 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -23,22 +23,67 @@ #ifndef _COMPAT25_H #define _COMPAT25_H -#include +#ifdef __KERNEL__ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -#define KDEVT_VAL(dev, val) dev.value = 0 -#else -#define KDEVT_VAL(dev, val) dev = 0 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69) +#error sorry, lustre requires at least 2.5.69 #endif +#include + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) # define PGCACHE_WRLOCK(mapping) write_lock(&mapping->page_lock) # define PGCACHE_WRUNLOCK(mapping) write_unlock(&mapping->page_lock) -#else + +#define KDEVT_INIT(val) { .value = val } +#define LTIME_S(time) 
(time.tv_sec) +#define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp, 1) +#define ll_path_lookup path_lookup + + +#define ll_pgcache_lock(mapping) spin_lock(&mapping->page_lock) +#define ll_pgcache_unlock(mapping) spin_unlock(&mapping->page_lock) + +#else /* 2.4.. */ + # define PGCACHE_WRLOCK(mapping) spin_lock(&pagecache_lock) # define PGCACHE_WRUNLOCK(mapping) spin_unlock(&pagecache_lock) + +/* 2.5 uses hlists for some things, like the d_hash. we'll treat them + * as 2.5 and let macros drop back.. */ +#define hlist_entry list_entry +#define hlist_head list_head +#define hlist_node list_head +#define HLIST_HEAD LIST_HEAD +#define INIT_HLIST_HEAD INIT_LIST_HEAD +#define hlist_del_init list_del_init +#define hlist_add_head list_add +#define hlist_for_each_safe list_for_each_safe +#define KDEVT_INIT(val) (val) +#define ext3_xattr_set_handle ext3_xattr_set +#define try_module_get __MOD_INC_USE_COUNT +#define module_put __MOD_DEC_USE_COUNT +#define LTIME_S(time) (time) +#ifndef CONFIG_RH_2_4_20 +#define cpu_online(cpu) (cpu_online_map & (1<= KERNEL_VERSION(2,5,0) # define filemap_fdatasync(mapping) filemap_fdatawrite(mapping) @@ -54,18 +99,6 @@ # define Page_Uptodate(page) PageUptodate(page) #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0) -#else -# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp) -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define LL_CHECK_DIRTY(sb) do { }while(0) -#else -# define LL_CHECK_DIRTY(sb) ll_check_dirty(sb) -#endif - #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) #define rb_node_s rb_node #define rb_root_s rb_root @@ -73,4 +106,5 @@ typedef struct rb_root_s rb_root_t; typedef struct rb_node_s rb_node_t; #endif +#endif /* __KERNEL__ */ #endif /* _COMPAT25_H */ diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 2db4196..8fc90ae 100644 --- 
a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -188,6 +188,7 @@ struct ldlm_lock { * it's no longer in use. If the lock is not granted, a process sleeps * on this waitq to learn when it becomes granted. */ wait_queue_head_t l_waitq; + struct timeval l_enqueued_time; }; typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new); @@ -316,6 +317,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, ldlm_res_iterator_t iter, void *closure); int ldlm_replay_locks(struct obd_import *imp); +void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *, + ldlm_iterator_t iter, void *data); /* ldlm_extent.c */ int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *); @@ -450,6 +453,8 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *, /* mds/handler.c */ /* This has to be here because recurisve inclusion sucks. */ +int intent_disposition(struct ldlm_reply *rep, int flag); +void intent_set_disposition(struct ldlm_reply *rep, int flag); int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 6939a95..677ddc6 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -11,7 +11,7 @@ #define __EXPORT_H #include -#include +#include struct mds_client_data; @@ -19,7 +19,8 @@ struct mds_export_data { struct list_head med_open_head; spinlock_t med_open_lock; struct mds_client_data *med_mcd; - int med_off; + loff_t med_off; + int med_idx; }; struct ldlm_export_data { @@ -37,6 +38,16 @@ struct ec_export_data { /* echo client */ struct list_head eced_locks; }; +/* In-memory access to client data from OST struct */ +struct filter_client_data; +struct filter_export_data { + struct list_head fed_open_head; //files to close on disconnect + spinlock_t fed_lock; /* protects fed_open_head */ + struct filter_client_data 
*fed_fcd; + loff_t fed_lr_off; + int fed_lr_idx; +}; + struct obd_export { struct portals_handle exp_handle; atomic_t exp_refcount; @@ -48,7 +59,8 @@ struct obd_export { struct ptlrpc_request *exp_outstanding_reply; time_t exp_last_request_time; spinlock_t exp_lock; /* protects flags int below */ - int exp_failed:1, exp_failover:1; + int exp_failed:1; + int exp_flags; union { struct mds_export_data eu_mds_data; struct filter_export_data eu_filter_data; diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index fc00fe1..37ffc4f5 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -30,7 +30,8 @@ #include #include -typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error); +typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, + void *data, int error); struct fsfilt_objinfo { struct dentry *fso_dentry; @@ -41,9 +42,9 @@ struct fsfilt_operations { struct list_head fs_list; struct module *fs_owner; char *fs_type; - void *(* fs_start)(struct inode *inode, int op); + void *(* fs_start)(struct inode *inode, int op, void *desc_private); void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb); + int niocount, void *desc_private); int (* fs_commit)(struct inode *inode, void *handle,int force_sync); int (* fs_setattr)(struct dentry *dentry, void *handle, struct iattr *iattr, int do_trunc); @@ -54,16 +55,19 @@ struct fsfilt_operations { loff_t *offset); int (* fs_journal_data)(struct file *file); int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func); + void *handle, fsfilt_cb_t cb_func, + void *cb_data); int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); int (* fs_sync)(struct super_block *sb); int (* fs_prep_san_write)(struct inode *inode, long *blocks, int nblocks, loff_t newsize); + int (* fs_write_record)(struct file *, char *, int size, loff_t 
*); + int (* fs_read_record)(struct file *, char *, int size, loff_t *); }; extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops); -extern struct fsfilt_operations *fsfilt_get_ops(char *type); +extern struct fsfilt_operations *fsfilt_get_ops(const char *type); extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_UNLINK 1 @@ -75,26 +79,53 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_MKNOD 7 #define FSFILT_OP_SETATTR 8 #define FSFILT_OP_LINK 9 +#define FSFILT_OP_CREATE_LOG 10 +#define FSFILT_OP_UNLINK_LOG 11 -static inline void *fsfilt_start(struct obd_device *obd, - struct inode *inode, int op) +static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti) { unsigned long now = jiffies; - void *handle = obd->obd_fsops->fs_start(inode, op); - CDEBUG(D_HA, "started handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) + void *parent_handle = oti ? oti->oti_handle : NULL; + void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle); + CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); + + if (oti != NULL) { + if (parent_handle == NULL) { + oti->oti_handle = handle; + } else if (handle != parent_handle) { + CERROR("mismatch: parent %p, handle %p, oti %p\n", + parent_handle, handle, oti->oti_handle); + LBUG(); + } + } + if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); return handle; } static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, struct fsfilt_objinfo *fso, int niocount, - struct niobuf_remote *nb) + struct obd_trans_info *oti) { unsigned long now = jiffies; - void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb); - CDEBUG(D_HA, "started handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) + void *parent_handle = oti ? 
oti->oti_handle : NULL; + void *handle; + + handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount, + parent_handle); + CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); + + if (oti != NULL) { + if (parent_handle == NULL) { + oti->oti_handle = handle; + } else if (handle != parent_handle) { + CERROR("mismatch: parent %p, handle %p, oti %p\n", + parent_handle, handle, oti->oti_handle); + LBUG(); + } + } + if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); return handle; } @@ -105,7 +136,7 @@ static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); CDEBUG(D_HA, "committing handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) + if (time_after(jiffies, now + 15 * HZ)) CERROR("long journal start time %lus\n", (jiffies - now) / HZ); return rc; } @@ -116,9 +147,8 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, unsigned long now = jiffies; int rc; rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc); - if (time_after(jiffies, now + 15*HZ)) + if (time_after(jiffies, now + 15 * HZ)) CERROR("long setattr time %lus\n", (jiffies - now) / HZ); - return rc; } @@ -147,9 +177,11 @@ static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file) } static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) + void *handle, fsfilt_cb_t cb_func, + void *cb_data) { - return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func); + return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd, handle, + cb_func, cb_data); } static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs, @@ -172,6 +204,19 @@ static inline int fs_prep_san_write(struct obd_device *obd, return obd->obd_fsops->fs_prep_san_write(inode, blocks, nblocks, newsize); } + +static inline 
int fsfilt_read_record(struct obd_device *obd, struct file *file, + char *buf, loff_t size, loff_t *offs) +{ + return obd->obd_fsops->fs_read_record(file, buf, size, offs); +} + +static inline int fsfilt_write_record(struct obd_device *obd, struct file *file, + char *buf, loff_t size, loff_t *offs) +{ + return obd->obd_fsops->fs_write_record(file, buf, size, offs); +} + #endif /* __KERNEL__ */ #endif diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index f4a5f2d..055b7a4 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -18,7 +18,7 @@ * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * (Un)packing of OST requests + * Lustre wire protocol definitions. * * We assume all nodes are either little-endian or big-endian, and we * always send messages in the sender's native format. The receiver @@ -29,9 +29,9 @@ * implemented either here, inline (trivial implementations) or in * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" * endian, in-place in the message buffer. - * + * * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). + * verified that the length of the message buffer >= sizeof (type). 
* * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine * may be defined that swabs just the variable part, after the caller has @@ -90,29 +90,33 @@ extern struct obd_uuid lctl_fake_uuid; * FOO_BULK_PORTAL is for incoming bulk on the FOO */ -#define CONNMGR_REQUEST_PORTAL 1 -#define CONNMGR_REPLY_PORTAL 2 -//#define OSC_REQUEST_PORTAL 3 -#define OSC_REPLY_PORTAL 4 -//#define OSC_BULK_PORTAL 5 -#define OST_REQUEST_PORTAL 6 -//#define OST_REPLY_PORTAL 7 -#define OST_BULK_PORTAL 8 -//#define MDC_REQUEST_PORTAL 9 -#define MDC_REPLY_PORTAL 10 -//#define MDC_BULK_PORTAL 11 -#define MDS_REQUEST_PORTAL 12 -//#define MDS_REPLY_PORTAL 13 -#define MDS_BULK_PORTAL 14 -#define LDLM_CB_REQUEST_PORTAL 15 -#define LDLM_CB_REPLY_PORTAL 16 +#define CONNMGR_REQUEST_PORTAL 1 +#define CONNMGR_REPLY_PORTAL 2 +//#define OSC_REQUEST_PORTAL 3 +#define OSC_REPLY_PORTAL 4 +//#define OSC_BULK_PORTAL 5 +#define OST_REQUEST_PORTAL 6 +//#define OST_REPLY_PORTAL 7 +#define OST_BULK_PORTAL 8 +//#define MDC_REQUEST_PORTAL 9 +#define MDC_REPLY_PORTAL 10 +//#define MDC_BULK_PORTAL 11 +#define MDS_REQUEST_PORTAL 12 +//#define MDS_REPLY_PORTAL 13 +#define MDS_BULK_PORTAL 14 +#define LDLM_CB_REQUEST_PORTAL 15 +#define LDLM_CB_REPLY_PORTAL 16 #define LDLM_CANCEL_REQUEST_PORTAL 17 #define LDLM_CANCEL_REPLY_PORTAL 18 #define PTLBD_REQUEST_PORTAL 19 #define PTLBD_REPLY_PORTAL 20 #define PTLBD_BULK_PORTAL 21 -#define MDS_SETATTR_PORTAL 22 -#define MDS_READPAGE_PORTAL 23 +#define MDS_SETATTR_PORTAL 22 +#define MDS_READPAGE_PORTAL 23 +#define MGMT_REQUEST_PORTAL 24 +#define MGMT_REPLY_PORTAL 25 +#define MGMT_CLI_REQUEST_PORTAL 26 +#define MGMT_CLI_REPLY_PORTAL 27 #define SVC_KILLED 1 #define SVC_EVENT 2 @@ -159,7 +163,7 @@ struct lustre_msg { static inline int lustre_msg_swabbed (struct lustre_msg *msg) { - return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC)); + return (msg->magic == __swab32(PTLRPC_MSG_MAGIC)); } /* Flags that are operation-specific go in the top 16 bits. 
*/ @@ -207,9 +211,10 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) */ -#define MSG_CONNECT_RECOVERING 0x1 -#define MSG_CONNECT_RECONNECT 0x2 +#define MSG_CONNECT_RECOVERING 0x1 +#define MSG_CONNECT_RECONNECT 0x2 #define MSG_CONNECT_REPLAYABLE 0x4 +#define MSG_CONNECT_PEER 0x8 /* * OST requests: OBDO & OBD request records @@ -234,13 +239,13 @@ typedef enum { OST_SAN_READ = 14, OST_SAN_WRITE = 15, OST_SYNCFS = 16, + OST_SET_INFO = 17, OST_LAST_OPC } ost_cmd_t; #define OST_FIRST_OPC OST_REPLY /* When adding OST RPC opcodes, please update * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ - typedef uint64_t obd_id; typedef uint64_t obd_gr; typedef uint64_t obd_time; @@ -324,8 +329,23 @@ struct lov_mds_md { #define OBD_MD_LINKNAME (0x00040000) /* symbolic link target */ #define OBD_MD_FLHANDLE (0x00080000) /* file handle */ #define OBD_MD_FLCKSUM (0x00100000) /* bulk data checksum */ +#define OBD_MD_FLQOS (0x00200000) /* quality of service stats */ +#define OBD_MD_FLOSCOPQ (0x00400000) /* osc opaque data */ +#define OBD_MD_FLCOOKIE (0x00800000) /* log cancellation cookie */ #define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ - OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM)) + OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM|\ + OBD_MD_FLQOS | OBD_MD_FLOSCOPQ | OBD_MD_FLCOOKIE)) + +static inline struct lustre_handle *obdo_handle(struct obdo *oa) +{ + return (struct lustre_handle *)oa->o_inline; +} + +static inline struct llog_cookie *obdo_logcookie(struct obdo *oa) +{ + return (struct llog_cookie *)(oa->o_inline + + sizeof(struct lustre_handle)); +} struct obd_statfs { __u64 os_type; @@ -399,6 +419,8 @@ typedef enum { MDS_GETSTATUS = 40, MDS_STATFS = 41, MDS_GETLOVINFO = 42, + MDS_PIN = 43, + MDS_UNPIN = 44, MDS_LAST_OPC } mds_cmd_t; #define MDS_FIRST_OPC MDS_GETATTR @@ -417,12 +439,20 @@ typedef enum { #define REINT_OPEN 6 #define 
REINT_MAX 6 -#define IT_INTENT_EXEC 1 -#define IT_OPEN_LOOKUP (1 << 1) -#define IT_OPEN_NEG (1 << 2) -#define IT_OPEN_POS (1 << 3) -#define IT_OPEN_CREATE (1 << 4) -#define IT_OPEN_OPEN (1 << 5) +/* the disposition of the intent outlines what was executed */ +#define DISP_IT_EXECD 1 +#define DISP_LOOKUP_EXECD (1 << 1) +#define DISP_LOOKUP_NEG (1 << 2) +#define DISP_LOOKUP_POS (1 << 3) +#define DISP_OPEN_CREATE (1 << 4) +#define DISP_OPEN_OPEN (1 << 5) +#define DISP_ENQ_COMPLETE (1<<6) + + +struct ll_uctxt { + __u32 gid1; + __u32 gid2; +}; struct ll_fid { __u64 id; @@ -504,6 +534,11 @@ struct mds_rec_setattr { __u32 sa_suppgid; }; +/* Remove this once we declare it in include/linux/fs.h (v21 kernel patch?) */ +#ifndef ATTR_CTIME_SET +#define ATTR_CTIME_SET 0x2000 +#endif + extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); struct mds_rec_create { @@ -720,9 +755,109 @@ struct ptlbd_rsp { extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r); /* + * Opcodes for management/monitoring node. + */ +#define MGMT_CONNECT 250 +#define MGMT_DISCONNECT 251 +#define MGMT_EXCEPTION 252 /* node died, etc. */ + +/* * Opcodes for multiple servers. */ -#define OBD_PING 400 +#define OBD_PING 400 +#define OBD_LOG_CANCEL 401 +#define OBD_LAST_OPC (OBD_LOG_CANCEL + 1) +#define OBD_FIRST_OPC OBD_PING + +/* catalog of log objects */ + +/* Identifier for a single log object */ +struct llog_logid { + __u64 lgl_oid; + __u32 lgl_ogen; +}; + +/* Log data record types - there is no specific reason that these need to + * be related to the RPC opcodes, but no reason not to (may be handy later?) + */ +typedef enum { + OST_CREATE_REC = 0x10600000 | (OST_CREATE << 8), + OST_ORPHAN_REC = 0x10600000 | (OST_DESTROY << 8), + MDS_UNLINK_REC = 0x10610000 | (MDS_REINT << 8) | REINT_UNLINK, + LLOG_CATALOG_MAGIC = 0x1062e67d, + LLOG_OBJECT_MAGIC = 0x10645539, +} llog_op_type; + +/* Log record header - stored in originating host endian order (use magic to + * check order). 
+ * Each record must start with this struct, end with a __u32 for the struct + * length, and be a multiple of 64 bits in size. + */ +struct llog_trans_hdr { + __u32 lth_len; + __u32 lth_type; +}; + +struct llog_create_rec { + struct llog_trans_hdr lcr_hdr; + struct ll_fid lcr_fid; + obd_id lcr_oid; + obd_count lcr_ogen; + __u32 lcr_end_len; +} __attribute__((packed)); + +struct llog_orphan_rec { + struct llog_trans_hdr lor_hdr; + obd_id lor_oid; + obd_count lor_ogen; + __u32 lor_end_len; +} __attribute__((packed)); + +struct llog_unlink_rec { + struct llog_trans_hdr lur_hdr; + obd_id lur_oid; + obd_count lur_ogen; + __u32 lur_end_len; +} __attribute__((packed)); + +/* On-disk header structure of each log object - stored in creating host + * endian order, with the exception of the bitmap - stored in little endian + * order so that we can use ext2_{clear,set,test}_bit() for proper/optimized + * little-endian handling of bitmaps (which are otherwise a pain to handle). + */ +#define LLOG_CHUNK_SIZE 4096 +#define LLOG_HEADER_SIZE (96) +#define LLOG_BITMAP_BYTES (LLOG_CHUNK_SIZE - LLOG_HEADER_SIZE) + +#define LLOG_MIN_REC_SIZE (16) /* round(struct llog_trans_hdr+end_len) */ + +struct llog_object_hdr { + struct llog_trans_hdr llh_hdr; + __u64 llh_timestamp; + __u32 llh_count; + __u16 llh_bitmap_offset; + __u16 llh_unused; + struct obd_uuid llh_tgtuuid; + __u8 llh_padding[3]; + __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32)-17]; + __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)]; + __u32 llh_hdr_end_len; +}; + +static inline int llog_log_swabbed(struct llog_object_hdr *hdr) +{ + if (hdr->llh_hdr.lth_type == __swab32(LLOG_OBJECT_MAGIC)) + return 1; + if (hdr->llh_hdr.lth_type == LLOG_OBJECT_MAGIC) + return 0; + return -1; +} + +/* log cookies are used to reference a specific log file and a record therein */ +struct llog_cookie { + struct llog_logid lgc_lgl; + __u32 lgc_index; +}; #endif diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h 
index b18e2d2..467132b 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -80,7 +80,7 @@ void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf); int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf); -int client_obd_cleanup(struct obd_device * obddev, int force, int failover); +int client_obd_cleanup(struct obd_device * obddev, int flags); struct client_obd *client_conn2cli(struct lustre_handle *conn); struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid); @@ -89,13 +89,16 @@ struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid); * the server, we can just send the whole struct unaltered. */ struct obd_client_handle { struct lustre_handle och_fh; + struct llog_cookie och_cookie; struct ptlrpc_request *och_req; __u32 och_magic; }; #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed /* statfs_pack.c */ -int obd_self_statfs(struct obd_device *dev, struct statfs *sfs); +struct statfs; +void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs); +void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs); /* l_lock.c */ struct lustre_lock { diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 81184e7..fa83fb2 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -16,7 +16,14 @@ #ifdef __KERNEL__ +#include + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +#include +#endif + #include +#include #include #include @@ -46,20 +53,62 @@ struct lustre_intent_data { __u32 it_lock_mode; }; +#define LL_IT2STR(it) ((it) ? 
ldlm_it2str((it)->it_op) : "0") + +static inline struct lookup_intent *ll_nd2it(struct nameidata *nd) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + return &nd->it; +#else + return nd->it; +#endif +} + struct ll_dentry_data { - struct semaphore lld_it_sem; + int lld_cwd_count; + int lld_mnt_count; + struct obd_client_handle lld_cwd_och; + struct obd_client_handle lld_mnt_och; }; -#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata) +#define ll_d2d(de) ((struct ll_dentry_data*) de->d_fsdata) extern struct file_operations ll_pgcache_seq_fops; +/* + * XXX used in obdecho/echo_client.c must move (pjb) + *'p' list as its a list of pages linked together + * by ->private.. + */ +struct plist { + struct page *pl_head; + struct page *pl_tail; + int pl_num; +}; + +struct ll_dirty_offsets { + rb_root_t do_root; + spinlock_t do_lock; + unsigned long do_num_dirty; +}; + +struct ll_writeback_pages { + obd_count npgs, max; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + int rw; + struct inode *inode; + struct brw_page pga[0]; +#else + struct brw_page *pga; +#endif +}; + struct ll_inode_info { struct lov_stripe_md *lli_smd; char *lli_symlink_name; struct semaphore lli_open_sem; struct list_head lli_read_extents; - loff_t lli_maxbytes; + __u64 lli_maxbytes; spinlock_t lli_read_extent_lock; unsigned long lli_flags; #define LLI_F_HAVE_SIZE_LOCK 0 @@ -81,13 +130,6 @@ struct ll_read_extent { struct ldlm_extent re_extent; }; -int ll_check_dirty( struct super_block *sb ); -int ll_batch_writepage( struct inode *inode, struct page *page ); - -/* interpet return codes from intent lookup */ -#define LL_LOOKUP_POSITIVE 1 -#define LL_LOOKUP_NEGATIVE 2 - #define LL_SUPER_MAGIC 0x0BD00BD0 #define LL_COMMITCBD_STOPPING 0x1 @@ -118,14 +160,22 @@ struct ll_sb_info { struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ }; -static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb) -{ + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - return (struct 
ll_sb_info *)(sb->s_fs_info); -#else - return (struct ll_sb_info *)(sb->u.generic_sbp); -#endif +#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->s_fs_info)) +void __d_rehash(struct dentry * entry, int lock); +static inline __u64 ll_ts2u64(struct timespec *time) +{ + __u64 t = time->tv_sec; + return t; +} +#else /* 2.4 here */ +#define ll_s2sbi(sb) ((struct ll_sb_info *)((sb)->u.generic_sbp)) +static inline __u64 ll_ts2u64(time_t *time) +{ + return *time; } +#endif static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb) { @@ -146,29 +196,6 @@ static inline struct ll_sb_info *ll_i2sbi(struct inode *inode) return ll_s2sbi(inode->i_sb); } -static inline void d_unhash_aliases(struct inode *inode) -{ - struct dentry *dentry = NULL; - struct list_head *tmp; - struct ll_sb_info *sbi = ll_i2sbi(inode); - ENTRY; - - CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n", - inode->i_ino, inode->i_generation); - - spin_lock(&dcache_lock); - list_for_each(tmp, &inode->i_dentry) { - dentry = list_entry(tmp, struct dentry, d_alias); - - list_del_init(&dentry->d_hash); - dentry->d_flags |= DCACHE_LUSTRE_INVALID; - list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list); - } - - spin_unlock(&dcache_lock); - EXIT; -} - // FIXME: replace the name of this with LL_I to conform to kernel stuff // static inline struct ll_inode_info *LL_I(struct inode *inode) static inline struct ll_inode_info *ll_i2info(struct inode *inode) @@ -199,21 +226,17 @@ static inline int ll_mds_max_easize(struct super_block *sb) return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize; } -static inline loff_t ll_file_maxbytes(struct inode *inode) +static inline __u64 ll_file_maxbytes(struct inode *inode) { return ll_i2info(inode)->lli_maxbytes; } /* namei.c */ -int ll_lock(struct inode *dir, struct dentry *dentry, - struct lookup_intent *it, struct lustre_handle *lockh); -int ll_unlock(__u32 mode, struct lustre_handle *lockh); - -typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *, 
+typedef int (*intent_finish_cb)(struct ptlrpc_request *, struct inode *parent, struct dentry **, struct lookup_intent *, int offset, obd_id ino); int ll_intent_lock(struct inode *parent, struct dentry **, - struct lookup_intent *, intent_finish_cb); + struct lookup_intent *, int, intent_finish_cb); int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); @@ -222,51 +245,7 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data, struct inode *i1, struct inode *i2, const char *name, int namelen, int mode); -/* dcache.c */ -void ll_intent_release(struct dentry *, struct lookup_intent *); - -/**** - -I originally implmented these as functions, then realized a macro -would be more helpful for debugging, so the CDEBUG messages show -the current calling function. The orignal functions are in llite/dcache.c - -int ll_save_intent(struct dentry * de, struct lookup_intent * it); -struct lookup_intent * ll_get_intent(struct dentry * de); -****/ - -#define IT_RELEASED_MAGIC 0xDEADCAFE - -#define LL_SAVE_INTENT(de, it) \ -do { \ - LASSERT(ll_d2d(de) != NULL); \ - \ - down(&ll_d2d(de)->lld_it_sem); \ - LASSERT(de->d_it == NULL); \ - de->d_it = it; \ - CDEBUG(D_DENTRY, \ - "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n", \ - de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op), \ - atomic_read(&(ll_d2d(de)->lld_it_sem.count))); \ -} while(0) - -#define LL_GET_INTENT(de, it) \ -do { \ - it = de->d_it; \ - \ - LASSERT(ll_d2d(de) != NULL); \ - LASSERT(it); \ - LASSERT(it->it_op != IT_RELEASED_MAGIC); \ - \ - CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n", \ - de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op)); \ - de->d_it = NULL; \ - it->it_op = IT_RELEASED_MAGIC; \ - up(&ll_d2d(de)->lld_it_sem); \ -} while(0) - -#define LL_IT2STR(it) ((it) ? 
ldlm_it2str((it)->it_op) : "0") - +/* lprocfs.c */ enum { LPROC_LL_DIRTY_HITS = 0, LPROC_LL_DIRTY_MISSES, @@ -312,8 +291,6 @@ extern struct file_operations ll_file_operations; extern struct inode_operations ll_file_inode_operations; extern struct inode_operations ll_special_inode_operations; struct ldlm_lock; -int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, struct ldlm_extent *extent, struct lustre_handle *lockh); @@ -329,30 +306,22 @@ int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); -/* rw.c */ -struct page *ll_getpage(struct inode *inode, unsigned long offset, - int create, int locked); -void ll_truncate(struct inode *inode); /* super.c */ void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *); int ll_setattr_raw(struct inode *inode, struct iattr *attr); +int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, + unsigned long maxage); /* symlink.c */ extern struct inode_operations ll_fast_symlink_inode_operations; extern struct inode_operations ll_symlink_inode_operations; -/* sysctl.c */ -void ll_sysctl_init(void); -void ll_sysctl_clean(void); - #else #include #endif /* __KERNEL__ */ -static inline void ll_ino2fid(struct ll_fid *fid, - obd_id ino, - __u32 generation, +static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation, int type) { fid->id = ino; @@ -360,11 +329,6 @@ static inline void ll_ino2fid(struct ll_fid *fid, fid->f_type = type; } -struct ll_read_inode2_cookie { - struct mds_body *lic_body; - struct lov_stripe_md *lic_lsm; -}; - #include #define LL_IOC_GETFLAGS _IOR ('f', 151, long) diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 683d78d..e7ee6f0 100644 --- a/lustre/include/linux/lustre_mds.h +++ 
b/lustre/include/linux/lustre_mds.h @@ -35,6 +35,8 @@ #include #include #include +#include +#include struct ldlm_lock_desc; struct mds_obd; @@ -49,6 +51,11 @@ struct ll_file_data; #define LUSTRE_MDT_NAME "mdt" #define LUSTRE_MDC_NAME "mdc" +struct lustre_md { + struct mds_body *body; + struct lov_stripe_md *lsm; +}; + struct mdc_rpc_lock { struct semaphore rpcl_sem; struct lookup_intent *rpcl_it; @@ -144,6 +151,8 @@ struct mds_update_record { char *ur_tgt; int ur_eadatalen; void *ur_eadata; + int ur_cookielen; + struct llog_cookie *ur_logcookies; struct iattr ur_iattr; struct obd_ucred ur_uc; __u64 ur_rdev; @@ -160,8 +169,31 @@ struct mds_update_record { #define ur_suppgid1 ur_uc.ouc_suppgid1 #define ur_suppgid2 ur_uc.ouc_suppgid2 -#define MDS_LR_CLIENT 8192 -#define MDS_LR_SIZE 128 +/* i_attr_flags holds the open count in the inode in 2.4 */ +//Alex implement on 2.4 with i_attr_flags and find soln for 2.5 please +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# define mds_open_orphan_count(inode) (0) +# define mds_open_orphan_inc(inode) do { } while (0); +# define mds_open_orphan_dec_test(inode) (0) +#else +# define mds_inode_oatomic(inode) ((atomic_t *)&(inode)->i_attr_flags) +# define mds_open_orphan_count(inode) \ + atomic_read(mds_inode_oatomic(inode)) +# define mds_open_orphan_inc(inode) \ + atomic_inc(mds_inode_oatomic(inode)) +# define mds_open_orphan_dec_test(inode) \ + atomic_dec_and_test(mds_inode_oatomic(inode)) +#endif +#define mds_inode_is_orphan(inode) ((inode)->i_flags & 0x4000000) +#define mds_inode_set_orphan(inode) (inode)->i_flags |= 0x4000000 + +#define MDS_LR_SERVER_SIZE 512 + +#define MDS_LR_CLIENT_START 8192 +#define MDS_LR_CLIENT_SIZE 128 +#if MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE +#error "Can't have MDS_LR_CLIENT_START < MDS_LR_SERVER_SIZE" +#endif #define MDS_CLIENT_SLOTS 17 @@ -169,11 +201,24 @@ struct mds_update_record { /* Data stored per server at the head of the last_rcvd file. In le32 order. 
*/ struct mds_server_data { - __u8 msd_uuid[37]; /* server UUID */ - __u8 uuid_padding[3]; /* unused */ - __u64 msd_last_transno; /* last completed transaction ID */ - __u64 msd_mount_count; /* MDS incarnation number */ - __u8 padding[512 - 56]; + __u8 msd_uuid[37]; /* server UUID */ + __u8 uuid_padding[3]; /* unused */ +// __u64 msd_last_objid; /* last created object ID */ + __u64 msd_last_transno; /* last completed transaction ID */ + __u64 msd_mount_count; /* MDS incarnation number */ + __u64 msd_padding_until_last_objid_is_enabled; + __u32 msd_feature_compat; /* compatible feature flags */ + __u32 msd_feature_rocompat;/* read-only compatible feature flags */ + __u32 msd_feature_incompat;/* incompatible feature flags */ + __u32 msd_server_size; /* size of server data area */ + __u32 msd_client_start; /* start of per-client data area */ + __u16 msd_client_size; /* size of per-client data area */ + __u16 msd_subdir_count; /* number of subdirectories for objects */ + __u64 msd_catalog_oid; /* recovery catalog object id */ + __u32 msd_catalog_ogen; /* recovery catalog inode generation */ + __u8 msd_peeruuid[37]; /* UUID of LOV/OSC associated with MDS */ + __u8 peer_padding[3]; /* unused */ + __u8 msd_padding[MDS_LR_SERVER_SIZE - 140]; }; /* Data stored per client in the last_rcvd file. In le32 order. */ @@ -185,7 +230,7 @@ struct mds_client_data { __u64 mcd_last_xid; /* xid for the last transaction */ __u32 mcd_last_result; /* result from last RPC */ __u32 mcd_last_data; /* per-op data (disposition for open &c.) 
*/ - __u8 padding[MDS_LR_SIZE - 74]; + __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 72]; }; /* file data for open files on MDS */ @@ -202,10 +247,6 @@ struct mds_file_data { int mds_reint_rec(struct mds_update_record *r, int offset, struct ptlrpc_request *req, struct lustre_handle *); -/* mds/mds_open.c */ -int mds_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *); - /* mds/handler.c */ #ifdef __KERNEL__ struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir, @@ -223,13 +264,22 @@ int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg, int offset, struct mds_body *body, struct inode *inode); void mds_steal_ack_locks(struct obd_export *exp, struct ptlrpc_request *req); +int mds_update_server_data(struct obd_device *); /* mds/mds_fs.c */ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt); int mds_fs_cleanup(struct obd_device *obddev, int failover); #endif +/* mds/mds_lov.c */ +extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count, + struct obd_uuid *uuidarray); +extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc); + /* mdc/mdc_request.c */ +int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, + struct lustre_handle *obd_import, + struct lustre_md *md); int mdc_enqueue(struct lustre_handle *conn, int lock_type, struct lookup_intent *it, int lock_mode, struct mdc_op_data *enq_data, @@ -248,7 +298,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid, unsigned int ea_size, struct ptlrpc_request **request); int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, + struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len, struct ptlrpc_request **request); int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags, struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, diff --git a/lustre/include/linux/lustre_net.h 
b/lustre/include/linux/lustre_net.h index ac87d7f..bc70b9a 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -96,9 +96,10 @@ /* OST_MAXREQSIZE ~= 1640 bytes = * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote * - * single object with 16 pages is 512 bytes + * - single object with 16 pages is 512 bytes + * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover */ -#define OST_MAXREQSIZE (2 * 1024) +#define OST_MAXREQSIZE (5 * 1024) #define PTLBD_NUM_THREADS 4 #define PTLBD_NEVENTS 1024 @@ -188,15 +189,19 @@ union ptlrpc_async_args { * big enough. For _tons_ of context, OBD_ALLOC a struct and store * a pointer to it here. The pointer_arg ensures this struct is at * least big enough for that. */ - void *pointer_arg[4]; + void *pointer_arg[5]; __u64 space[4]; }; +struct ptlrpc_request_set; +typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); + struct ptlrpc_request_set { int set_remaining; /* # uncompleted requests */ wait_queue_head_t set_waitq; + wait_queue_head_t *set_wakeup_ptr; struct list_head set_requests; - void *set_interpret; /* completion callback */ + set_interpreter_func set_interpret; /* completion callback */ union ptlrpc_async_args set_args; /* completion context */ }; diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index ba848a9..f30cbb2 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -10,6 +10,31 @@ #ifndef __OBD_H #define __OBD_H +#define IOC_OSC_TYPE 'h' +#define IOC_OSC_MIN_NR 20 +#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *) +#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *) +#define IOC_OSC_MAX_NR 50 + +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_MIN_NR 20 +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) +#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) +#define IOC_MDC_MAX_NR 50 + +#ifdef __KERNEL__ +# include +# 
include +# include /* for struct task_struct, for current.h */ +# include /* for smp_lock.h */ +# include +# include +# include +#endif + +#include +#include +#include #include struct lov_oinfo { /* per-child structure */ @@ -34,31 +59,6 @@ struct lov_stripe_md { struct lov_oinfo lsm_oinfo[0]; }; -#define IOC_OSC_TYPE 'h' -#define IOC_OSC_MIN_NR 20 -#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *) -#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *) -#define IOC_OSC_MAX_NR 50 - -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_MIN_NR 20 -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_MAX_NR 50 - -#ifdef __KERNEL__ -# include -# include -# include /* for struct task_struct, for current.h */ -# include /* for smp_lock.h */ -# include -# include - -# include -# include -# include -# include -#endif - struct obd_type { struct list_head typ_chain; struct obd_ops *typ_ops; @@ -80,7 +80,7 @@ struct ost_server_data; struct filter_obd { const char *fo_fstype; - char *fo_nspath; + char *fo_nspath; struct super_block *fo_sb; struct vfsmount *fo_vfsmnt; struct obd_run_ctxt fo_ctxt; @@ -103,28 +103,57 @@ struct filter_obd { spinlock_t fo_grant_lock; /* protects tot_granted */ obd_size fo_tot_granted; obd_size fo_tot_cached; + + struct llog_handle *fo_catalog; + struct obd_import *fo_mdc_imp; + struct obd_uuid fo_mdc_uuid; + struct lustre_handle fo_mdc_conn; + struct ptlrpc_client fo_mdc_client; + struct llog_commit_data *fo_llcd; + struct semaphore fo_sem; /* protects fo_llcd */ }; struct mds_server_data; struct client_obd { - struct obd_import *cl_import; - struct semaphore cl_sem; - int cl_conn_count; + struct obd_import *cl_import; + struct semaphore cl_sem; + int cl_conn_count; /* max_mds_easize is purely a performance thing so we don't have to * call obd_size_wiremd() all the time. 
*/ - int cl_max_mds_easize; - struct obd_device *cl_containing_lov; - kdev_t cl_sandev; - struct semaphore cl_dirty_sem; - obd_size cl_dirty; /* both in bytes */ - obd_size cl_dirty_granted; - /* this is just to keep existing infinitely caching behaviour between - * clients and OSTs that don't have the grant code in yet.. it can + int cl_max_mds_easize; + int cl_max_mds_cookiesize; + /* XXX can we replace cl_containing_lov with mgmt-events? */ + struct obd_device *cl_containing_lov; + kdev_t cl_sandev; + + struct llog_commit_data *cl_llcd; + void *cl_llcd_offset; + + struct semaphore cl_dirty_sem; + obd_size cl_dirty; /* both in bytes */ + obd_size cl_dirty_granted; + + struct obd_device *cl_mgmtcli_obd; + + /* this is just to keep existing infinitely caching behaviour between + * clients and OSTs that don't have the grant code in yet.. it can * be yanked once everything speaks grants */ - char cl_ost_can_grant; + char cl_ost_can_grant; }; +/* Like a client, with some hangers-on. Keep mc_client_obd first so that we + * can reuse the various client setup/connect functions. 
*/ +struct mgmtcli_obd { + struct client_obd mc_client_obd; /* nested */ + struct ptlrpc_thread *mc_ping_thread; + struct lustre_handle mc_ping_handle; /* XXX single-target */ + struct list_head mc_registered; + void *mc_hammer; +}; + +#define mc_import mc_client_obd.cl_import + struct mds_obd { struct ptlrpc_service *mds_service; struct ptlrpc_service *mds_setattr_service; @@ -139,12 +168,20 @@ struct mds_obd { struct address_space_operations *mds_aops; int mds_max_mdsize; + int mds_max_cookiesize; struct file *mds_rcvd_filp; spinlock_t mds_transno_lock; __u64 mds_last_transno; __u64 mds_mount_count; struct ll_fid mds_rootfid; struct mds_server_data *mds_server_data; + struct dentry *mds_pending_dir; + struct dentry *mds_logs_dir; + + struct llog_handle *mds_catalog; + struct obd_device *mds_osc_obd; + struct obd_uuid mds_osc_uuid; + struct lustre_handle mds_osc_conn; int mds_has_lov_desc; struct lov_desc mds_lov_desc; @@ -159,7 +196,6 @@ struct ldlm_obd { }; struct echo_obd { - char *eo_fstype; struct obdo oa; spinlock_t eo_lock; __u64 eo_lastino; @@ -221,6 +257,7 @@ struct cache_obd { struct lov_tgt_desc { struct obd_uuid uuid; struct lustre_handle conn; + struct llog_handle *ltd_cathandle; int active; /* is this target available for requests, etc */ }; @@ -230,6 +267,7 @@ struct lov_obd { struct lov_desc desc; int bufsize; int refcount; + int lo_catalog_loaded:1; struct lov_tgt_desc *tgts; }; @@ -247,14 +285,46 @@ struct niobuf_local { #define N_LOCAL_TEMP_PAGE 0x10000000 struct obd_trans_info { - __u64 oti_transno; + __u64 oti_transno; /* Only used on the server side for tracking acks. 
*/ struct oti_req_ack_lock { struct lustre_handle lock; __u32 mode; } oti_ack_locks[4]; + void *oti_handle; + struct llog_cookie oti_onecookie; + struct llog_cookie *oti_logcookies; + int oti_numcookies; }; +static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) +{ + if (!oti) + return; + + if (num_cookies == 1) + oti->oti_logcookies = &oti->oti_onecookie; + else + OBD_ALLOC(oti->oti_logcookies, + num_cookies * sizeof(oti->oti_onecookie)); + + oti->oti_numcookies = num_cookies; +} + +static inline void oti_free_cookies(struct obd_trans_info *oti) +{ + if (!oti || !oti->oti_logcookies) + return; + + if (oti->oti_logcookies == &oti->oti_onecookie) + LASSERT(oti->oti_numcookies == 1); + else + OBD_FREE(oti->oti_logcookies, + oti->oti_numcookies * sizeof(oti->oti_onecookie)); + oti->oti_logcookies = NULL; + oti->oti_numcookies = 0; +} + /* corresponds to one of the obd's */ struct obd_device { struct obd_type *obd_type; @@ -276,9 +346,11 @@ struct obd_device { struct ldlm_namespace *obd_namespace; struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; + spinlock_t obd_dev_lock; __u64 obd_last_committed; struct fsfilt_operations *obd_fsops; + struct obd_statfs obd_osfs; + unsigned long obd_osfs_age; /* XXX encapsulate all this recovery data into one struct */ svc_handler_t obd_recovery_handler; @@ -297,19 +369,25 @@ struct obd_device { struct mds_obd mds; struct client_obd cli; struct ost_obd ost; - struct echo_client_obd echo_client;; + struct echo_client_obd echo_client; struct ldlm_obd ldlm; struct echo_obd echo; struct recovd_obd recovd; struct lov_obd lov; struct cache_obd cobd; struct ptlbd_obd ptlbd; + struct mgmtcli_obd mgmtcli; } u; /* Fields used by LProcFS */ unsigned int obd_cntr_base; struct lprocfs_stats *obd_stats; }; +#define OBD_OPT_FORCE 0x0001 +#define OBD_OPT_FAILOVER 0x0002 + +#define OBD_LLOG_FL_SENDNOW 0x0001 + struct 
obd_ops { struct module *o_owner; int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len, @@ -321,16 +399,17 @@ struct obd_ops { int (*o_attach)(struct obd_device *dev, obd_count len, void *data); int (*o_detach)(struct obd_device *dev); int (*o_setup) (struct obd_device *dev, obd_count len, void *data); - int (*o_cleanup)(struct obd_device *dev, int force, int failover); + int (*o_cleanup)(struct obd_device *dev, int flags); int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, struct obd_uuid *cluuid); - int (*o_disconnect)(struct lustre_handle *conn, int failover); + int (*o_disconnect)(struct lustre_handle *conn, int flags); - int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs); - int (*o_syncfs)(struct obd_export *); + int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age); + int (*o_syncfs)(struct obd_export *exp); int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt, struct lov_stripe_md *mem_src); - int (*o_unpackmd)(struct lustre_handle *, + int (*o_unpackmd)(struct lustre_handle *conn, struct lov_stripe_md **mem_tgt, struct lov_mds_md *disk_src, int disk_len); int (*o_preallocate)(struct lustre_handle *, obd_count *req, @@ -344,42 +423,42 @@ struct obd_ops { int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea); int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, + struct lov_stripe_md *ea, struct ptlrpc_request_set *set); int (*o_open)(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, struct obd_client_handle *och); int (*o_close)(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_brw)(int rw, struct lustre_handle *conn, + int (*o_brw)(int rw, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, obd_count oa_bufs, struct brw_page *pgarr, struct obd_trans_info 
*oti); - int (*o_brw_async)(int rw, struct lustre_handle *conn, + int (*o_brw_async)(int rw, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, obd_count oa_bufs, struct brw_page *pgarr, struct ptlrpc_request_set *, struct obd_trans_info *oti); - int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt, + int (*o_punch)(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, obd_size count, obd_off offset, struct obd_trans_info *oti); - int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt, + int (*o_sync)(struct lustre_handle *conn, struct obdo *oa, obd_size count, obd_off offset); - int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst, - struct obdo *src, obd_size count, obd_off offset); - int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst, - struct lustre_handle *srconn, struct obdo *src, + int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, + struct lov_stripe_md *src, obd_size count, + obd_off offset); + int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst, + struct lustre_handle *srconn, struct lov_stripe_md *src, obd_size count, obd_off offset, struct obd_trans_info *); int (*o_iterate)(struct lustre_handle *conn, int (*)(obd_id, obd_gr, void *), obd_id *startid, obd_gr group, void *data); - int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo, + int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, void **desc_private, - struct obd_trans_info *oti); - int (*o_commitrw)(int cmd, struct obd_export *, + struct niobuf_local *local, struct obd_trans_info *oti); + int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti); + struct obd_trans_info *oti); int (*o_enqueue)(struct lustre_handle *conn, 
struct lov_stripe_md *md, struct lustre_handle *parent_lock, __u32 type, void *cookie, int cookielen, __u32 mode, @@ -391,10 +470,17 @@ struct obd_ops { int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md, __u32 mode, struct lustre_handle *); int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *, - int local_only, void *opaque); - int (*o_san_preprw)(int cmd, struct lustre_handle *conn, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote); + int flags, void *opaque); + int (*o_log_add)(struct lustre_handle *conn, + struct llog_handle *cathandle, + struct llog_trans_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); + int (*o_log_cancel)(struct lustre_handle *, struct lov_stripe_md *, + int count, struct llog_cookie *, int flags); + int (*o_san_preprw)(int cmd, struct obd_export *exp, + struct obdo *oa, int objcount, + struct obd_ioobj *obj, int niocount, + struct niobuf_remote *remote); int (*o_mark_page_dirty)(struct lustre_handle *conn, struct lov_stripe_md *ea, unsigned long offset); @@ -406,14 +492,22 @@ struct obd_ops { int (*o_last_dirty_offset)(struct lustre_handle *conn, struct lov_stripe_md *ea, unsigned long *offset); - void (*o_destroy_export)(struct obd_export *export); + void (*o_destroy_export)(struct obd_export *exp); + + /* metadata-only methods */ + int (*o_pin)(struct lustre_handle *, obd_id ino, __u32 gen, int type, + struct obd_client_handle *, int flag); + int (*o_unpin)(struct lustre_handle *, struct obd_client_handle *, int); + + /* If adding ops, also update obdclass/lprocfs_status.c, + * and include/linux/obd_class.h */ }; static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, int error) { if (error) { - CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n", + CERROR("%s: transno "LPD64" commit error: %d\n", obd->obd_name, transno, error); return; } @@ -425,8 +519,4 @@ static inline void obd_transno_commit_cb(struct 
obd_device *obd, __u64 transno, } } -/* When adding a function pointer to struct obd_ops, please update - * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c - * accordingly. */ - #endif /* __OBD_H */ diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 0c33ceb..2e57d2f 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -32,6 +32,7 @@ #include #include #include +#include #endif #include @@ -81,6 +82,17 @@ void class_disconnect_exports(struct obd_device *obddev, int failover); int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data); int class_multi_cleanup(struct obd_device *obddev); +/* obdo.c */ +#ifdef __KERNEL__ +void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid); +void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid); +void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); +void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid); +void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); +#endif +void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid); +int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare); + static inline int obd_check_conn(struct lustre_handle *conn) { struct obd_device *obd; @@ -277,7 +289,7 @@ static inline int obd_setup(struct obd_device *obd, int datalen, void *data) RETURN(rc); } -static inline int obd_cleanup(struct obd_device *obd, int force, int failover) +static inline int obd_cleanup(struct obd_device *obd, int flags) { int rc; ENTRY; @@ -286,7 +298,7 @@ static inline int obd_cleanup(struct obd_device *obd, int force, int failover) OBD_CHECK_OP(obd, cleanup); OBD_COUNTER_INCREMENT(obd, cleanup); - rc = OBP(obd, cleanup)(obd, force, failover); + rc = OBP(obd, cleanup)(obd, flags); RETURN(rc); } @@ -518,7 +530,7 @@ static inline int obd_connect(struct lustre_handle *conn, RETURN(rc); } -static inline 
int obd_disconnect(struct lustre_handle *conn, int failover) +static inline int obd_disconnect(struct lustre_handle *conn, int flags) { struct obd_export *exp; int rc; @@ -528,7 +540,7 @@ static inline int obd_disconnect(struct lustre_handle *conn, int failover) OBD_CHECK_OP(exp->exp_obd, disconnect); OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect); - rc = OBP(exp->exp_obd, disconnect)(conn, failover); + rc = OBP(exp->exp_obd, disconnect)(conn, flags); class_export_put(exp); RETURN(rc); } @@ -541,15 +553,35 @@ static inline void obd_destroy_export(struct obd_export *exp) EXIT; } -static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs) +#ifndef time_before +#define time_before(t1, t2) ((long)t2 - (long)t1 > 0) +#endif + +static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { - int rc; + int rc = 0; ENTRY; - OBD_CHECK_OP(exp->exp_obd, statfs); - OBD_COUNTER_INCREMENT(exp->exp_obd, statfs); - - rc = OBP(exp->exp_obd, statfs)(exp, osfs); + if (obd == NULL) + RETURN(-EINVAL); + + OBD_CHECK_OP(obd, statfs); + OBD_COUNTER_INCREMENT(obd, statfs); + + CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age); + if (obd->obd_osfs_age == 0 || time_before(obd->obd_osfs_age, max_age)) { + rc = OBP(obd, statfs)(obd, osfs, max_age); + spin_lock(&obd->obd_dev_lock); + memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); + obd->obd_osfs_age = jiffies; + spin_unlock(&obd->obd_dev_lock); + } else { + CDEBUG(D_SUPER, "using cached obd_statfs data\n"); + spin_lock(&obd->obd_dev_lock); + memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); + spin_unlock(&obd->obd_dev_lock); + } RETURN(rc); } @@ -582,7 +614,7 @@ static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa, RETURN(rc); } -static inline int obd_brw(int cmd, struct lustre_handle *conn, +static inline int obd_brw(int cmd, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, obd_count oa_bufs, struct brw_page *pg, struct 
obd_trans_info *oti) { @@ -600,14 +632,14 @@ static inline int obd_brw(int cmd, struct lustre_handle *conn, LBUG(); } - rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti); + rc = OBP(exp->exp_obd, brw)(cmd, conn, oa, ea, oa_bufs, pg, oti); class_export_put(exp); RETURN(rc); } static inline int obd_brw_async(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pg, + struct obdo *oa, struct lov_stripe_md *ea, + obd_count oa_bufs, struct brw_page *pg, struct ptlrpc_request_set *set, struct obd_trans_info *oti) { @@ -624,15 +656,16 @@ static inline int obd_brw_async(int cmd, struct lustre_handle *conn, LBUG(); } - rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti); + rc = OBP(exp->exp_obd, brw_async)(cmd, conn, oa, ea, oa_bufs, pg, set, + oti); class_export_put(exp); RETURN(rc); } -static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, +static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, void **desc_private, + struct niobuf_local *local, struct obd_trans_info *oti) { int rc; @@ -641,15 +674,15 @@ static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, OBD_CHECK_OP(exp->exp_obd, preprw); OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount, - remote, local, desc_private, oti); + rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount, + remote, local, oti); RETURN(rc); } -static inline int obd_commitrw(int cmd, struct obd_export *exp, +static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti) + struct obd_trans_info *oti) { int rc; ENTRY; @@ -657,8 +690,8 @@ static inline int 
obd_commitrw(int cmd, struct obd_export *exp, OBD_CHECK_OP(exp->exp_obd, commitrw); OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw); - rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount, - local, desc_private, oti); + rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount, + local, oti); RETURN(rc); } @@ -754,25 +787,92 @@ static inline int obd_cancel_unused(struct lustre_handle *conn, RETURN(rc); } -static inline int obd_san_preprw(int cmd, struct lustre_handle *conn, +static inline int obd_log_add(struct lustre_handle *conn, + struct llog_handle *cathandle, + struct llog_trans_hdr *rec, + struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, + int numcookies) +{ + struct obd_export *exp; + int rc; + ENTRY; + + OBD_CHECK_SETUP(conn, exp); + OBD_CHECK_OP(exp->exp_obd, log_add); + OBD_COUNTER_INCREMENT(exp->exp_obd, log_add); + + rc = OBP(exp->exp_obd, log_add)(conn, cathandle, rec, lsm, logcookies, + numcookies); + class_export_put(exp); + RETURN(rc); +} + +static inline int obd_log_cancel(struct lustre_handle *conn, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags) +{ + struct obd_export *exp; + int rc; + ENTRY; + + OBD_CHECK_SETUP(conn, exp); + OBD_CHECK_OP(exp->exp_obd, log_cancel); + OBD_COUNTER_INCREMENT(exp->exp_obd, log_cancel); + + rc = OBP(exp->exp_obd, log_cancel)(conn, lsm, count, cookies, flags); + class_export_put(exp); + RETURN(rc); +} + +static inline int obd_san_preprw(int cmd, struct obd_export *exp, + struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *remote) { - struct obd_export *exp; int rc; - OBD_CHECK_ACTIVE(conn, exp); OBD_CHECK_OP(exp->exp_obd, preprw); OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj, + rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj, niocount, remote); class_export_put(exp); - RETURN(rc); + return(rc); +} + +static inline int obd_pin(struct 
lustre_handle *conn, obd_id ino, __u32 gen, + int type, struct obd_client_handle *handle, int flag) +{ + struct obd_export *exp; + int rc; + + OBD_CHECK_ACTIVE(conn, exp); + OBD_CHECK_OP(exp->exp_obd, pin); + OBD_COUNTER_INCREMENT(exp->exp_obd, pin); + + rc = OBP(exp->exp_obd, pin)(conn, ino, gen, type, handle, flag); + class_export_put(exp); + return(rc); +} + +static inline int obd_unpin(struct lustre_handle *conn, + struct obd_client_handle *handle, int flag) +{ + struct obd_export *exp; + int rc; + + OBD_CHECK_ACTIVE(conn, exp); + OBD_CHECK_OP(exp->exp_obd, unpin); + OBD_COUNTER_INCREMENT(exp->exp_obd, unpin); + + rc = OBP(exp->exp_obd, unpin)(conn, handle, flag); + class_export_put(exp); + return(rc); } static inline int obd_mark_page_dirty(struct lustre_handle *conn, - struct lov_stripe_md *lsm, + struct lov_stripe_md *lsm, unsigned long offset) { struct obd_export *exp; @@ -780,14 +880,15 @@ static inline int obd_mark_page_dirty(struct lustre_handle *conn, OBD_CHECK_SETUP(conn, exp); OBD_CHECK_OP(exp->exp_obd, mark_page_dirty); + OBD_COUNTER_INCREMENT(exp->exp_obd, mark_page_dirty); rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset); class_export_put(exp); - RETURN(rc); + return(rc); } static inline int obd_clear_dirty_pages(struct lustre_handle *conn, - struct lov_stripe_md *lsm, + struct lov_stripe_md *lsm, unsigned long start, unsigned long end, unsigned long *cleared) @@ -797,11 +898,12 @@ static inline int obd_clear_dirty_pages(struct lustre_handle *conn, OBD_CHECK_SETUP(conn, exp); OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages); + OBD_COUNTER_INCREMENT(exp->exp_obd, clear_dirty_pages); rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end, cleared); class_export_put(exp); - RETURN(rc); + return(rc); } static inline int obd_last_dirty_offset(struct lustre_handle *conn, @@ -813,10 +915,11 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn, OBD_CHECK_SETUP(conn, exp); OBD_CHECK_OP(exp->exp_obd, last_dirty_offset); + 
OBD_COUNTER_INCREMENT(exp->exp_obd, last_dirty_offset); rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset); class_export_put(exp); - RETURN(rc); + return(rc); } /* OBD Metadata Support */ @@ -824,11 +927,6 @@ static inline int obd_last_dirty_offset(struct lustre_handle *conn, extern int obd_init_caches(void); extern void obd_cleanup_caches(void); -static inline struct lustre_handle *obdo_handle(struct obdo *oa) -{ - return (struct lustre_handle *)&oa->o_inline; -} - /* support routines */ extern kmem_cache_t *obdo_cachep; static inline struct obdo *obdo_alloc(void) @@ -838,6 +936,7 @@ static inline struct obdo *obdo_alloc(void) oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL); if (oa == NULL) LBUG(); + CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa); memset(oa, 0, sizeof (*oa)); return oa; @@ -847,6 +946,7 @@ static inline void obdo_free(struct obdo *oa) { if (!oa) return; + CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa); kmem_cache_free(obdo_cachep, oa); } @@ -855,268 +955,6 @@ static inline void obdo_free(struct obdo *oa) #define kdev_t_to_nr(dev) dev #endif -#ifdef __KERNEL__ -static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr) -{ - unsigned int ia_valid = attr->ia_valid; - - if (ia_valid & ATTR_ATIME) { - oa->o_atime = LTIME_S(attr->ia_atime); - oa->o_valid |= OBD_MD_FLATIME; - } - if (ia_valid & ATTR_MTIME) { - oa->o_mtime = LTIME_S(attr->ia_mtime); - oa->o_valid |= OBD_MD_FLMTIME; - } - if (ia_valid & ATTR_CTIME) { - oa->o_ctime = LTIME_S(attr->ia_ctime); - oa->o_valid |= OBD_MD_FLCTIME; - } - if (ia_valid & ATTR_SIZE) { - oa->o_size = attr->ia_size; - oa->o_valid |= OBD_MD_FLSIZE; - } - if (ia_valid & ATTR_MODE) { - oa->o_mode = attr->ia_mode; - oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - oa->o_mode &= ~S_ISGID; - } - if (ia_valid & ATTR_UID) { - oa->o_uid = attr->ia_uid; - oa->o_valid |= OBD_MD_FLUID; - } - if (ia_valid & ATTR_GID) { - oa->o_gid = attr->ia_gid; 
- oa->o_valid |= OBD_MD_FLGID; - } -} - - -static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa, - obd_flag valid) -{ - memset(attr, 0, sizeof(*attr)); - if (valid & OBD_MD_FLATIME) { - LTIME_S(attr->ia_atime) = oa->o_atime; - attr->ia_valid |= ATTR_ATIME; - } - if (valid & OBD_MD_FLMTIME) { - LTIME_S(attr->ia_mtime) = oa->o_mtime; - attr->ia_valid |= ATTR_MTIME; - } - if (valid & OBD_MD_FLCTIME) { - LTIME_S(attr->ia_ctime) = oa->o_ctime; - attr->ia_valid |= ATTR_CTIME; - } - if (valid & OBD_MD_FLSIZE) { - attr->ia_size = oa->o_size; - attr->ia_valid |= ATTR_SIZE; - } - if (valid & OBD_MD_FLTYPE) { - attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); - attr->ia_valid |= ATTR_MODE; - } - if (valid & OBD_MD_FLMODE) { - attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); - attr->ia_valid |= ATTR_MODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - attr->ia_mode &= ~S_ISGID; - } - if (valid & OBD_MD_FLUID) - { - attr->ia_uid = oa->o_uid; - attr->ia_valid |= ATTR_UID; - } - if (valid & OBD_MD_FLGID) { - attr->ia_gid = oa->o_gid; - attr->ia_valid |= ATTR_GID; - } -} - - -/* WARNING: the file systems must take care not to tinker with - attributes they don't manage (such as blocks). 
*/ - - -static inline void obdo_from_inode(struct obdo *dst, struct inode *src, - obd_flag valid) -{ - if (valid & OBD_MD_FLATIME) - dst->o_atime = LTIME_S(src->i_atime); - if (valid & OBD_MD_FLMTIME) - dst->o_mtime = LTIME_S(src->i_mtime); - if (valid & OBD_MD_FLCTIME) - dst->o_ctime = LTIME_S(src->i_ctime); - if (valid & OBD_MD_FLSIZE) - dst->o_size = src->i_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->o_blocks = src->i_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->o_blksize = src->i_blksize; - if (valid & OBD_MD_FLTYPE) - dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->o_uid = src->i_uid; - if (valid & OBD_MD_FLGID) - dst->o_gid = src->i_gid; - if (valid & OBD_MD_FLFLAGS) - dst->o_flags = src->i_flags; - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = src->i_nlink; - if (valid & OBD_MD_FLGENER) - dst->o_generation = src->i_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev); - - dst->o_valid |= (valid & ~OBD_MD_FLID); -} - -static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src, - obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime)) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime)) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size) - dst->i_size = src->o_size; - /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) - dst->i_blocks = src->o_blocks; -} - -static inline void obdo_to_inode(struct inode *dst, struct obdo *src, - obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & OBD_MD_FLATIME) - LTIME_S(dst->i_atime) = 
src->o_atime; - if (valid & OBD_MD_FLMTIME) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - dst->i_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->i_blocks = src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->i_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->i_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - dst->i_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - dst->i_flags = src->o_flags; - if (valid & OBD_MD_FLNLINK) - dst->i_nlink = src->o_nlink; - if (valid & OBD_MD_FLGENER) - dst->i_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - dst->i_rdev = to_kdev_t(src->o_rdev); -} -#endif - -static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src, - obd_flag valid) -{ -#ifdef __KERNEL__ - CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n", - (unsigned long long)src->o_id, src->o_valid, - (unsigned long long)dst->o_id); -#endif - if (valid & OBD_MD_FLATIME) - dst->o_atime = src->o_atime; - if (valid & OBD_MD_FLMTIME) - dst->o_mtime = src->o_mtime; - if (valid & OBD_MD_FLCTIME) - dst->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - dst->o_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->o_blocks = src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->o_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->o_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - dst->o_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - dst->o_flags = src->o_flags; - 
/* - if (valid & OBD_MD_FLOBDFLG) - dst->o_obdflags = src->o_obdflags; - */ - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = src->o_nlink; - if (valid & OBD_MD_FLGENER) - dst->o_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = src->o_rdev; - if (valid & OBD_MD_FLINLINE && - src->o_obdflags & OBD_FL_INLINEDATA) { - memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline)); - dst->o_obdflags |= OBD_FL_INLINEDATA; - } - - dst->o_valid |= valid; -} - - -/* returns FALSE if comparison (by flags) is same, TRUE if changed */ -static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src, - obd_flag compare) -{ - int res = 0; - - if ( compare & OBD_MD_FLATIME ) - res = (res || (dst->o_atime != src->o_atime)); - if ( compare & OBD_MD_FLMTIME ) - res = (res || (dst->o_mtime != src->o_mtime)); - if ( compare & OBD_MD_FLCTIME ) - res = (res || (dst->o_ctime != src->o_ctime)); - if ( compare & OBD_MD_FLSIZE ) - res = (res || (dst->o_size != src->o_size)); - if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */ - res = (res || (dst->o_blocks != src->o_blocks)); - if ( compare & OBD_MD_FLBLKSZ ) - res = (res || (dst->o_blksize != src->o_blksize)); - if ( compare & OBD_MD_FLTYPE ) - res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0)); - if ( compare & OBD_MD_FLMODE ) - res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0)); - if ( compare & OBD_MD_FLUID ) - res = (res || (dst->o_uid != src->o_uid)); - if ( compare & OBD_MD_FLGID ) - res = (res || (dst->o_gid != src->o_gid)); - if ( compare & OBD_MD_FLFLAGS ) - res = (res || (dst->o_flags != src->o_flags)); - if ( compare & OBD_MD_FLNLINK ) - res = (res || (dst->o_nlink != src->o_nlink)); - if ( compare & OBD_MD_FLGENER ) - res = (res || (dst->o_generation != src->o_generation)); - /* XXX Don't know if thses should be included here - wasn't previously - if ( compare & OBD_MD_FLINLINE ) - res = (res || memcmp(dst->o_inline, src->o_inline)); - */ - return res; -} - /* I'm as 
embarrassed about this as you are. * * // XXX do not look into _superhack with remaining eye @@ -1124,11 +962,6 @@ static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src, extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp); -struct obd_statfs; -struct statfs; -void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs); -void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs); - struct obd_class_user_state { struct obd_device *ocus_current_obd; struct list_head ocus_conns; diff --git a/lustre/include/linux/obd_lov.h b/lustre/include/linux/obd_lov.h index b12a062..6d68ae9 100644 --- a/lustre/include/linux/obd_lov.h +++ b/lustre/include/linux/obd_lov.h @@ -8,14 +8,17 @@ #define OBD_LOV_DEVICENAME "lov" struct lov_brw_async_args { - obd_count aa_oa_bufs; - struct brw_page *aa_ioarr; + struct lov_stripe_md *aa_lsm; + struct obdo *aa_obdos; + struct obdo *aa_oa; + struct brw_page *aa_ioarr; + obd_count aa_oa_bufs; }; struct lov_getattr_async_args { struct lov_stripe_md *aa_lsm; struct obdo *aa_oa; - struct obdo *aa_stripe_oas; + struct obdo *aa_obdos; }; static inline int lov_stripe_md_size(int stripes) @@ -28,15 +31,6 @@ static inline int lov_mds_md_size(int stripes) return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id); } -extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm, - struct lov_stripe_md *lsm); -extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm, - struct lov_mds_md *lmm, int lmmsize); -extern int lov_setstripe(struct lustre_handle *conn, - struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu); -extern int lov_getstripe(struct lustre_handle *conn, - struct lov_stripe_md *lsm, struct lov_mds_md *lmmu); - #define IOC_LOV_TYPE 'g' #define IOC_LOV_MIN_NR 50 #define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long) diff --git a/lustre/include/linux/obd_ost.h 
b/lustre/include/linux/obd_ost.h index 22fe694..ac2e24b 100644 --- a/lustre/include/linux/obd_ost.h +++ b/lustre/include/linux/obd_ost.h @@ -35,6 +35,7 @@ #define LUSTRE_SANOST_NAME "sanost" struct osc_brw_async_args { + struct obdo *aa_oa; int aa_requested_nob; int aa_nio_count; obd_count aa_page_count; diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 2a76905..28a9a3d 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -74,8 +74,10 @@ extern unsigned long obd_sync_filter; #define OBD_FAIL_MDS_STATFS_PACK 0x11d #define OBD_FAIL_MDS_STATFS_NET 0x11e #define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f -#define OBD_FAIL_MDS_ALL_REPLY_NET 0x120 -#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x121 +#define OBD_FAIL_MDS_PIN_NET 0x120 +#define OBD_FAIL_MDS_UNPIN_NET 0x121 +#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122 +#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -116,6 +118,9 @@ extern unsigned long obd_sync_filter; #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 +#define OBD_FAIL_OBD_PING_NET 0x600 +#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 + /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) @@ -169,37 +174,27 @@ do { \ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#define ll_bdevname(a) __bdevname((a)) +#define BDEVNAME_DECLARE_STORAGE(foo) char foo[BDEVNAME_SIZE] +#define ll_bdevname(DEV, STORAGE) __bdevname(DEV, STORAGE) #define ll_lock_kernel lock_kernel() -#define LTIME_S(time) (time.tv_sec) #else +#define BDEVNAME_DECLARE_STORAGE(foo) char __unused_##foo +#define ll_bdevname(DEV, STORAGE) ((void)__unused_##STORAGE, bdevname((DEV))) #define ll_lock_kernel -#define ll_bdevname(a) bdevname((a)) -#define LTIME_S(time) (time) #endif static inline void OBD_FAIL_WRITE(int id, kdev_t dev) { if 
(OBD_FAIL_CHECK(id)) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + BDEVNAME_DECLARE_STORAGE(tmp); #ifdef CONFIG_DEV_RDONLY CERROR("obd_fail_loc=%x, fail write operation on %s\n", - id, ll_bdevname(dev)); + id, ll_bdevname(kdev_t_to_nr(dev), tmp)); dev_set_rdonly(dev, 2); #else CERROR("obd_fail_loc=%x, can't fail write operation on %s\n", - id, ll_bdevname(dev)); -#endif -#else -#ifdef CONFIG_DEV_RDONLY - CERROR("obd_fail_loc=%x, fail write operation on %s\n", - id, ll_bdevname(dev.value)); - dev_set_rdonly(dev, 2); -#else - CERROR("obd_fail_loc=%x, can't fail write operation on %s\n", - id, ll_bdevname(dev.value)); -#endif + id, ll_bdevname(kdev_t_to_nr(dev), tmp)); #endif /* We set FAIL_ONCE because we never "un-fail" a device */ obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; @@ -209,9 +204,9 @@ static inline void OBD_FAIL_WRITE(int id, kdev_t dev) #define LTIME_S(time) (time) #endif /* __KERNEL__ */ -#define OBD_ALLOC(ptr, size) \ +#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ - (ptr) = kmalloc(size, GFP_KERNEL); \ + (ptr) = kmalloc(size, gfp_mask); \ if ((ptr) == NULL) { \ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ (int)(size), __FILE__, __LINE__); \ @@ -225,6 +220,12 @@ do { \ } \ } while (0) +#ifndef OBD_GFP_MASK +# define OBD_GFP_MASK GFP_KERNEL +#endif + +#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK) + #ifdef __arch_um__ # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) #else @@ -246,9 +247,9 @@ do { \ #endif #ifdef CONFIG_DEBUG_SLAB -#define POISON(lptr, c, s) do {} while (0) +#define POISON(ptr, c, s) do {} while (0) #else -#define POISON(lptr, c, s) memset(lptr, c, s) +#define POISON(ptr, c, s) memset(ptr, c, s) #endif #define OBD_FREE(ptr, size) \ @@ -277,9 +278,12 @@ do { \ } while (0) #endif +/* we memset() the slab object to 0 when allocation succeeds, so DO NOT + * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. 
we'd + * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ #define OBD_SLAB_ALLOC(ptr, slab, type, size) \ do { \ - LASSERT (!in_interrupt()); \ + LASSERT(!in_interrupt()); \ (ptr) = kmem_cache_alloc(slab, type); \ if ((ptr) == NULL) { \ CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch index 55057d9..1b589b9 100644 --- a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch @@ -1,13 +1,10 @@ - - - drivers/block/blkpg.c | 35 +++++++++++++++++++++++++++++++++++ drivers/block/loop.c | 3 +++ - drivers/ide/ide-disk.c | 5 ++++- - 3 files changed, 42 insertions(+), 1 deletion(-) + drivers/ide/ide-disk.c | 5 +++++ + 3 files changed, 43 insertions(+) ---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20 2003-04-11 14:05:03.000000000 +0800 -+++ rh-2.4.20-root/drivers/block/blkpg.c 2003-04-12 13:11:31.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/drivers/block/blkpg.c~dev_read_only_2.4.20-rh 2003-05-15 21:12:48.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/blkpg.c 2003-07-12 15:10:31.000000000 -0600 @@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c } @@ -47,8 +44,8 @@ +EXPORT_SYMBOL(dev_set_rdonly); +EXPORT_SYMBOL(dev_check_rdonly); +EXPORT_SYMBOL(dev_clear_rdonly); ---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20 2003-04-11 14:05:08.000000000 +0800 -+++ rh-2.4.20-root/drivers/block/loop.c 2003-04-12 13:11:31.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/drivers/block/loop.c~dev_read_only_2.4.20-rh 2003-05-15 21:12:50.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/drivers/block/loop.c 2003-07-12 15:10:31.000000000 -0600 @@ -491,6 +491,9 @@ static int loop_make_request(request_que spin_unlock_irq(&lo->lo_lock); @@ -59,17 +56,17 @@ if (lo->lo_flags & LO_FLAGS_READ_ONLY) goto err; } else 
if (rw == READA) { ---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20 2003-04-11 14:04:53.000000000 +0800 -+++ rh-2.4.20-root/drivers/ide/ide-disk.c 2003-04-12 13:14:48.000000000 +0800 -@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d - if (IS_PDC4030_DRIVE) - return promise_rw_disk(drive, rq, block); - #endif /* CONFIG_BLK_DEV_PDC4030 */ -- -+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) { -+ ide_end_request(1, HWGROUP(drive)); -+ return ide_stopped; -+ } +--- kernel-2.4.20-6chaos_18_7/drivers/ide/ide-disk.c~dev_read_only_2.4.20-rh 2003-05-15 21:13:09.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/drivers/ide/ide-disk.c 2003-07-12 15:12:03.000000000 -0600 +@@ -371,6 +371,11 @@ ide_startstop_t __ide_do_rw_disk (ide_dr + if (driver_blocked) + panic("Request while ide driver is blocked?"); + ++ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) { ++ ide_end_request(1, HWGROUP(drive)); ++ return ide_stopped; ++ } ++ if (IDE_CONTROL_REG) hwif->OUTB(drive->ctl, IDE_CONTROL_REG); diff --git a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch b/lustre/kernel_patches/patches/export-truncate-2.5.63.patch index 3d82572..3063be4 100644 --- a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch +++ b/lustre/kernel_patches/patches/export-truncate-2.5.63.patch @@ -9,7 +9,7 @@ return 0; } +/* truncate.c */ -+extern void truncate_complete_page(struct page *); ++extern void truncate_complete_page(struct address_space *mapping,struct page *); /* filemap.c */ extern unsigned long page_unuse(struct page *); diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch index e01feca..a173981 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch @@ -1,11 +1,17 @@ - fs/ext3/super.c | 229 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 2 + +Create 
a service thread to handle delete and truncate of inodes, to avoid +long latency while truncating very large files. + + + fs/ext3/inode.c | 116 ++++++++++++++++++++++ + fs/ext3/super.c | 231 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 5 include/linux/ext3_fs_sb.h | 10 + - 3 files changed, 241 insertions(+) + 4 files changed, 362 insertions(+) --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jun 18 11:59:14 2003 -@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe ++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jul 2 23:49:40 2003 +@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe } } @@ -130,14 +136,12 @@ + * If we have any problem deferring the delete, just delete it right away. + * If we defer it, we also mark how many blocks it would free, so that we + * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. -+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. ++ * delete thread when we run out of space. 
+ */ +static void ext3_delete_inode_thread(struct inode *old_inode) +{ + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); + struct inode *new_inode; + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); + @@ -146,24 +150,22 @@ + return; + } + -+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_delete; + + /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) ++ goto out_delete; + -+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || -+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. 
++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + + /* We can iget this inode again here, because our caller has unhashed @@ -175,9 +177,9 @@ + */ + down(&sbi->s_orphan_lock); + -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; ++ sbi->s_mount_state |= EXT3_ORPHAN_FS; + new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; ++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; + if (is_bad_inode(new_inode)) { + printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); + iput(new_inode); @@ -187,20 +189,21 @@ + up(&sbi->s_orphan_lock); + ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + J_ASSERT(new_inode != old_inode); + -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); ++ J_ASSERT(!list_empty(&oei->i_orphan)); ++ ++ nei = EXT3_I(new_inode); + /* Ugh. We need to insert new_inode into the same spot on the list + * as old_inode was, to ensure the in-memory orphan list is still + * in the same order as the on-disk orphan list (badness otherwise). 
+ */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; ++ nei->i_orphan = oei->i_orphan; ++ nei->i_orphan.next->prev = &nei->i_orphan; ++ nei->i_orphan.prev->next = &nei->i_orphan; ++ nei->i_state |= EXT3_STATE_DELETE; + up(&sbi->s_orphan_lock); + + clear_inode(old_inode); @@ -216,6 +219,10 @@ + new_inode->i_ino, blocks); + + wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_delete: ++ ext3_delete_inode(old_inode); +} +#else +#define ext3_start_delete_thread(sbi) do {} while(0) @@ -225,7 +232,7 @@ void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block +@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -233,7 +240,7 @@ ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -451,7 +664,11 @@ static struct super_operations ext3_sops +@@ -451,7 +666,11 @@ static struct super_operations ext3_sops write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. Don't need */ @@ -245,7 +252,7 @@ put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. 
Take it */ -@@ -511,6 +728,14 @@ static int parse_options (char * options +@@ -511,6 +730,14 @@ static int parse_options (char * options this_char = strtok (NULL, ",")) { if ((value = strchr (this_char, '=')) != NULL) *value++ = 0; @@ -260,7 +267,7 @@ if (!strcmp (this_char, "bsddf")) clear_opt (*mount_options, MINIX_DF); else if (!strcmp (this_char, "nouid32")) { -@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st +@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -268,7 +275,7 @@ /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s +@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s if (!parse_options(data, &tmp, sbi, &tmp, 1)) return -EINVAL; @@ -278,8 +285,143 @@ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); +--- linux/fs/ext3/file.c.orig Fri Jan 17 10:57:31 2003 ++++ linux/fs/ext3/file.c Mon Jun 30 13:28:52 2003 +@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati + }; + + struct inode_operations ext3_file_inode_operations = { ++#ifdef EXT3_DELETE_THREAD ++ truncate: ext3_truncate_thread, /* BKL held */ ++#else + truncate: ext3_truncate, /* BKL held */ ++#endif + setattr: ext3_setattr, /* BKL held */ + }; + +--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18 Wed Jul 2 23:13:58 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c Wed Jul 2 23:50:29 2003 +@@ -2004,6 +2004,118 @@ out_stop: + ext3_journal_stop(handle, inode); + } + ++#ifdef EXT3_DELETE_THREAD ++/* Move blocks from to-be-truncated inode over to a new inode, and delete ++ * that one from the delete thread instead. This avoids a lot of latency ++ * when truncating large files. ++ * ++ * If we have any problem deferring the truncate, just truncate it right away. 
++ * If we defer it, we also mark how many blocks it would free, so that we ++ * can keep the statfs data correct, and we know if we should sleep on the ++ * delete thread when we run out of space. ++ */ ++void ext3_truncate_thread(struct inode *old_inode) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); ++ struct inode *new_inode; ++ handle_t *handle; ++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); ++ ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_truncate; ++ ++ /* XXX This is a temporary limitation for code simplicity. ++ * We could truncate to arbitrary sizes at some later time. ++ */ ++ if (old_inode->i_size != 0) ++ goto out_truncate; ++ ++ /* We may want to truncate the inode immediately and not defer it */ ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || ++ old_inode->i_size > oei->i_disksize) ++ goto out_truncate; ++ ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. 
++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { ++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", ++ old_inode->i_ino, blocks); ++ goto out_truncate; ++ } ++ ++ ext3_discard_prealloc(old_inode); ++ ++ /* old_inode = 1 ++ * new_inode = sb + GDT + ibitmap ++ * orphan list = 1 inode/superblock for add, 2 inodes for del ++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS ++ */ ++ handle = ext3_journal_start(old_inode, 7); ++ if (IS_ERR(handle)) ++ goto out_truncate; ++ ++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); ++ if (IS_ERR(new_inode)) { ++ ext3_debug("truncate inode %lu directly (no new inodes)\n", ++ old_inode->i_ino); ++ goto out_journal; ++ } ++ ++ nei = EXT3_I(new_inode); ++ ++ down_write(&oei->truncate_sem); ++ new_inode->i_size = old_inode->i_size; ++ new_inode->i_blocks = old_inode->i_blocks; ++ new_inode->i_uid = old_inode->i_uid; ++ new_inode->i_gid = old_inode->i_gid; ++ new_inode->i_nlink = 0; ++ ++ /* FIXME when we do arbitrary truncates */ ++ old_inode->i_blocks = oei->i_file_acl ? 
old_inode->i_blksize / 512 : 0; ++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; ++ ++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); ++ memset(oei->i_data, 0, sizeof(oei->i_data)); ++ ++ nei->i_disksize = oei->i_disksize; ++ nei->i_state |= EXT3_STATE_DELETE; ++ up_write(&oei->truncate_sem); ++ ++ if (ext3_orphan_add(handle, new_inode) < 0) ++ goto out_journal; ++ ++ if (ext3_orphan_del(handle, old_inode) < 0) { ++ ext3_orphan_del(handle, new_inode); ++ iput(new_inode); ++ goto out_journal; ++ } ++ ++ ext3_journal_stop(handle, old_inode); ++ ++ spin_lock(&sbi->s_delete_lock); ++ J_ASSERT(list_empty(&new_inode->i_dentry)); ++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); ++ sbi->s_delete_blocks += blocks; ++ sbi->s_delete_inodes++; ++ spin_unlock(&sbi->s_delete_lock); ++ ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ ++ wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_journal: ++ ext3_journal_stop(handle, old_inode); ++out_truncate: ++ ext3_truncate(old_inode); ++} ++#endif /* EXT3_DELETE_THREAD */ ++ + /* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. 
--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Tue Jun 17 12:36:56 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Wed Jul 2 23:19:09 2003 @@ -190,6 +190,7 @@ struct ext3_group_desc */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -296,8 +438,18 @@ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H +@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc + extern void ext3_dirty_inode(struct inode *); + extern int ext3_change_inode_journal_flag(struct inode *, int); + extern void ext3_truncate (struct inode *); ++#ifdef EXT3_DELETE_THREAD ++extern void ext3_truncate_thread(struct inode *inode); ++#endif + + /* ioctl.c */ + extern int ext3_ioctl (struct inode *, struct file *, unsigned int, --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul 2 23:19:09 2003 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 32 diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch index 34c5158..a8816ec 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch @@ -1,7 +1,13 @@ -diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c ---- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400 -+++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400 -@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe + fs/ext3/file.c | 4 + fs/ext3/inode.c | 116 ++++++++++++++++++++++ + fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 5 + 
include/linux/ext3_fs_sb.h | 10 + + 5 files changed, 365 insertions(+) + +--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003 ++++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 14:11:33 2003 +@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe } } @@ -126,14 +132,12 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + * If we have any problem deferring the delete, just delete it right away. + * If we defer it, we also mark how many blocks it would free, so that we + * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. -+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. ++ * delete thread when we run out of space. + */ +static void ext3_delete_inode_thread(struct inode *old_inode) +{ + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); + struct inode *new_inode; + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); + @@ -142,24 +146,22 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + return; + } + -+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_delete; + + /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) ++ goto out_delete; + -+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || -+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. 
++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + + /* We can iget this inode again here, because our caller has unhashed @@ -171,9 +173,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + */ + down(&sbi->s_orphan_lock); + -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; ++ sbi->s_mount_state |= EXT3_ORPHAN_FS; + new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; ++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; + if (is_bad_inode(new_inode)) { + printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); + iput(new_inode); @@ -183,20 +185,21 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + up(&sbi->s_orphan_lock); + ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + J_ASSERT(new_inode != old_inode); + -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); ++ J_ASSERT(!list_empty(&oei->i_orphan)); ++ ++ nei = EXT3_I(new_inode); + /* Ugh. We need to insert new_inode into the same spot on the list + * as old_inode was, to ensure the in-memory orphan list is still + * in the same order as the on-disk orphan list (badness otherwise). 
+ */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; ++ nei->i_orphan = oei->i_orphan; ++ nei->i_orphan.next->prev = &nei->i_orphan; ++ nei->i_orphan.prev->next = &nei->i_orphan; ++ nei->i_state |= EXT3_STATE_DELETE; + up(&sbi->s_orphan_lock); + + clear_inode(old_inode); @@ -212,6 +215,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + new_inode->i_ino, blocks); + + wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_delete: ++ ext3_delete_inode(old_inode); +} +#else +#define ext3_start_delete_thread(sbi) do {} while(0) @@ -221,7 +228,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block +@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -229,7 +236,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +660,11 @@ static struct super_operations ext3_sops +@@ -455,7 +670,11 @@ static struct super_operations ext3_sops write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. Don't need */ @@ -240,11 +247,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c +#endif put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ - write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. 
Take it */ -@@ -514,6 +725,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; + sync_fs: ext3_sync_fs, +@@ -524,6 +743,13 @@ static int parse_options (char * options + clear_opt (*mount_options, XATTR_USER); + else + #endif +#ifdef EXT3_DELETE_THREAD + if (!strcmp(this_char, "asyncdel")) + set_opt(*mount_options, ASYNCDEL); @@ -252,10 +259,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + clear_opt(*mount_options, ASYNCDEL); + else +#endif - #ifdef CONFIG_EXT3_FS_XATTR_USER - if (!strcmp (this_char, "user_xattr")) - set_opt (*mount_options, XATTR_USER); -@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nouid32")) { +@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -263,7 +270,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c /* * akpm: core read_super() calls in here with the superblock locked. 
* That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s +@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s if (!parse_options(data, &tmp, sbi, &tmp, 1)) return -EINVAL; @@ -273,9 +280,143 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); -diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h ---- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400 -+++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400 +--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:29 2003 ++++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 14:11:33 2003 +@@ -2013,6 +2013,118 @@ out_stop: + ext3_journal_stop(handle, inode); + } + ++#ifdef EXT3_DELETE_THREAD ++/* Move blocks from to-be-truncated inode over to a new inode, and delete ++ * that one from the delete thread instead. This avoids a lot of latency ++ * when truncating large files. ++ * ++ * If we have any problem deferring the truncate, just truncate it right away. ++ * If we defer it, we also mark how many blocks it would free, so that we ++ * can keep the statfs data correct, and we know if we should sleep on the ++ * delete thread when we run out of space. ++ */ ++void ext3_truncate_thread(struct inode *old_inode) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); ++ struct inode *new_inode; ++ handle_t *handle; ++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); ++ ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_truncate; ++ ++ /* XXX This is a temporary limitation for code simplicity. ++ * We could truncate to arbitrary sizes at some later time. 
++ */ ++ if (old_inode->i_size != 0) ++ goto out_truncate; ++ ++ /* We may want to truncate the inode immediately and not defer it */ ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || ++ old_inode->i_size > oei->i_disksize) ++ goto out_truncate; ++ ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. ++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { ++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", ++ old_inode->i_ino, blocks); ++ goto out_truncate; ++ } ++ ++ ext3_discard_prealloc(old_inode); ++ ++ /* old_inode = 1 ++ * new_inode = sb + GDT + ibitmap ++ * orphan list = 1 inode/superblock for add, 2 inodes for del ++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS ++ */ ++ handle = ext3_journal_start(old_inode, 7); ++ if (IS_ERR(handle)) ++ goto out_truncate; ++ ++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); ++ if (IS_ERR(new_inode)) { ++ ext3_debug("truncate inode %lu directly (no new inodes)\n", ++ old_inode->i_ino); ++ goto out_journal; ++ } ++ ++ nei = EXT3_I(new_inode); ++ ++ down_write(&oei->truncate_sem); ++ new_inode->i_size = old_inode->i_size; ++ new_inode->i_blocks = old_inode->i_blocks; ++ new_inode->i_uid = old_inode->i_uid; ++ new_inode->i_gid = old_inode->i_gid; ++ new_inode->i_nlink = 0; ++ ++ /* FIXME when we do arbitrary truncates */ ++ old_inode->i_blocks = oei->i_file_acl ? 
old_inode->i_blksize / 512 : 0; ++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; ++ ++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); ++ memset(oei->i_data, 0, sizeof(oei->i_data)); ++ ++ nei->i_disksize = oei->i_disksize; ++ nei->i_state |= EXT3_STATE_DELETE; ++ up_write(&oei->truncate_sem); ++ ++ if (ext3_orphan_add(handle, new_inode) < 0) ++ goto out_journal; ++ ++ if (ext3_orphan_del(handle, old_inode) < 0) { ++ ext3_orphan_del(handle, new_inode); ++ iput(new_inode); ++ goto out_journal; ++ } ++ ++ ext3_journal_stop(handle, old_inode); ++ ++ spin_lock(&sbi->s_delete_lock); ++ J_ASSERT(list_empty(&new_inode->i_dentry)); ++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); ++ sbi->s_delete_blocks += blocks; ++ sbi->s_delete_inodes++; ++ spin_unlock(&sbi->s_delete_lock); ++ ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ ++ wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_journal: ++ ext3_journal_stop(handle, old_inode); ++out_truncate: ++ ext3_truncate(old_inode); ++} ++#endif /* EXT3_DELETE_THREAD */ ++ + /* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. 
+--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:21 2003 ++++ linux-mmonroe/fs/ext3/file.c Thu Jul 10 14:12:17 2003 +@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati + }; + + struct inode_operations ext3_file_inode_operations = { ++#ifdef EXT3_DELETE_THREAD ++ truncate: ext3_truncate_thread, /* BKL held */ ++#else + truncate: ext3_truncate, /* BKL held */ ++#endif + setattr: ext3_setattr, /* BKL held */ + setxattr: ext3_setxattr, /* BKL held */ + getxattr: ext3_getxattr, /* BKL held */ +--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:26 2003 ++++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 14:11:33 2003 @@ -193,6 +193,7 @@ struct ext3_group_desc */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -284,17 +425,26 @@ diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h /* * ioctl commands -@@ -321,6 +322,7 @@ struct ext3_inode { +@@ -320,6 +321,7 @@ struct ext3_inode { #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ ++#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h ---- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400 -+++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400 +@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc + extern void ext3_dirty_inode(struct inode *); + extern int ext3_change_inode_journal_flag(struct inode *, int); + extern void ext3_truncate (struct inode *); ++#ifdef EXT3_DELETE_THREAD ++extern void ext3_truncate_thread(struct inode *inode); ++#endif + + /* ioctl.c */ + extern int ext3_ioctl 
(struct inode *, struct file *, unsigned int, +--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003 ++++ linux-mmonroe/include/linux/ext3_fs_sb.h Thu Jul 10 14:11:33 2003 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 8 @@ -319,3 +469,5 @@ diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h }; #endif /* _LINUX_EXT3_FS_SB */ + +_ diff --git a/lustre/kernel_patches/patches/extN-misc-fixup.patch b/lustre/kernel_patches/patches/extN-misc-fixup.patch index 06ea72a..65d9347 100644 --- a/lustre/kernel_patches/patches/extN-misc-fixup.patch +++ b/lustre/kernel_patches/patches/extN-misc-fixup.patch @@ -1,6 +1,9 @@ ---- linux-2.4.17/fs/ext3/super.c.orig Fri Dec 21 10:41:55 2001 -+++ linux-2.4.17/fs/ext3/super.c Fri Mar 22 11:00:41 2002 -@@ -1344,10 +1342,10 @@ + fs/ext3/super.c | 4 ++-- + 1 files changed, 2 insertions(+), 2 deletions(-) + +--- linux-2.4.18-p4smp/fs/ext3/super.c~extN-misc-fixup 2003-07-21 23:07:50.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 23:08:06.000000000 -0600 +@@ -1578,10 +1578,10 @@ static journal_t *ext3_get_dev_journal(s printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); goto out_journal; } @@ -13,11 +16,5 @@ goto out_journal; } EXT3_SB(sb)->journal_bdev = bdev; -@@ -1560,6 +1560,7 @@ - unlock_kernel(); - return ret; - } -+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */ - - /* - * Ext3 always journals updates to the superblock itself, so we don't + +_ diff --git a/lustre/kernel_patches/patches/extN-noread.patch b/lustre/kernel_patches/patches/extN-noread.patch index 63f4463..305f6fd 100644 --- a/lustre/kernel_patches/patches/extN-noread.patch +++ b/lustre/kernel_patches/patches/extN-noread.patch @@ -83,9 +83,7 @@ DQUOT_DROP(inode); --- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread 2003-05-16 12:26:29.000000000 +0800 +++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-05-16 12:27:06.000000000 +0800 -@@ -2011,23 +2011,28 
@@ out_stop: - ext3_journal_stop(handle, inode); - } +@@ -2013,21 +2013,26 @@ out_stop: -/* - * ext3_get_inode_loc returns with an extra refcount against the diff --git a/lustre/kernel_patches/patches/extN-wantedi.patch b/lustre/kernel_patches/patches/extN-wantedi.patch index fc74c6b..d40d678 100644 --- a/lustre/kernel_patches/patches/extN-wantedi.patch +++ b/lustre/kernel_patches/patches/extN-wantedi.patch @@ -107,6 +107,17 @@ j += i * EXT3_INODES_PER_GROUP(sb) + 1; if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { ext3_error (sb, "ext3_new_inode", +--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~extN-wantedi Thu Jul 3 00:15:41 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c Thu Jul 3 00:17:28 2003 +@@ -2070,7 +2070,7 @@ void ext3_truncate_thread(struct inode * + if (IS_ERR(handle)) + goto out_truncate; + +- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); ++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0); + if (IS_ERR(new_inode)) { + ext3_debug("truncate inode %lu directly (no new inodes)\n", + old_inode->i_ino); --- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi 2003-04-08 23:35:55.000000000 -0600 +++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600 @@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch index 6eabe85..b983b33 100644 --- a/lustre/kernel_patches/patches/iopen-2.4.18.patch +++ b/lustre/kernel_patches/patches/iopen-2.4.18.patch @@ -1,7 +1,15 @@ - 0 files changed + Documentation/filesystems/ext2.txt | 16 ++ + fs/ext3/Makefile | 2 + fs/ext3/inode.c | 4 + fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 13 + + fs/ext3/namei.c | 12 + + fs/ext3/super.c | 11 + + include/linux/ext3_fs.h | 2 + 8 files changed, 318 insertions(+), 1 deletion(-) ---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-04-13 
15:21:33.000000000 +0800 -+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt 2003-06-03 17:10:55.000000000 +0800 +--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt 2003-07-09 17:13:02.000000000 -0600 @@ -35,6 +35,22 @@ resgid=n The group ID which may use th sb=n Use alternate superblock at this location. @@ -25,19 +33,19 @@ grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. ---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18 2003-07-09 17:12:12.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-09 17:13:15.000000000 -0600 @@ -11,7 +11,7 @@ O_TARGET := ext3.o - export-objs := super.o inode.o xattr.o + export-objs := super.o inode.o xattr.o ext3-exports.o -obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o xattr.o ++obj-y := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o obj-m := $(O_TARGET) ---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-06-03 17:10:55.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18 2003-07-09 17:11:19.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-09 17:13:02.000000000 -0600 @@ -31,6 +31,7 @@ #include #include @@ -46,7 +54,7 @@ /* * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod +@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod struct buffer_head *bh; int 
block; @@ -56,8 +64,8 @@ if(ext3_get_inode_loc(inode, &iloc)) goto bad_inode; bh = iloc.bh; ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c 2003-06-03 17:10:55.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c 2003-07-09 17:13:02.000000000 -0600 @@ -0,0 +1,259 @@ +/* + * linux/fs/ext3/iopen.c @@ -318,8 +326,8 @@ + + return 1; +} ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h 2003-06-03 17:10:55.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h 2003-07-09 17:13:02.000000000 -0600 @@ -0,0 +1,13 @@ +/* + * iopen.h @@ -334,8 +342,8 @@ + +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); +extern int ext3_iopen_get_inode(struct inode *inode); ---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18 2003-06-03 17:10:20.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c 2003-06-03 17:10:55.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-09 17:13:02.000000000 -0600 @@ -34,6 +34,7 @@ #include #include @@ -379,9 +387,9 @@ d_add(dentry, inode); return NULL; } ---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/super.c 2003-06-03 17:10:55.000000000 +0800 -@@ -820,6 +820,17 @@ static int parse_options (char * options +--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-09 17:13:02.000000000 -0600 +@@ -831,6 +831,17 @@ static int parse_options (char * options || !strcmp (this_char, "quota") || !strcmp (this_char, "usrquota")) /* Don't do anything ;-) */ ; @@ -399,8 +407,8 @@ else if (!strcmp (this_char, "journal")) { /* @@@ FIXME */ /* Eventually we 
will want to be able to create ---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18 2003-06-03 17:10:22.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-06-03 17:12:08.000000000 +0800 +--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-09 17:13:02.000000000 -0600 @@ -321,6 +321,8 @@ struct ext3_inode { #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ diff --git a/lustre/kernel_patches/patches/iopen-2.4.20.patch b/lustre/kernel_patches/patches/iopen-2.4.20.patch index 3038cc87..ec48814 100644 --- a/lustre/kernel_patches/patches/iopen-2.4.20.patch +++ b/lustre/kernel_patches/patches/iopen-2.4.20.patch @@ -1,15 +1,15 @@ Documentation/filesystems/ext2.txt | 16 ++ fs/ext3/Makefile | 2 fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 240 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 15 ++ - fs/ext3/namei.c | 13 +- + fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 13 + + fs/ext3/namei.c | 13 + fs/ext3/super.c | 11 + include/linux/ext3_fs.h | 2 - 8 files changed, 301 insertions(+), 2 deletions(-) + 8 files changed, 318 insertions(+), 2 deletions(-) ---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen 2001-07-11 16:44:45.000000000 -0600 -+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt 2003-05-17 14:06:00.000000000 -0600 +--- linux/Documentation/filesystems/ext2.txt~iopen-2.4.20 Wed Jul 11 15:44:45 2001 ++++ linux-mmonroe/Documentation/filesystems/ext2.txt Thu Jul 10 12:28:54 2003 @@ -35,6 +35,22 @@ resgid=n The group ID which may use th sb=n Use alternate superblock at this location. @@ -33,8 +33,8 @@ grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 
---- linux-2.4.20/fs/ext3/Makefile~iopen 2003-05-17 14:05:57.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/Makefile 2003-05-17 14:06:00.000000000 -0600 +--- linux/fs/ext3/Makefile~iopen-2.4.20 Thu Jul 10 12:28:44 2003 ++++ linux-mmonroe/fs/ext3/Makefile Thu Jul 10 12:28:54 2003 @@ -11,7 +11,7 @@ O_TARGET := ext3.o export-objs := ext3-exports.o @@ -44,8 +44,8 @@ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o obj-m := $(O_TARGET) ---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600 +--- linux/fs/ext3/inode.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003 ++++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 12:28:54 2003 @@ -31,6 +31,7 @@ #include #include @@ -54,7 +54,7 @@ /* * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod +@@ -2253,6 +2254,9 @@ void ext3_read_inode(struct inode * inod struct buffer_head *bh; int block; @@ -64,8 +64,8 @@ if(ext3_get_inode_loc(inode, &iloc)) goto bad_inode; bh = iloc.bh; ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600 +--- /dev/null Tue Jan 28 04:00:01 2003 ++++ linux-mmonroe/fs/ext3/iopen.c Thu Jul 10 12:28:54 2003 @@ -0,0 +1,259 @@ +/* + * linux/fs/ext3/iopen.c @@ -326,8 +326,8 @@ + + return 1; +} ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600 +--- /dev/null Tue Jan 28 04:00:01 2003 ++++ linux-mmonroe/fs/ext3/iopen.h Thu Jul 10 12:28:54 2003 @@ -0,0 +1,13 @@ +/* + * iopen.h @@ -342,8 +342,8 @@ + +extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); +extern int ext3_iopen_get_inode(struct inode *inode); ---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600 +--- 
linux/fs/ext3/namei.c~iopen-2.4.20 Thu Jul 10 12:28:46 2003 ++++ linux-mmonroe/fs/ext3/namei.c Thu Jul 10 12:28:54 2003 @@ -35,7 +35,7 @@ #include #include @@ -388,9 +388,9 @@ d_add(dentry, inode); return NULL; } ---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600 -@@ -820,6 +820,17 @@ static int parse_options (char * options +--- linux/fs/ext3/super.c~iopen-2.4.20 Thu Jul 10 12:28:45 2003 ++++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 12:28:54 2003 +@@ -835,6 +835,17 @@ static int parse_options (char * options || !strcmp (this_char, "quota") || !strcmp (this_char, "usrquota")) /* Don't do anything ;-) */ ; @@ -408,15 +408,15 @@ else if (!strcmp (this_char, "journal")) { /* @@@ FIXME */ /* Eventually we will want to be able to create ---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600 +--- linux/include/linux/ext3_fs.h~iopen-2.4.20 Thu Jul 10 12:28:46 2003 ++++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 12:30:12 2003 @@ -322,6 +322,8 @@ struct ext3_inode { #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ +#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ +#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ + #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch index 75ebcd0..15f1b2a 100644 --- a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch +++ 
b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch @@ -1,7 +1,18 @@ - 0 files changed + fs/ext3/Makefile | 4 + fs/ext3/ext3-exports.c | 13 + fs/ext3/ialloc.c | 2 + fs/ext3/inode.c | 29 - + fs/ext3/namei.c | 12 + fs/ext3/super.c | 22 + fs/ext3/xattr.c | 1242 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 46 - + include/linux/ext3_jbd.h | 8 + include/linux/ext3_xattr.h | 155 +++++ + include/linux/xattr.h | 15 + 11 files changed, 1496 insertions(+), 52 deletions(-) ---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/ialloc.c 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/ialloc.c 2003-07-21 22:49:05.000000000 -0600 @@ -17,6 +17,7 @@ #include #include @@ -18,8 +29,8 @@ DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); ---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/inode.c 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-07-20 17:12:43.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-21 22:49:05.000000000 -0600 @@ -39,6 +39,18 @@ */ #undef SEARCH_FROM_ZERO @@ -59,7 +70,7 @@ goto no_delete; lock_kernel(); -@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode) +@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; @@ -68,7 +79,7 @@ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; -@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in +@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in struct ext3_group_desc * gdp; if ((inode->i_ino != EXT3_ROOT_INO && @@ -77,7 +88,7 @@ inode->i_ino != EXT3_JOURNAL_INO && inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || 
inode->i_ino > le32_to_cpu( -@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod +@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod brelse (iloc.bh); @@ -89,7 +100,7 @@ inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; inode->i_mapping->a_ops = &ext3_aops; -@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod +@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod inode->i_op = &ext3_dir_inode_operations; inode->i_fop = &ext3_dir_operations; } else if (S_ISLNK(inode->i_mode)) { @@ -98,8 +109,8 @@ inode->i_op = &ext3_fast_symlink_inode_operations; else { inode->i_op = &page_symlink_inode_operations; ---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/namei.c 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-21 22:49:05.000000000 -0600 @@ -27,6 +27,7 @@ #include #include @@ -153,8 +164,8 @@ inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &ext3_aops; /* ---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/super.c 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-21 22:50:28.000000000 -0600 @@ -24,6 +24,7 @@ #include #include @@ -163,7 +174,7 @@ #include #include #include -@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block +@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -171,7 +182,7 @@ journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb 
+@@ -1749,17 +1751,27 @@ int ext3_statfs (struct super_block * sb static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super); @@ -200,10 +211,29 @@ + return error; } - EXPORT_SYMBOL(ext3_bread); ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/xattr.c 2003-04-20 16:14:31.000000000 +0800 -@@ -0,0 +1,1247 @@ +-EXPORT_SYMBOL(ext3_bread); + + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/fs/ext3/ext3-exports.c 2003-07-21 22:49:05.000000000 -0600 +@@ -0,0 +1,13 @@ ++#include ++#include ++#include ++#include ++#include ++ ++EXPORT_SYMBOL(ext3_force_commit); ++EXPORT_SYMBOL(ext3_bread); ++EXPORT_SYMBOL(ext3_xattr_register); ++EXPORT_SYMBOL(ext3_xattr_unregister); ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_list); ++EXPORT_SYMBOL(ext3_xattr_set); +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/fs/ext3/xattr.c 2003-07-21 22:50:40.000000000 -0600 +@@ -0,0 +1,1242 @@ +/* + * linux/fs/ext3/xattr.c + * @@ -277,11 +307,6 @@ +#include + +/* These symbols may be needed by a module. 
*/ -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) +# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) @@ -1451,8 +1476,8 @@ +} + +#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_fs.h 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-21 22:49:05.000000000 -0600 @@ -58,8 +58,6 @@ */ #define EXT3_BAD_INO 1 /* Bad blocks inode */ @@ -1541,8 +1566,8 @@ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); ---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h 2003-04-20 16:14:31.000000000 +0800 +--- linux-2.4.18-p4smp/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-07-21 22:29:27.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/include/linux/ext3_jbd.h 2003-07-21 22:49:05.000000000 -0600 @@ -30,13 +30,19 @@ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 @@ -1564,8 +1589,8 @@ extern int ext3_writepage_trans_blocks(struct inode *inode); ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h 2003-04-20 16:14:31.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/include/linux/ext3_xattr.h 2003-07-21 22:49:05.000000000 -0600 @@ -0,0 +1,155 @@ +/* + File: linux/ext3_xattr.h @@ -1722,8 +1747,8 @@ + +#endif /* __KERNEL__ */ + ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ 
linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ linux-2.4.18-p4smp-braam/include/linux/xattr.h 2003-07-21 22:49:05.000000000 -0600 @@ -0,0 +1,15 @@ +/* + File: linux/xattr.h @@ -1740,18 +1765,18 @@ +#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ + +#endif /* _LINUX_XATTR_H */ ---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-04-20 16:14:54.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/Makefile 2003-04-20 16:15:15.000000000 +0800 +--- linux-2.4.18-p4smp/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-07-21 22:27:37.000000000 -0600 ++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-21 22:51:23.000000000 -0600 @@ -9,10 +9,10 @@ O_TARGET := ext3.o -export-objs := super.o inode.o -+export-objs := super.o inode.o xattr.o ++export-objs := ext3-exports.o obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o xattr.o ++ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o obj-m := $(O_TARGET) include $(TOPDIR)/Rules.make diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch index 5c6c6a9..6d8eac6 100644 --- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch +++ b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch @@ -31,6 +31,7 @@ fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ fs/ext2/xattr_user.c | 103 +++ fs/ext3/Makefile | 10 + fs/ext3/ext3-exports.c | 13 fs/ext3/file.c | 5 fs/ext3/ialloc.c | 2 fs/ext3/inode.c | 35 - @@ -59,12 +60,11 @@ include/linux/mbcache.h | 69 ++ kernel/ksyms.c | 4 mm/vmscan.c | 36 + - fs/ext3/ext3-exports.c | 14 + - 62 files changed, 4331 insertions(+), 197 deletions(-) + 62 files changed, 4344 insertions(+), 183 deletions(-) ---- 
linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:50.000000000 +0800 -+++ linux-rh-2.4.20-8-root/Documentation/Configure.help 2003-05-07 17:34:25.000000000 +0800 -@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS +--- kernel-2.4.20-6chaos_18_7/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos 2003-06-23 10:39:21.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/Documentation/Configure.help 2003-07-12 15:34:44.000000000 -0600 +@@ -15253,6 +15253,39 @@ CONFIG_EXT2_FS be compiled as a module, and so this could be dangerous. Most everyone wants to say Y here. @@ -104,7 +104,7 @@ Ext3 journalling file system support (EXPERIMENTAL) CONFIG_EXT3_FS This is the journalling version of the Second extended file system -@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS +@@ -15285,6 +15318,39 @@ CONFIG_EXT3_FS of your root partition (the one containing the directory /) cannot be compiled as a module, and so this may be dangerous. @@ -144,8 +144,8 @@ Journal Block Device support (JBD for ext3) (EXPERIMENTAL) CONFIG_JBD This is a generic journalling layer for block devices. 
It is ---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-11-20 07:19:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:54.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -160,8 +160,8 @@ CONFIG_ALPHA=y # CONFIG_UID16 is not set # CONFIG_RWSEM_GENERIC_SPINLOCK is not set ---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:11:53.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/alpha/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600 @@ -1162,6 +1162,18 @@ sys_call_table: .quad sys_readahead .quad sys_ni_syscall /* 380, sys_security */ @@ -181,8 +181,8 @@ /* Remember to update everything, kids. */ .ifne (. 
- sys_call_table) - (NR_SYSCALLS * 8) ---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-05-20 08:43:05.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/arm/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:56.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -197,8 +197,8 @@ CONFIG_ARM=y # CONFIG_EISA is not set # CONFIG_SBUS is not set ---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:09:16.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/arm/kernel/calls.S 2003-07-12 15:34:44.000000000 -0600 @@ -240,18 +240,18 @@ __syscall_start: .long SYMBOL_NAME(sys_ni_syscall) /* Security */ .long SYMBOL_NAME(sys_gettid) @@ -230,8 +230,8 @@ .long SYMBOL_NAME(sys_tkill) /* * Please check 2.5 _before_ adding calls here, ---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:00.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/i386/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -246,8 +246,8 @@ CONFIG_X86=y CONFIG_ISA=y # CONFIG_SBUS is not set ---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- 
kernel-2.4.20-6chaos_18_7/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:04.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/ia64/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -262,8 +262,8 @@ # # Code maturity level options ---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2000-06-20 03:56:08.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:55.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/m68k/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -278,8 +278,8 @@ CONFIG_UID16=y # ---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:06.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -294,8 +294,8 @@ CONFIG_MIPS=y CONFIG_MIPS32=y # CONFIG_MIPS64 is not set ---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:11.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/mips64/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -310,8 +310,8 @@ CONFIG_MIPS=y # CONFIG_MIPS32 is not set CONFIG_MIPS64=y ---- 
linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:20.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,20 @@ # # Automatically generated make config: don't edit @@ -333,8 +333,8 @@ # CONFIG_UID16 is not set # CONFIG_RWSEM_GENERIC_SPINLOCK is not set CONFIG_RWSEM_XCHGADD_ALGORITHM=y ---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/ppc64/kernel/misc.S 2003-07-12 15:34:44.000000000 -0600 @@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32) .llong .sys_gettid /* 207 */ #if 0 /* Reserved syscalls */ @@ -351,8 +351,8 @@ .llong .sys_futex #endif .llong .sys_perfmonctl /* Put this here for now ... 
*/ ---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -367,8 +367,8 @@ # CONFIG_ISA is not set # CONFIG_EISA is not set # CONFIG_MCA is not set ---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:20.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600 @@ -558,18 +558,18 @@ sys_call_table: .long sys_fcntl64 .long sys_ni_syscall @@ -400,8 +400,8 @@ .long sys_gettid .long sys_tkill .rept 255-237 ---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:21.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -416,8 +416,8 @@ # CONFIG_ISA is not set # CONFIG_EISA is not set # CONFIG_MCA is not set ---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 +--- 
kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:58:21.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/entry.S 2003-07-12 15:34:44.000000000 -0600 @@ -591,18 +591,18 @@ sys_call_table: .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) .long SYSCALL(sys_ni_syscall,sys_ni_syscall) @@ -449,8 +449,8 @@ .long SYSCALL(sys_gettid,sys_gettid) .long SYSCALL(sys_tkill,sys_tkill) .rept 255-237 ---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos 2002-02-26 03:37:56.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:59.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/s390x/kernel/wrapper32.S 2003-07-12 15:34:44.000000000 -0600 @@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper: llgtr %r3,%r3 # struct stat64 * llgfr %r4,%r4 # long @@ -547,8 +547,8 @@ + jg sys_fremovexattr + + ---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:50.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -563,8 +563,8 @@ CONFIG_UID16=y CONFIG_HIGHMEM=y ---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:52.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc/kernel/systbls.S 2003-07-12 
15:34:44.000000000 -0600 @@ -51,11 +51,11 @@ sys_call_table: /*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 /*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount @@ -582,8 +582,8 @@ /*190*/ .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall /*195*/ .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir ---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:12:29.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/defconfig 2003-07-12 15:34:44.000000000 -0600 @@ -1,6 +1,13 @@ # # Automatically generated make config: don't edit @@ -598,8 +598,8 @@ # # Code maturity level options ---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:10:55.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/arch/sparc64/kernel/systbls.S 2003-07-12 15:34:44.000000000 -0600 @@ -52,11 +52,11 @@ sys_call_table32: /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount @@ -634,8 +634,8 @@ /*190*/ .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, 
sys_nis_syscall ---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:05:03.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/Config.in 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:24.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/Config.in 2003-07-12 15:34:44.000000000 -0600 @@ -34,6 +34,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL @@ -671,8 +671,8 @@ mainmenu_option next_comment comment 'Partition Types' source fs/partitions/Config.in ---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:34.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/Makefile 2003-07-12 15:34:44.000000000 -0600 @@ -84,6 +84,9 @@ obj-y += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o @@ -683,8 +683,8 @@ # persistent filesystems obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) ---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/Makefile 2003-07-12 15:34:44.000000000 -0600 @@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file ioctl.o namei.o super.o symlink.o obj-m := $(O_TARGET) @@ -694,8 +694,8 @@ +obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o + include $(TOPDIR)/Rules.make ---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/file.c 2003-05-07 17:34:25.000000000 +0800 +--- 
kernel-2.4.20-6chaos_18_7/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/file.c 2003-07-12 15:34:44.000000000 -0600 @@ -20,6 +20,7 @@ #include @@ -713,8 +713,8 @@ + listxattr: ext2_listxattr, + removexattr: ext2_removexattr, }; ---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/ialloc.c 2003-07-12 15:34:44.000000000 -0600 @@ -15,6 +15,7 @@ #include #include @@ -731,8 +731,8 @@ DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); } ---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/inode.c 2003-07-12 15:34:44.000000000 -0600 @@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); static int ext2_update_inode(struct inode * inode, int do_sync); @@ -815,8 +815,8 @@ brelse (bh); inode->i_attr_flags = 0; if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { ---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-04 13:57:36.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/namei.c 2003-07-12 15:34:44.000000000 -0600 @@ -31,6 +31,7 @@ #include @@ -850,8 +850,8 @@ + listxattr: ext2_listxattr, + removexattr: ext2_removexattr, }; ---- linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 
07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/super.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:09.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/super.c 2003-07-12 15:34:44.000000000 -0600 @@ -21,6 +21,7 @@ #include #include @@ -921,8 +921,8 @@ } EXPORT_NO_SYMBOLS; ---- linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2000-09-28 04:41:33.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/symlink.c 2003-07-12 15:34:44.000000000 -0600 @@ -19,6 +19,7 @@ #include @@ -952,8 +952,8 @@ + listxattr: ext2_listxattr, + removexattr: ext2_removexattr, }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr.c 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,1212 @@ +/* + * linux/fs/ext2/xattr.c @@ -2167,8 +2167,8 @@ +} + +#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext2/xattr_user.c 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,103 @@ +/* + * linux/fs/ext2/xattr_user.c @@ -2273,8 +2273,8 @@ + ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, + &ext2_xattr_user_handler); +} ---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile 2003-05-07 17:45:13.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 
2003-07-12 15:33:38.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/Makefile 2003-07-12 15:34:44.000000000 -0600 @@ -1,5 +1,5 @@ # -# Makefile for the linux ext2-filesystem routines. @@ -2299,8 +2299,8 @@ +obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o + include $(TOPDIR)/Rules.make ---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/file.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/file.c 2003-07-12 15:34:44.000000000 -0600 @@ -23,6 +23,7 @@ #include #include @@ -2319,8 +2319,8 @@ + removexattr: ext3_removexattr, /* BKL held */ }; ---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ialloc.c 2003-07-12 15:34:44.000000000 -0600 @@ -17,6 +17,7 @@ #include #include @@ -2337,8 +2337,8 @@ DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); ---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:30.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/inode.c 2003-07-12 15:34:44.000000000 -0600 @@ -39,6 +39,18 @@ */ #undef SEARCH_FROM_ZERO @@ -2429,8 +2429,8 @@ /* inode->i_attr_flags = 0; unused */ if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ ---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 
-+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:43.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/namei.c 2003-07-12 15:34:44.000000000 -0600 @@ -29,6 +29,7 @@ #include #include @@ -2492,8 +2492,8 @@ + removexattr: ext3_removexattr, /* BKL held */ +}; + ---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/super.c 2003-05-07 17:40:45.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/super.c 2003-07-12 15:34:44.000000000 -0600 @@ -24,6 +24,7 @@ #include #include @@ -2579,8 +2579,8 @@ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); MODULE_LICENSE("GPL"); ---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2001-11-10 06:25:04.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2002-05-07 15:53:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/symlink.c 2003-07-12 15:34:44.000000000 -0600 @@ -20,6 +20,7 @@ #include #include @@ -2610,8 +2610,8 @@ + listxattr: ext3_listxattr, /* BKL held */ + removexattr: ext3_removexattr, /* BKL held */ }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c 2003-05-07 17:42:06.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr.c 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,1225 @@ +/* + * linux/fs/ext3/xattr.c @@ -3838,8 +3838,8 @@ +} + +#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- 
/dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/xattr_user.c 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,111 @@ +/* + * linux/fs/ext3/xattr_user.c @@ -3952,8 +3952,8 @@ + ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, + &ext3_xattr_user_handler); +} ---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:11.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/jfs_xattr.h 2003-07-12 15:34:44.000000000 -0600 @@ -52,8 +52,10 @@ struct jfs_ea_list { #define END_EALIST(ealist) \ ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) @@ -3967,8 +3967,8 @@ extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); ---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos 2003-02-14 15:59:11.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/jfs/xattr.c 2003-07-12 15:34:44.000000000 -0600 @@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s } @@ -3996,8 +3996,8 @@ size_t value_len, int flags) { if (value == NULL) { /* empty EA, do not remove */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/mbcache.c 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/mbcache.c 2003-07-12 
15:34:44.000000000 -0600 @@ -0,0 +1,648 @@ +/* + * linux/fs/mbcache.c @@ -4647,8 +4647,8 @@ +module_init(init_mbcache) +module_exit(exit_mbcache) + ---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:14:42.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-arm/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -244,7 +244,6 @@ #define __NR_security (__NR_SYSCALL_BASE+223) #define __NR_gettid (__NR_SYSCALL_BASE+224) @@ -4665,8 +4665,8 @@ #define __NR_tkill (__NR_SYSCALL_BASE+238) /* * Please check 2.5 _before_ adding calls here, ---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:42.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-ppc64/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -218,6 +218,7 @@ #define __NR_gettid 207 #if 0 /* Reserved syscalls */ @@ -4683,8 +4683,8 @@ #define __NR_futex 221 #endif ---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:44.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -212,9 +212,18 @@ #define __NR_madvise 219 #define __NR_getdents64 220 @@ -4707,8 +4707,8 @@ #define __NR_gettid 236 #define __NR_tkill 237 ---- 
linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:45.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-s390x/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -180,9 +180,18 @@ #define __NR_pivot_root 217 #define __NR_mincore 218 @@ -4731,8 +4731,8 @@ #define __NR_gettid 236 #define __NR_tkill 237 ---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:46.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -184,24 +184,24 @@ /* #define __NR_exportfs 166 SunOS Specific */ #define __NR_mount 167 /* Common */ @@ -4770,8 +4770,8 @@ #define __NR_tkill 187 /* SunOS: fpathconf */ /* #define __NR_sysconf 188 SunOS Specific */ #define __NR_uname 189 /* Linux Specific */ ---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-09-25 11:13:48.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/asm-sparc64/unistd.h 2003-07-12 15:34:44.000000000 -0600 @@ -184,24 +184,24 @@ /* #define __NR_exportfs 166 SunOS Specific */ #define __NR_mount 167 /* Common */ @@ -4809,8 +4809,8 @@ #define __NR_tkill 187 /* SunOS: fpathconf */ /* #define __NR_sysconf 188 SunOS Specific */ #define __NR_uname 189 /* Linux Specific */ ---- /dev/null 
2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/cache_def.h 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,15 @@ +/* + * linux/cache_def.h @@ -4827,8 +4827,8 @@ + +extern void register_cache(struct cache_definition *); +extern void unregister_cache(struct cache_definition *); ---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/errno.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-15 21:15:06.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/errno.h 2003-07-12 15:34:44.000000000 -0600 @@ -26,4 +26,8 @@ #endif @@ -4838,8 +4838,8 @@ +#define ENOTSUP EOPNOTSUPP /* Operation not supported */ + #endif ---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-12 15:46:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-06-24 11:31:16.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_fs.h 2003-07-12 15:34:44.000000000 -0600 @@ -57,8 +57,6 @@ */ #define EXT2_BAD_INO 1 /* Bad blocks inode */ @@ -4911,7 +4911,7 @@ #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -623,8 +600,10 @@ extern struct address_space_operations e +@@ -624,8 +601,10 @@ extern struct address_space_operations e /* namei.c */ extern struct inode_operations ext2_dir_inode_operations; @@ -4922,8 +4922,8 @@ extern struct inode_operations ext2_fast_symlink_inode_operations; #endif /* __KERNEL__ */ ---- 
/dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext2_xattr.h 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,157 @@ +/* + File: linux/ext2_xattr.h @@ -5082,8 +5082,8 @@ + +#endif /* __KERNEL__ */ + ---- linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:41.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_fs.h 2003-07-12 15:34:44.000000000 -0600 @@ -63,8 +63,6 @@ */ #define EXT3_BAD_INO 1 /* Bad blocks inode */ @@ -5138,7 +5138,7 @@ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -@@ -520,7 +496,7 @@ struct ext3_super_block { +@@ -521,7 +497,7 @@ struct ext3_super_block { #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ @@ -5147,7 +5147,7 @@ #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ EXT3_FEATURE_INCOMPAT_RECOVER) #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st +@@ -704,6 +680,7 @@ extern void ext3_check_inodes_bitmap (st extern unsigned long ext3_count_free (struct buffer_head *, unsigned); /* inode.c */ @@ -5155,7 +5155,7 @@ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -771,8 +748,10 @@ extern struct address_space_operations e +@@ -773,8 +750,10 @@ extern struct address_space_operations e /* namei.c */ extern struct 
inode_operations ext3_dir_inode_operations; @@ -5166,8 +5166,8 @@ extern struct inode_operations ext3_fast_symlink_inode_operations; ---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:38.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_jbd.h 2003-07-12 15:34:44.000000000 -0600 @@ -30,13 +30,19 @@ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U @@ -5189,8 +5189,8 @@ extern int ext3_writepage_trans_blocks(struct inode *inode); ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/ext3_xattr.h 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,157 @@ +/* + File: linux/ext3_xattr.h @@ -5349,19 +5349,19 @@ + +#endif /* __KERNEL__ */ + ---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/fs.h 2003-05-07 17:34:25.000000000 +0800 -@@ -915,7 +915,7 @@ struct inode_operations { +--- kernel-2.4.20-6chaos_18_7/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:31:35.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/fs.h 2003-07-12 15:34:44.000000000 -0600 +@@ -914,7 +914,7 @@ struct inode_operations { int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); + int (*setattr_raw) (struct inode *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); + int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); ssize_t (*getxattr) (struct dentry *, const 
char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h 2003-05-07 17:34:25.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/include/linux/mbcache.h 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,69 @@ +/* + File: linux/mbcache.h @@ -5432,8 +5432,8 @@ +struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, + kdev_t, unsigned int); +#endif ---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/kernel/ksyms.c 2003-05-07 17:34:25.000000000 +0800 +--- kernel-2.4.20-6chaos_18_7/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:14:02.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/kernel/ksyms.c 2003-07-12 15:35:19.000000000 -0600 @@ -12,6 +12,7 @@ #define __KERNEL_SYSCALLS__ #include @@ -5442,15 +5442,15 @@ #include #include #include -@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm); +@@ -106,6 +107,7 @@ EXPORT_SYMBOL(do_brk); + EXPORT_SYMBOL(exit_mm); EXPORT_SYMBOL(exit_files); EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); +EXPORT_SYMBOL(copy_fs_struct); + EXPORT_SYMBOL(exit_sighand); + EXPORT_SYMBOL_GPL(make_pages_present); - /* internal kernel memory management */ - EXPORT_SYMBOL(_alloc_pages); -@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc); +@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_alloc); EXPORT_SYMBOL(kmem_cache_free); EXPORT_SYMBOL(kmem_cache_validate); EXPORT_SYMBOL(kmem_cache_size); @@ -5459,8 +5459,8 @@ EXPORT_SYMBOL(kmalloc); EXPORT_SYMBOL(kfree); EXPORT_SYMBOL(vfree); ---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800 +--- 
kernel-2.4.20-6chaos_18_7/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos 2003-07-12 15:33:34.000000000 -0600 ++++ kernel-2.4.20-6chaos_18_7-braam/mm/vmscan.c 2003-07-12 15:34:44.000000000 -0600 @@ -21,6 +21,7 @@ #include #include @@ -5518,8 +5518,8 @@ #ifdef CONFIG_QUOTA ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); #endif ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-root/fs/ext3/ext3-exports.c 2003-05-05 18:19:11.000000000 +0800 +--- /dev/null 2003-01-30 03:24:37.000000000 -0700 ++++ kernel-2.4.20-6chaos_18_7-braam/fs/ext3/ext3-exports.c 2003-07-12 15:34:44.000000000 -0600 @@ -0,0 +1,13 @@ +#include +#include diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch index 78855ac..c987485 100644 --- a/lustre/kernel_patches/patches/lustre_version.patch +++ b/lustre/kernel_patches/patches/lustre_version.patch @@ -7,6 +7,6 @@ --- /dev/null Fri Aug 30 17:31:37 2002 +++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003 @@ -0,0 +1 @@ -+#define LUSTRE_KERNEL_VERSION 19 ++#define LUSTRE_KERNEL_VERSION 21 _ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch index 710cdc9..7aa5941 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch @@ -1,7 +1,7 @@ 0 files changed ---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:58.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/dcache.c 2003-06-09 23:18:07.000000000 +0800 +--- linux-2.4.20/fs/dcache.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:59.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/dcache.c 2003-07-17 08:35:22.000000000 -0700 @@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry) spin_unlock(&dcache_lock); return 0; @@ -16,15 +16,7 @@ /* * Check whether to do a partial shrink_dcache * to get rid of unused child entries. 
-@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry * - dentry->d_fsdata = NULL; - dentry->d_extra_attributes = NULL; - dentry->d_mounted = 0; -+ dentry->d_it = NULL; - dentry->d_cookie = NULL; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); -@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry) +@@ -839,13 +846,19 @@ void d_delete(struct dentry * dentry) * Adds a dentry to the hash according to its name. */ @@ -47,16 +39,16 @@ } #define do_switch(x,y) do { \ ---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/namei.c 2003-06-09 23:18:07.000000000 +0800 +--- linux-2.4.20/fs/namei.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:47.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/namei.c 2003-07-17 08:35:22.000000000 -0700 @@ -94,6 +94,13 @@ * XEmacs seems to be relying on it... */ -+void intent_release(struct dentry *de, struct lookup_intent *it) ++void intent_release(struct lookup_intent *it) +{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); ++ if (it && it->it_op_release) ++ it->it_op_release(it); + +} + @@ -73,8 +65,8 @@ { struct dentry * dentry = d_lookup(parent, name); -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { ++ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && + !d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; @@ -104,8 +96,8 @@ result = ERR_PTR(-ENOMEM); if (dentry) { lock_kernel(); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); ++ if (dir->i_op->lookup_it) ++ result = dir->i_op->lookup_it(dir, dentry, it, flags); + else result = dir->i_op->lookup(dir, dentry); unlock_kernel(); @@ -114,8 +106,8 @@ dput(result); result = ERR_PTR(-ENOENT); } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if 
(!result->d_op->d_revalidate2(result, flags, it) && ++ } else if (result->d_op && result->d_op->d_revalidate_it) { ++ if (!result->d_op->d_revalidate_it(result, flags, it) && + !d_invalidate(result)) { + dput(result); + goto again; @@ -133,30 +125,26 @@ { int err; if (current->link_count >= max_recursive_link) -@@ -348,10 +377,21 @@ static inline int do_follow_link(struct +@@ -348,10 +377,18 @@ static inline int do_follow_link(struct current->link_count++; current->total_link_count++; UPDATE_ATIME(dentry->d_inode); -- err = dentry->d_inode->i_op->follow_link(dentry, nd); + nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ err = dentry->d_inode->i_op->follow_link(dentry, nd); + err = dentry->d_inode->i_op->follow_link(dentry, nd); + if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { + /* vfs_follow_link was never called */ -+ intent_release(dentry, it); ++ intent_release(it); + path_release(nd); + err = -ENOLINK; + } current->link_count--; return err; loop: -+ intent_release(dentry, it); ++ intent_release(it); path_release(nd); return -ELOOP; } -@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str +@@ -381,15 +418,26 @@ int follow_up(struct vfsmount **mnt, str return __follow_up(mnt, dentry); } @@ -176,7 +164,7 @@ + opc = it->it_op; + mode = it->it_mode; + } -+ intent_release(*dentry, it); ++ intent_release(it); + if (it) { + it->it_op = opc; + it->it_mode = mode; @@ -184,7 +172,7 @@ dput(*dentry); mntput(mounted->mnt_parent); *dentry = dget(mounted->mnt_root); -@@ -401,7 +452,7 @@ static inline int __follow_down(struct v +@@ -401,7 +449,7 @@ static inline int __follow_down(struct v int follow_down(struct vfsmount **mnt, struct dentry **dentry) { @@ -193,7 +181,7 @@ } static inline void follow_dotdot(struct nameidata *nd) -@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct +@@ -437,7 +485,7 @@ static inline void follow_dotdot(struct mntput(nd->mnt); 
nd->mnt = parent; } @@ -202,7 +190,7 @@ ; } -@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct +@@ -449,7 +497,8 @@ static inline void follow_dotdot(struct * * We expect 'base' to be positive and a directory. */ @@ -212,117 +200,114 @@ { struct dentry *dentry; struct inode *inode; -@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st +@@ -526,19 +575,18 @@ int link_path_walk(const char * name, st break; } /* This does the actual lookups.. */ - dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); ++ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); if (!dentry) { err = -EWOULDBLOCKIO; if (atomic) break; - dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); ++ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); err = PTR_ERR(dentry); if (IS_ERR(dentry)) break; } /* Check mountpoints.. */ - while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; +- ; ++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)); err = -ENOENT; -@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st - if (!inode->i_op) + inode = dentry->d_inode; +@@ -549,7 +597,7 @@ int link_path_walk(const char * name, st goto out_dput; -- if (inode->i_op->follow_link) { + if (inode->i_op->follow_link) { - err = do_follow_link(dentry, nd); -+ if (inode->i_op->follow_link || inode->i_op->follow_link2) { + err = do_follow_link(dentry, nd, NULL); dput(dentry); if (err) goto return_err; -@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st +@@ -565,7 +613,7 @@ int link_path_walk(const char * name, st nd->dentry = dentry; } err = -ENOTDIR; - if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) ++ if (!inode->i_op->lookup && !inode->i_op->lookup_it) break; continue; /* here 
ends the main loop */ -@@ -592,22 +644,23 @@ last_component: +@@ -592,22 +640,22 @@ last_component: if (err < 0) break; } - dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); ++ dentry = cached_lookup(nd->dentry, &this, 0, it); if (!dentry) { err = -EWOULDBLOCKIO; if (atomic) break; - dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); ++ dentry = real_lookup(nd->dentry, &this, 0, it); err = PTR_ERR(dentry); if (IS_ERR(dentry)) break; } - while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) ++ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) ; inode = dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) -- && inode && inode->i_op && inode->i_op->follow_link) { + && inode && inode->i_op && inode->i_op->follow_link) { - err = do_follow_link(dentry, nd); -+ && inode && inode->i_op && -+ (inode->i_op->follow_link || inode->i_op->follow_link2)) { -+ err = do_follow_link(dentry, nd, it); ++ err = do_follow_link(dentry, nd, it); dput(dentry); if (err) goto return_err; -@@ -621,7 +674,8 @@ last_component: +@@ -621,7 +669,8 @@ last_component: goto no_inode; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op || !inode->i_op->lookup) + if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) ++ (!inode->i_op->lookup && !inode->i_op->lookup_it)) break; } goto return_base; -@@ -645,6 +699,23 @@ return_reval: +@@ -645,6 +694,23 @@ return_reval: * Check the cached dentry for staleness. 
*/ dentry = nd->dentry; -+ revalidate_again: -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { ++ revalidate_again: ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { + err = -ESTALE; -+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, NULL); ++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { ++ struct dentry *new; ++ err = permission(dentry->d_parent->d_inode, ++ MAY_EXEC); ++ if (err) ++ break; ++ new = real_lookup(dentry->d_parent, ++ &dentry->d_name, 0, NULL); + d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ goto revalidate_again; -+ } ++ dput(dentry); ++ dentry = new; ++ goto revalidate_again; ++ } + } else if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { err = -ESTALE; if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -658,15 +729,28 @@ out_dput: +@@ -658,15 +724,28 @@ out_dput: dput(dentry); break; } + if (err) -+ intent_release(nd->dentry, it); ++ intent_release(it); path_release(nd); return_err: return err; @@ -347,7 +332,7 @@ } /* SMP-safe */ -@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct +@@ -751,6 +830,17 @@ walk_init_root(const char *name, struct } /* SMP-safe */ @@ -365,7 +350,7 @@ int path_lookup(const char *path, unsigned flags, struct nameidata *nd) { int error = 0; -@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned +@@ -765,6 +855,7 @@ int path_init(const char *name, unsigned { nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; @@ -373,7 +358,7 @@ if (*name=='/') return walk_init_root(name,nd); read_lock(¤t->fs->lock); -@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned +@@ -779,7 +870,8 @@ int path_init(const char *name, unsigned * needs parent already locked. Doesn't follow mounts. * SMP-safe. 
*/ @@ -383,7 +368,7 @@ { struct dentry * dentry; struct inode *inode; -@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr +@@ -802,13 +894,16 @@ struct dentry * lookup_hash(struct qstr goto out; } @@ -395,13 +380,13 @@ if (!new) goto out; lock_kernel(); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); ++ if (inode->i_op->lookup_it) ++ dentry = inode->i_op->lookup_it(inode, new, it, 0); + else dentry = inode->i_op->lookup(inode, new); unlock_kernel(); if (!dentry) -@@ -820,6 +920,12 @@ out: +@@ -820,6 +915,12 @@ out: return dentry; } @@ -414,7 +399,7 @@ /* SMP-safe */ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) { -@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha +@@ -841,7 +942,7 @@ struct dentry * lookup_one_len(const cha } this.hash = end_name_hash(hash); @@ -423,7 +408,7 @@ access: return ERR_PTR(-EACCES); } -@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign +@@ -872,6 +973,23 @@ int __user_walk(const char *name, unsign return err; } @@ -447,7 +432,47 @@ /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. -@@ -1010,7 +1133,8 @@ exit_lock: +@@ -969,7 +1087,8 @@ static inline int lookup_flags(unsigned + return retval; + } + +-int vfs_create(struct inode *dir, struct dentry *dentry, int mode) ++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, ++ struct lookup_intent *it) + { + int error; + +@@ -982,12 +1101,15 @@ int vfs_create(struct inode *dir, struct + goto exit_lock; + + error = -EACCES; /* shouldn't it be ENOSYS? 
*/ +- if (!dir->i_op || !dir->i_op->create) ++ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); +- error = dir->i_op->create(dir, dentry, mode); ++ if (dir->i_op->create_it) ++ error = dir->i_op->create_it(dir, dentry, mode, it); ++ else ++ error = dir->i_op->create(dir, dentry, mode); + unlock_kernel(); + exit_lock: + up(&dir->i_zombie); +@@ -996,6 +1118,11 @@ exit_lock: + return error; + } + ++int vfs_create(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ return vfs_create_it(dir, dentry, mode, NULL); ++} ++ + /* + * open_namei() + * +@@ -1010,7 +1137,8 @@ exit_lock: * for symlinks (where the permissions are checked later). * SMP-safe */ @@ -457,7 +482,7 @@ { int acc_mode, error = 0; struct inode *inode; -@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in +@@ -1024,7 +1152,7 @@ int open_namei(const char * pathname, in * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { @@ -466,7 +491,7 @@ if (error) return error; dentry = nd->dentry; -@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in +@@ -1034,6 +1162,10 @@ int open_namei(const char * pathname, in /* * Create - we need to know the parent. 
*/ @@ -477,7 +502,7 @@ error = path_lookup(pathname, LOOKUP_PARENT, nd); if (error) return error; -@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in +@@ -1049,7 +1181,7 @@ int open_namei(const char * pathname, in dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -486,15 +511,21 @@ do_last: error = PTR_ERR(dentry); -@@ -1058,6 +1186,7 @@ do_last: +@@ -1058,10 +1190,11 @@ do_last: goto exit; } + it->it_mode = mode; /* Negative dentry, just create the file */ if (!dentry->d_inode) { - error = vfs_create(dir->d_inode, dentry, -@@ -1086,12 +1215,13 @@ do_last: +- error = vfs_create(dir->d_inode, dentry, +- mode & ~current->fs->umask); ++ error = vfs_create_it(dir->d_inode, dentry, ++ mode & ~current->fs->umask, it); + up(&dir->d_inode->i_sem); + dput(nd->dentry); + nd->dentry = dentry; +@@ -1086,7 +1219,7 @@ do_last: error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; @@ -503,14 +534,7 @@ } error = -ENOENT; if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || -+ dentry->d_inode->i_op->follow_link2)) - goto do_link; - - dput(nd->dentry); -@@ -1165,7 +1295,7 @@ ok: +@@ -1165,7 +1298,7 @@ ok: if (!error) { DQUOT_INIT(inode); @@ -519,32 +543,28 @@ } put_write_access(inode); if (error) -@@ -1177,8 +1307,10 @@ ok: +@@ -1177,8 +1310,10 @@ ok: return 0; exit_dput: -+ intent_release(dentry, it); ++ intent_release(it); dput(dentry); exit: -+ intent_release(nd->dentry, it); ++ intent_release(it); path_release(nd); return error; -@@ -1197,7 +1329,19 @@ do_link: +@@ -1197,7 +1332,16 @@ do_link: * are done. Procfs-like symlinks just set LAST_BIND. 
*/ UPDATE_ATIME(dentry->d_inode); -- error = dentry->d_inode->i_op->follow_link(dentry, nd); + nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, nd); + error = dentry->d_inode->i_op->follow_link(dentry, nd); + if (error) { -+ intent_release(dentry, it); ++ intent_release(it); + } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { + /* vfs_follow_link was never called */ -+ intent_release(dentry, it); ++ intent_release(it); + path_release(nd); + error = -ENOLINK; + } @@ -583,18 +603,15 @@ if (IS_ERR(dentry)) goto fail; if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f +@@ -1289,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 0); + -+ if (nd.dentry->d_inode->i_op->mknod2) { ++ if (nd.dentry->d_inode->i_op->mknod_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode, dev); ++ error = op->mknod_raw(&nd, mode, dev); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -604,7 +621,7 @@ error = PTR_ERR(dentry); mode &= ~current->fs->umask; -@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f +@@ -1310,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -612,17 +629,14 @@ path_release(&nd); out: putname(tmp); -@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p +@@ -1357,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 1); -+ if (nd.dentry->d_inode->i_op->mkdir2) { ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { + struct inode_operations 
*op = nd.dentry->d_inode->i_op; -+ error = op->mkdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode); ++ error = op->mkdir_raw(&nd, mode); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -631,7 +645,7 @@ error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p +@@ -1365,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -639,71 +653,49 @@ path_release(&nd); out: putname(tmp); -@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p +@@ -1465,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p error = -EBUSY; goto exit1; } -+ if (nd.dentry->d_inode->i_op->rmdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ struct dentry *last; -+ -+ down(&nd.dentry->d_inode->i_sem); -+ last = lookup_hash_it(&nd.last, nd.dentry, NULL); -+ up(&nd.dentry->d_inode->i_sem); -+ if (IS_ERR(last)) { -+ error = PTR_ERR(last); -+ goto exit1; -+ } -+ if (d_mountpoint(last)) { -+ dput(last); -+ error = -EBUSY; -+ goto exit1; -+ } -+ dput(last); ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; + -+ error = op->rmdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); + dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char * +@@ -1524,8 +1701,15 @@ asmlinkage long sys_unlink(const char * error = -EISDIR; if 
(nd.last_type != LAST_NORM) goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); + dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ -@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char * +@@ -1592,15 +1776,23 @@ asmlinkage long sys_symlink(const char * error = path_lookup(to, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->symlink2) { ++ if (nd.dentry->d_inode->i_op->symlink_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ from); ++ error = op->symlink_raw(&nd, from); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -722,17 +714,14 @@ putname(to); } putname(from); -@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol +@@ -1676,7 +1868,14 @@ asmlinkage long sys_link(const char * ol error = -EXDEV; if (old_nd.mnt != nd.mnt) goto out_release; - new_dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->link2) { ++ if (nd.dentry->d_inode->i_op->link_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link2(old_nd.dentry->d_inode, -+ nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); ++ error = op->link_raw(&old_nd, &nd); + /* the file 
system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out_release; @@ -741,62 +730,37 @@ error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1720,7 +1950,8 @@ exit: +@@ -1720,7 +1919,7 @@ exit: * locking]. */ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) ++ struct inode *new_dir, struct dentry *new_dentry) { int error; struct inode *target; -@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1799,7 +2031,8 @@ out_unlock: +@@ -1799,7 +1998,7 @@ out_unlock: } int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) ++ struct inode *new_dir, struct dentry *new_dentry) { int error; -@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - double_up(&old_dir->i_zombie, &new_dir->i_zombie); - if (error) - return error; -@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - if (S_ISDIR(old_dentry->d_inode->i_mode)) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); - else -- error = 
vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -1889,7 +2124,7 @@ static inline int do_rename(const char * +@@ -1887,9 +2086,18 @@ static inline int do_rename(const char * + if (newnd.last_type != LAST_NORM) + goto exit2; ++ if (old_dir->d_inode->i_op->rename_raw) { ++ lock_kernel(); ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ unlock_kernel(); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ double_lock(new_dir, old_dir); - old_dentry = lookup_hash(&oldnd.last, old_dir); @@ -804,7 +768,7 @@ error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; -@@ -1905,16 +2140,37 @@ static inline int do_rename(const char * +@@ -1905,16 +2113,16 @@ static inline int do_rename(const char * if (newnd.last.name[newnd.last.len]) goto exit4; } @@ -814,38 +778,16 @@ if (IS_ERR(new_dentry)) goto exit4; -+ if (old_dir->d_inode->i_op->rename2) { -+ lock_kernel(); -+ /* don't rename mount point. 
mds will take care of -+ * the rest sanity checking */ -+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) { -+ error = -EBUSY; -+ goto exit5; -+ } -+ -+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, -+ new_dir->d_inode, -+ oldnd.last.name, -+ oldnd.last.len, -+ newnd.last.name, -+ newnd.last.len); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit5; -+ } + lock_kernel(); error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); + new_dir->d_inode, new_dentry); unlock_kernel(); - -+exit5: dput(new_dentry); exit4: dput(old_dentry); -@@ -1965,20 +2221,28 @@ out: +@@ -1965,20 +2173,28 @@ out: } static inline int @@ -876,7 +818,7 @@ out: if (current->link_count || res || nd->last_type!=LAST_NORM) return res; -@@ -2002,7 +2266,13 @@ fail: +@@ -2002,7 +2218,13 @@ fail: int vfs_follow_link(struct nameidata *nd, const char *link) { @@ -891,7 +833,7 @@ } /* get the link contents into pagecache */ -@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent +@@ -2044,7 +2266,7 @@ int page_follow_link(struct dentry *dent { struct page *page = NULL; char *s = page_getlink(dentry, &page); @@ -900,19 +842,8 @@ if (page) { kunmap(page); page_cache_release(page); ---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800 -@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); ---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/open.c 2003-06-09 23:18:07.000000000 +0800 +--- 
linux-2.4.20/fs/open.c~vfs_intent-2.4.20-rh 2003-07-17 08:32:45.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/open.c 2003-07-17 08:35:22.000000000 -0700 @@ -19,6 +19,8 @@ #include @@ -934,7 +865,7 @@ int error; struct iattr newattrs; -@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l +@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l down(&inode->i_sem); newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; @@ -943,14 +874,13 @@ + newattrs.ia_valid |= ATTR_FROM_OPEN; + if (op->setattr_raw) { + newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; + error = op->setattr_raw(inode, &newattrs); -+ } else ++ } else + error = notify_change(dentry, &newattrs); up(&inode->i_sem); return error; } -@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const +@@ -118,12 +127,13 @@ static inline long do_sys_truncate(const struct nameidata nd; struct inode * inode; int error; @@ -965,22 +895,22 @@ if (error) goto out; inode = nd.dentry->d_inode; -@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const +@@ -163,11 +173,13 @@ static inline long do_sys_truncate(const error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); + error = do_truncate(nd.dentry, length, 0); } put_write_access(inode); dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; -@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi +@@ -215,7 +227,7 @@ static inline long do_sys_ftruncate(unsi error = locks_verify_truncate(inode, file, length); if (!error) @@ -989,7 +919,7 @@ out_putf: fput(file); out: -@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam +@@ -260,11 +272,13 @@ asmlinkage long sys_utime(char * filenam struct inode * inode; struct iattr newattrs; @@ -1004,7 +934,7 @@ error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; -@@ -279,11 
+294,29 @@ asmlinkage long sys_utime(char * filenam +@@ -279,11 +293,25 @@ asmlinkage long sys_utime(char * filenam goto dput_and_out; newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; @@ -1021,10 +951,6 @@ + goto dput_and_out; + } + -+ error = -EROFS; -+ if (IS_RDONLY(inode)) -+ goto dput_and_out; -+ + error = -EPERM; + if (!times) { if (current->fsuid != inode->i_uid && @@ -1035,7 +961,7 @@ error = notify_change(nd.dentry, &newattrs); dput_and_out: path_release(&nd); -@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena +@@ -304,12 +332,14 @@ asmlinkage long sys_utimes(char * filena struct inode * inode; struct iattr newattrs; @@ -1051,7 +977,7 @@ error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; -@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena +@@ -324,7 +354,20 @@ asmlinkage long sys_utimes(char * filena newattrs.ia_atime = times[0].tv_sec; newattrs.ia_mtime = times[1].tv_sec; newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; @@ -1073,7 +999,7 @@ if (current->fsuid != inode->i_uid && (error = permission(inode,MAY_WRITE)) != 0) goto dput_and_out; -@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * +@@ -347,6 +390,7 @@ asmlinkage long sys_access(const char * int old_fsuid, old_fsgid; kernel_cap_t old_cap; int res; @@ -1081,7 +1007,7 @@ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ return -EINVAL; -@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * +@@ -364,13 +408,14 @@ asmlinkage long sys_access(const char * else current->cap_effective = current->cap_permitted; @@ -1093,11 +1019,11 @@ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } -@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f +@@ -385,8 +430,9 @@ asmlinkage long sys_chdir(const char * f { int error; struct nameidata nd; @@ -1108,15 +1034,15 @@ if (error) goto out; -@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f +@@ -397,6 +443,7 @@ asmlinkage long sys_chdir(const char * f set_fs_pwd(current->fs, nd.mnt, nd.dentry); dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; -@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * +@@ -436,9 +483,10 @@ asmlinkage long sys_chroot(const char * { int error; struct nameidata nd; @@ -1129,15 +1055,15 @@ if (error) goto out; -@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * +@@ -454,6 +502,7 @@ asmlinkage long sys_chroot(const char * set_fs_altroot(); error = 0; dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; -@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f +@@ -508,6 +557,18 @@ asmlinkage long sys_chmod(const char * f if (IS_RDONLY(inode)) goto dput_and_out; @@ -1156,7 +1082,7 @@ error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto dput_and_out; -@@ -538,6 +604,20 @@ static int chown_common(struct dentry * +@@ -538,6 +599,20 @@ static int chown_common(struct dentry * error = -EROFS; if (IS_RDONLY(inode)) goto out; @@ -1166,7 +1092,7 @@ + + newattrs.ia_uid = user; + newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; + 
newattrs.ia_valid |= ATTR_RAW; + error = op->setattr_raw(inode, &newattrs); + /* the file system wants to use normal vfs path now */ @@ -1177,15 +1103,19 @@ error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; -@@ -642,6 +722,7 @@ struct file *filp_open(const char * file +@@ -642,8 +717,9 @@ struct file *filp_open(const char * file { int namei_flags, error; struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags }; - - flags &= ~O_DIRECT; +- +- flags &= ~O_DIRECT; ++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags }; ++ ++ //flags &= ~O_DIRECT; -@@ -651,14 +732,15 @@ struct file *filp_open(const char * file + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) +@@ -651,14 +727,15 @@ struct file *filp_open(const char * file if (namei_flags & O_TRUNC) namei_flags |= 2; @@ -1206,19 +1136,27 @@ { struct file * f; struct inode *inode; -@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry * +@@ -695,12 +772,15 @@ struct file *dentry_open(struct dentry * + } + + if (f->f_op && f->f_op->open) { ++ f->f_it = it; + error = f->f_op->open(inode,f); ++ f->f_it = NULL; + if (error) + goto cleanup_all; } f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -+ intent_release(dentry, it); ++ intent_release(it); return f; cleanup_all: -@@ -715,11 +798,17 @@ cleanup_all: +@@ -715,11 +795,17 @@ cleanup_all: cleanup_file: put_filp(f); cleanup_dentry: -+ intent_release(dentry, it); ++ intent_release(it); dput(dentry); mntput(mnt); return ERR_PTR(error); @@ -1232,56 +1170,114 @@ /* * Find an empty file descriptor entry, and mark it busy. 
*/ ---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh 2003-04-11 14:05:08.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/stat.c 2003-06-09 23:18:07.000000000 +0800 -@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m - int vfs_stat(char *name, struct kstat *stat) +--- linux-2.4.20/fs/stat.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:05.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/stat.c 2003-07-17 08:51:33.000000000 -0700 +@@ -17,10 +17,12 @@ + * Revalidate the inode. This is required for proper NFS attribute caching. + */ + static __inline__ int +-do_revalidate(struct dentry *dentry) ++do_revalidate(struct dentry *dentry, struct lookup_intent *it) + { + struct inode * inode = dentry->d_inode; +- if (inode->i_op && inode->i_op->revalidate) ++ if (inode->i_op && inode->i_op->revalidate_it) ++ return inode->i_op->revalidate_it(dentry, it); ++ else if (inode->i_op && inode->i_op->revalidate) + return inode->i_op->revalidate(dentry); + return 0; + } +@@ -32,13 +34,13 @@ static inline nlink_t user_nlink(struct + return inode->i_nlink; + } + +-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, struct lookup_intent *it) + { + int res = 0; + unsigned int blocks, indirect; + struct inode *inode = dentry->d_inode; + +- res = do_revalidate(dentry); ++ res = do_revalidate(dentry, it); + if (res) + return res; + +@@ -111,10 +113,12 @@ int vfs_stat(char *name, struct kstat *s { struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; - error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd, &it); ++ error = user_path_walk_it(name, &nd, &it); if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); +- error = do_getattr(nd.mnt, nd.dentry, stat); ++ error = do_getattr(nd.mnt, nd.dentry, stat, &it); ++ 
intent_release(&it); path_release(&nd); } return error; -@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s - int vfs_lstat(char *name, struct kstat *stat) +@@ -124,10 +128,12 @@ int vfs_lstat(char *name, struct kstat * { struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; int error; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; - error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd, &it); ++ error = user_path_walk_link_it(name, &nd, &it); if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); +- error = do_getattr(nd.mnt, nd.dentry, stat); ++ error = do_getattr(nd.mnt, nd.dentry, stat, &it); ++ intent_release(&it); path_release(&nd); } return error; ---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh 2003-04-12 15:46:39.000000000 +0800 -+++ linux-2.4.20-rh-root/include/linux/dcache.h 2003-06-09 23:18:07.000000000 +0800 -@@ -7,6 +7,28 @@ +@@ -139,7 +145,7 @@ int vfs_fstat(unsigned int fd, struct ks + int error = -EBADF; + + if (f) { +- error = do_getattr(f->f_vfsmnt, f->f_dentry, stat); ++ error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL); + fput(f); + } + return error; +@@ -286,7 +292,7 @@ asmlinkage long sys_readlink(const char + + error = -EINVAL; + if (inode->i_op && inode->i_op->readlink && +- !(error = do_revalidate(nd.dentry))) { ++ !(error = do_revalidate(nd.dentry, NULL))) { + UPDATE_ATIME(inode); + error = inode->i_op->readlink(nd.dentry, buf, bufsiz); + } +--- linux-2.4.20/include/linux/dcache.h~vfs_intent-2.4.20-rh 2003-07-17 08:32:48.000000000 -0700 ++++ linux-2.4.20-mmonroe/include/linux/dcache.h 2003-07-17 08:35:22.000000000 -0700 +@@ -6,6 +6,45 @@ + #include #include #include - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) ++#include ++ ++#define IT_OPEN 0x0001 ++#define IT_CREAT 0x0002 ++#define IT_READDIR 
0x0004 ++#define IT_GETATTR 0x0008 ++#define IT_LOOKUP 0x0010 ++#define IT_UNLINK 0x0020 ++#define IT_GETXATTR 0x0040 ++#define IT_EXEC 0x0080 ++#define IT_PIN 0x0100 + -+#define IT_FL_LOCKED (1) -+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */ ++#define IT_FL_LOCKED 0x0001 ++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ ++ ++#define INTENT_MAGIC 0x19620323 + +struct lookup_intent { + int it_op; ++ void (*it_op_release)(struct lookup_intent *); ++ int it_magic; + int it_mode; + int it_flags; + int it_disposition; @@ -1292,34 +1288,41 @@ + void *it_data; +}; + ++static inline void intent_init(struct lookup_intent *it, int op, int flags) ++{ ++ memset(it, 0, sizeof(*it)); ++ it->it_magic = INTENT_MAGIC; ++ it->it_op = op; ++ it->it_flags = flags; ++} ++ + /* * linux/include/linux/dcache.h - * -@@ -82,6 +104,7 @@ struct dentry { - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; - struct super_block * d_sb; /* The root of the dentry tree */ -+ struct lookup_intent *d_it; - unsigned long d_vfs_flags; - void * d_fsdata; /* fs-specific data */ - void * d_extra_attributes; /* TUX-specific data */ -@@ -96,8 +119,15 @@ struct dentry_operations { +@@ -96,8 +135,22 @@ struct dentry_operations { int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); ++ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); ++ void (*d_pin)(struct dentry *, struct vfsmount * , int); ++ void (*d_unpin)(struct dentry *, struct vfsmount *, int); }; ++#define PIN(de,mnt,flag) if (de->d_op && de->d_op->d_pin) \ ++ de->d_op->d_pin(de, mnt, flag); ++#define UNPIN(de,mnt,flag) if (de->d_op && de->d_op->d_unpin) \ ++ de->d_op->d_unpin(de, mnt, flag); ++ ++ +/* defined in fs/namei.c */ -+extern void intent_release(struct dentry 
*de, struct lookup_intent *it); ++extern void intent_release(struct lookup_intent *it); +/* defined in fs/dcache.c */ +extern void __d_rehash(struct dentry * entry, int lock); + /* the dentry parameter passed to d_hash and d_compare is the parent * directory of the entries to be compared. It is used in case these * functions need any directory specific information for determining -@@ -129,6 +159,7 @@ d_iput: no no yes +@@ -129,6 +182,7 @@ d_iput: no no yes * s_nfsd_free_path semaphore will be down */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ @@ -1327,26 +1330,27 @@ extern spinlock_t dcache_lock; ---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh 2003-05-30 02:07:39.000000000 +0800 -+++ linux-2.4.20-rh-root/include/linux/fs.h 2003-06-09 23:18:07.000000000 +0800 -@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he +--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-rh 2003-07-17 08:34:44.000000000 -0700 ++++ linux-2.4.20-mmonroe/include/linux/fs.h 2003-07-17 08:35:22.000000000 -0700 +@@ -337,6 +337,9 @@ extern void set_bh_page(struct buffer_he #define ATTR_MTIME_SET 256 #define ATTR_FORCE 512 /* Not a change, but a change it */ #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ ++#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ ++#define ATTR_CTIME_SET 0x2000 /* * This is the Inode Attributes structure, used for notify_change(). 
It -@@ -574,6 +576,7 @@ struct file { +@@ -574,6 +577,7 @@ struct file { /* needed for tty driver, and maybe others */ void *private_data; -+ struct lookup_intent *f_intent; ++ struct lookup_intent *f_it; /* preallocated helper kiobuf to speedup O_DIRECT */ struct kiobuf *f_iobuf; -@@ -701,6 +704,7 @@ struct nameidata { +@@ -701,6 +705,7 @@ struct nameidata { struct qstr last; unsigned int flags; int last_type; @@ -1354,52 +1358,50 @@ }; /* -@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s +@@ -821,7 +826,8 @@ extern int vfs_symlink(struct inode *, s extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *); -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it); ++ struct inode *new_dir, struct dentry *new_dentry); /* * File types -@@ -882,20 +888,33 @@ struct file_operations { +@@ -881,21 +887,32 @@ struct file_operations { + struct inode_operations { int (*create) (struct inode *,struct dentry *,int); ++ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); ++ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, const char *, int); ++ int (*link_raw) (struct nameidata *,struct nameidata *); int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); ++ int (*unlink_raw) (struct nameidata *); int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode 
*, const char *, int, const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); ++ int (*mkdir_raw) (struct nameidata *,int); int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir2) (struct inode *, const char *, int); ++ int (*rmdir_raw) (struct nameidata *); int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod2) (struct inode *, const char *, int,int,int); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); int (*readlink) (struct dentry *, char *,int); int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); void (*truncate) (struct inode *); int (*permission) (struct inode *, int); int (*revalidate) (struct dentry *); ++ int (*revalidate_it) (struct dentry *, struct lookup_intent *); int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); int (*setxattr) (struct dentry *, const char *, void *, size_t, int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode +@@ -1091,10 +1108,14 @@ static inline int get_lease(struct inode asmlinkage long sys_open(const char *, int, int); asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ @@ -1415,7 +1417,7 @@ extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char *); -@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip +@@ -1385,6 
+1406,7 @@ typedef int (*read_actor_t)(read_descrip extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); @@ -1423,7 +1425,7 @@ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co +@@ -1396,6 +1418,8 @@ extern struct dentry * lookup_one_len(co extern struct dentry * lookup_hash(struct qstr *, struct dentry *); #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) @@ -1432,7 +1434,7 @@ extern void inode_init_once(struct inode *); extern void iput(struct inode *); -@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro +@@ -1497,6 +1521,8 @@ extern struct file_operations generic_ro extern int vfs_readlink(struct dentry *, char *, int, const char *); extern int vfs_follow_link(struct nameidata *, const char *); @@ -1441,8 +1443,8 @@ extern int page_readlink(struct dentry *, char *, int); extern int page_follow_link(struct dentry *, struct nameidata *); extern struct inode_operations page_symlink_inode_operations; ---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh 2003-05-30 02:07:42.000000000 +0800 -+++ linux-2.4.20-rh-root/kernel/ksyms.c 2003-06-09 23:18:07.000000000 +0800 +--- linux-2.4.20/kernel/ksyms.c~vfs_intent-2.4.20-rh 2003-07-17 08:34:45.000000000 -0700 ++++ linux-2.4.20-mmonroe/kernel/ksyms.c 2003-07-17 08:35:22.000000000 -0700 @@ -298,6 +298,7 @@ EXPORT_SYMBOL(read_cache_page); EXPORT_SYMBOL(set_page_dirty); EXPORT_SYMBOL(vfs_readlink); @@ -1451,17 +1453,16 @@ EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(page_follow_link); EXPORT_SYMBOL(page_symlink_inode_operations); ---- 
linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh 2003-04-13 10:07:02.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/exec.c 2003-06-09 23:18:07.000000000 +0800 +--- linux-2.4.20/fs/exec.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:09.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/exec.c 2003-07-17 08:35:22.000000000 -0700 @@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * struct file * file; struct nameidata nd; int error; -- ++ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; + - error = user_path_walk(library, &nd); -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; -+ -+ error = user_path_walk_it(library, &nd, &it); ++ error = user_path_walk_it(library, &nd, &it); if (error) goto out; @@ -1470,8 +1471,8 @@ goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); ++ intent_release(&it); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -1479,32 +1480,32 @@ struct inode *inode; struct file *file; int err = 0; -- -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); + struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; -+ + +- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); + err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); file = ERR_PTR(err); if (!err) { inode = nd.dentry->d_inode; -@@ -395,7 +398,7 @@ struct file *open_exec(const char *name) +@@ -395,7 +398,8 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); ++ intent_release(&it); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { -@@ -404,6 +407,7 @@ struct file *open_exec(const char *name) - } - } - out: -+ 
intent_release(nd.dentry, &it); +@@ -407,6 +411,7 @@ out: return file; } } -@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod ++ intent_release(&it); + path_release(&nd); + } + goto out; +@@ -1283,7 +1288,7 @@ int do_coredump(long signr, int exit_cod goto close_fail; if (!file->f_op->write) goto close_fail; @@ -1513,15 +1514,15 @@ goto close_fail; retval = binfmt->core_dump(signr, regs, file); ---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh 2003-06-09 23:16:51.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/proc/base.c 2003-06-09 23:18:52.000000000 +0800 +--- linux-2.4.20/fs/proc/base.c~vfs_intent-2.4.20-rh 2003-07-17 08:33:05.000000000 -0700 ++++ linux-2.4.20-mmonroe/fs/proc/base.c 2003-07-17 08:35:22.000000000 -0700 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); nd->last_type = LAST_BIND; + -+ if (nd->it != NULL) -+ nd->it->it_int_flags |= IT_FL_FOLLOWED; ++ if (nd->it != NULL) ++ nd->it->it_int_flags |= IT_FL_FOLLOWED; out: return error; } diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch index 09bcb22..e522896 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch @@ -1,17 +1,20 @@ - fs/dcache.c | 20 ++ - fs/exec.c | 15 + - fs/namei.c | 378 ++++++++++++++++++++++++++++++++++++++++++------- - fs/nfsd/vfs.c | 2 - fs/open.c | 126 ++++++++++++++-- - fs/proc/base.c | 3 - fs/stat.c | 24 ++- - include/linux/dcache.h | 31 ++++ - include/linux/fs.h | 32 +++- - kernel/ksyms.c | 1 - 10 files changed, 543 insertions(+), 89 deletions(-) + fs/dcache.c | 19 ++ + fs/exec.c | 15 +- + fs/namei.c | 329 ++++++++++++++++++++++++++++++++++++++-------- + fs/namespace.c | 30 +++- + fs/open.c | 128 +++++++++++++++-- + fs/proc/base.c | 3 + fs/stat.c | 50 ++++-- + include/linux/dcache.h | 53 +++++++ + 
include/linux/fs.h | 29 +++- + include/linux/fs_struct.h | 4 + kernel/exit.c | 3 + kernel/fork.c | 3 + kernel/ksyms.c | 1 + 13 files changed, 560 insertions(+), 107 deletions(-) ---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/exec.c Wed May 28 01:39:18 2003 +--- linux-2.4.20-ad/fs/exec.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/fs/exec.c 2003-07-07 15:13:53.000000000 -0600 @@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * struct file * file; struct nameidata nd; @@ -29,7 +32,7 @@ - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -50,7 +53,7 @@ if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -58,7 +61,7 @@ return file; } } -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } goto out; @@ -71,8 +74,8 @@ goto close_fail; retval = binfmt->core_dump(signr, regs, file); ---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/dcache.c Wed May 28 01:39:18 2003 +--- linux-2.4.20-ad/fs/dcache.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/fs/dcache.c 2003-07-09 01:46:27.000000000 -0600 @@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry) spin_unlock(&dcache_lock); return 0; @@ -87,15 +90,7 @@ /* * Check whether to do a partial shrink_dcache * to get rid of unused child entries. 
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry * - dentry->d_op = NULL; - dentry->d_fsdata = NULL; - dentry->d_mounted = 0; -+ dentry->d_it = NULL; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); -@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry) +@@ -830,13 +837,19 @@ void d_delete(struct dentry * dentry) * Adds a dentry to the hash according to its name. */ @@ -118,16 +113,133 @@ } #define do_switch(x,y) do { \ ---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/namei.c Sun Jun 1 23:41:35 2003 +--- linux-2.4.20-ad/fs/namespace.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/fs/namespace.c 2003-07-07 15:13:53.000000000 -0600 +@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount * + { + old_nd->dentry = mnt->mnt_mountpoint; + old_nd->mnt = mnt->mnt_parent; ++ UNPIN(old_nd->dentry, old_nd->mnt, 1); + mnt->mnt_parent = mnt; + mnt->mnt_mountpoint = mnt->mnt_root; + list_del_init(&mnt->mnt_child); +@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount * + { + mnt->mnt_parent = mntget(nd->mnt); + mnt->mnt_mountpoint = dget(nd->dentry); ++ PIN(nd->dentry, nd->mnt, 1); + list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); + list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts); + nd->dentry->d_mounted++; +@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata + { + struct nameidata old_nd; + struct vfsmount *mnt = NULL; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; + int err = mount_is_safe(nd); + if (err) + return err; + if (!old_name || !*old_name) + return -EINVAL; +- err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd); +- if (err) ++ err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it); ++ if (err) { ++ intent_release(&it); + return err; ++ } + + down_write(¤t->namespace->sem); + err = -EINVAL; +@@ -515,6 +520,7 
@@ static int do_loopback(struct nameidata + } + + up_write(¤t->namespace->sem); ++ intent_release(&it); + path_release(&old_nd); + return err; + } +@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di + unsigned long flags, void *data_page) + { + struct nameidata nd; +- int retval = 0; ++ struct lookup_intent it = { .it_op = IT_GETATTR }; ++ int retval = 0; + int mnt_flags = 0; + + /* Discard magic */ +@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di + flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); + + /* ... and get the mountpoint */ +- retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); +- if (retval) ++ retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); ++ if (retval) { ++ intent_release(&it); + return retval; +- ++ } + if (flags & MS_REMOUNT) + retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, + data_page); +@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di + else + retval = do_add_mount(&nd, type_page, flags, mnt_flags, + dev_name, data_page); ++ ++ intent_release(&it); + path_release(&nd); + return retval; + } +@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha + { + struct vfsmount *tmp; + struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd; ++ struct lookup_intent new_it = { .it_op = IT_GETATTR }; ++ struct lookup_intent old_it = { .it_op = IT_GETATTR }; + int error; + + if (!capable(CAP_SYS_ADMIN)) +@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha + + lock_kernel(); + +- error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd); ++ error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it); + if (error) + goto out0; + error = -EINVAL; + if (!check_mnt(new_nd.mnt)) + goto out1; + +- error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd); ++ error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it); + if 
(error) + goto out1; + +@@ -970,8 +982,10 @@ out2: + up(&old_nd.dentry->d_inode->i_zombie); + up_write(¤t->namespace->sem); + path_release(&user_nd); ++ intent_release(&old_it); + path_release(&old_nd); + out1: ++ intent_release(&new_it); + path_release(&new_nd); + out0: + unlock_kernel(); +--- linux-2.4.20-ad/fs/namei.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/fs/namei.c 2003-07-08 13:53:48.000000000 -0600 @@ -94,6 +94,13 @@ * XEmacs seems to be relying on it... */ -+void intent_release(struct dentry *de, struct lookup_intent *it) ++void intent_release(struct lookup_intent *it) +{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); ++ if (it && it->it_op_release) ++ it->it_op_release(it); + +} + @@ -144,8 +256,8 @@ { struct dentry * dentry = d_lookup(parent, name); -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { ++ if (!dentry->d_op->d_revalidate_it(dentry, flags, it) && + !d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; @@ -175,8 +287,8 @@ result = ERR_PTR(-ENOMEM); if (dentry) { lock_kernel(); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); ++ if (dir->i_op->lookup_it) ++ result = dir->i_op->lookup_it(dir, dentry, it, flags); + else result = dir->i_op->lookup(dir, dentry); unlock_kernel(); @@ -185,8 +297,8 @@ dput(result); result = ERR_PTR(-ENOENT); } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && ++ } else if (result->d_op && result->d_op->d_revalidate_it) { ++ if (!result->d_op->d_revalidate_it(result, flags, it) && + !d_invalidate(result)) { + dput(result); + goto again; @@ -204,30 +316,27 @@ { int err; if (current->link_count >= 5) -@@ -346,10 +375,21 @@ static inline int do_follow_link(struct +@@ -346,10 +375,18 @@ static 
inline int do_follow_link(struct current->link_count++; current->total_link_count++; UPDATE_ATIME(dentry->d_inode); - err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ err = -ENOLINK; -+ } ++ nd->it = it; ++ err = dentry->d_inode->i_op->follow_link(dentry, nd); ++ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { ++ /* vfs_follow_link was never called */ ++ intent_release(it); ++ path_release(nd); ++ err = -ENOLINK; ++ } current->link_count--; return err; loop: -+ intent_release(dentry, it); ++ intent_release(it); path_release(nd); return -ELOOP; } -@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str +@@ -379,15 +416,26 @@ int follow_up(struct vfsmount **mnt, str return __follow_up(mnt, dentry); } @@ -247,7 +356,7 @@ + opc = it->it_op; + mode = it->it_mode; + } -+ intent_release(*dentry, it); ++ intent_release(it); + if (it) { + it->it_op = opc; + it->it_mode = mode; @@ -255,7 +364,7 @@ dput(*dentry); mntput(mounted->mnt_parent); *dentry = dget(mounted->mnt_root); -@@ -399,7 +450,7 @@ static inline int __follow_down(struct v +@@ -399,7 +447,7 @@ static inline int __follow_down(struct v int follow_down(struct vfsmount **mnt, struct dentry **dentry) { @@ -264,7 +373,7 @@ } static inline void follow_dotdot(struct nameidata *nd) -@@ -435,7 +486,7 @@ static inline void follow_dotdot(struct +@@ -435,7 +483,7 @@ static inline void follow_dotdot(struct mntput(nd->mnt); nd->mnt = parent; } @@ -273,7 +382,7 @@ ; } -@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct +@@ -447,7 +495,8 @@ static inline void follow_dotdot(struct * * We expect 'base' to be positive and a directory. 
*/ @@ -283,7 +392,7 @@ { struct dentry *dentry; struct inode *inode; -@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st +@@ -520,15 +569,15 @@ int link_path_walk(const char * name, st break; } /* This does the actual lookups.. */ @@ -302,27 +411,27 @@ ; err = -ENOENT; -@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st +@@ -539,8 +588,8 @@ int link_path_walk(const char * name, st if (!inode->i_op) goto out_dput; - if (inode->i_op->follow_link) { - err = do_follow_link(dentry, nd); -+ if (inode->i_op->follow_link || inode->i_op->follow_link2) { ++ if (inode->i_op->follow_link) { + err = do_follow_link(dentry, nd, NULL); dput(dentry); if (err) goto return_err; -@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st +@@ -556,7 +605,7 @@ int link_path_walk(const char * name, st nd->dentry = dentry; } err = -ENOTDIR; - if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) ++ if (!inode->i_op->lookup && !inode->i_op->lookup_it) break; continue; /* here ends the main loop */ -@@ -583,19 +635,20 @@ last_component: +@@ -583,19 +632,19 @@ last_component: if (err < 0) break; } @@ -340,54 +449,52 @@ ; inode = dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) -- && inode && inode->i_op && inode->i_op->follow_link) { + && inode && inode->i_op && inode->i_op->follow_link) { - err = do_follow_link(dentry, nd); -+ && inode && inode->i_op && -+ (inode->i_op->follow_link || inode->i_op->follow_link2)) { + err = do_follow_link(dentry, nd, it); dput(dentry); if (err) goto return_err; -@@ -609,7 +662,8 @@ last_component: +@@ -609,7 +658,8 @@ last_component: goto no_inode; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op || !inode->i_op->lookup) + if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) ++ (!inode->i_op->lookup && !inode->i_op->lookup_it)) break; } goto return_base; -@@ -633,6 +687,23 @@ return_reval: +@@ -633,6 +683,23 @@ return_reval: * Check the cached dentry for 
staleness. */ dentry = nd->dentry; -+ revalidate_again: -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { ++ revalidate_again: ++ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) { + err = -ESTALE; -+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, NULL); ++ if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) { ++ struct dentry *new; ++ err = permission(dentry->d_parent->d_inode, ++ MAY_EXEC); ++ if (err) ++ break; ++ new = real_lookup(dentry->d_parent, ++ &dentry->d_name, 0, NULL); + d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ goto revalidate_again; -+ } ++ dput(dentry); ++ dentry = new; ++ goto revalidate_again; ++ } + } else if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { err = -ESTALE; if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -646,15 +717,28 @@ out_dput: +@@ -646,15 +713,28 @@ out_dput: dput(dentry); break; } + if (err) -+ intent_release(nd->dentry, it); ++ intent_release(it); path_release(nd); return_err: return err; @@ -412,7 +519,7 @@ } /* SMP-safe */ -@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct +@@ -739,6 +819,17 @@ walk_init_root(const char *name, struct } /* SMP-safe */ @@ -430,15 +537,15 @@ int path_lookup(const char *path, unsigned flags, struct nameidata *nd) { int error = 0; -@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned +@@ -753,6 +844,7 @@ int path_init(const char *name, unsigned { nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags; -+ nd->it = NULL; ++ nd->it = NULL; if (*name=='/') return walk_init_root(name,nd); read_lock(¤t->fs->lock); -@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned +@@ -767,7 +859,8 @@ int path_init(const char *name, unsigned * needs parent already locked. Doesn't follow mounts. * SMP-safe. 
*/ @@ -448,7 +555,7 @@ { struct dentry * dentry; struct inode *inode; -@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr +@@ -790,13 +883,16 @@ struct dentry * lookup_hash(struct qstr goto out; } @@ -460,13 +567,13 @@ if (!new) goto out; lock_kernel(); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); ++ if (inode->i_op->lookup_it) ++ dentry = inode->i_op->lookup_it(inode, new, it, 0); + else dentry = inode->i_op->lookup(inode, new); unlock_kernel(); if (!dentry) -@@ -808,6 +908,12 @@ out: +@@ -808,6 +904,12 @@ out: return dentry; } @@ -479,7 +586,7 @@ /* SMP-safe */ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) { -@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha +@@ -829,7 +931,7 @@ struct dentry * lookup_one_len(const cha } this.hash = end_name_hash(hash); @@ -488,7 +595,7 @@ access: return ERR_PTR(-EACCES); } -@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign +@@ -860,6 +962,23 @@ int __user_walk(const char *name, unsign return err; } @@ -512,7 +619,47 @@ /* * It's inline, so penalty for filesystems that don't use sticky bit is * minimal. -@@ -996,7 +1119,8 @@ exit_lock: +@@ -955,7 +1074,8 @@ static inline int lookup_flags(unsigned + return retval; + } + +-int vfs_create(struct inode *dir, struct dentry *dentry, int mode) ++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode, ++ struct lookup_intent *it) + { + int error; + +@@ -968,12 +1088,15 @@ int vfs_create(struct inode *dir, struct + goto exit_lock; + + error = -EACCES; /* shouldn't it be ENOSYS? 
*/ +- if (!dir->i_op || !dir->i_op->create) ++ if (!dir->i_op || (!dir->i_op->create && !dir->i_op->create_it)) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); +- error = dir->i_op->create(dir, dentry, mode); ++ if (dir->i_op->create_it) ++ error = dir->i_op->create_it(dir, dentry, mode, it); ++ else ++ error = dir->i_op->create(dir, dentry, mode); + unlock_kernel(); + exit_lock: + up(&dir->i_zombie); +@@ -982,6 +1105,11 @@ exit_lock: + return error; + } + ++int vfs_create(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ return vfs_create_it(dir, dentry, mode, NULL); ++} ++ + /* + * open_namei() + * +@@ -996,7 +1124,8 @@ exit_lock: * for symlinks (where the permissions are checked later). * SMP-safe */ @@ -522,7 +669,7 @@ { int acc_mode, error = 0; struct inode *inode; -@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in +@@ -1010,7 +1139,7 @@ int open_namei(const char * pathname, in * The simplest case - just a plain lookup. */ if (!(flag & O_CREAT)) { @@ -531,7 +678,7 @@ if (error) return error; dentry = nd->dentry; -@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in +@@ -1020,6 +1149,10 @@ int open_namei(const char * pathname, in /* * Create - we need to know the parent. 
*/ @@ -542,7 +689,7 @@ error = path_lookup(pathname, LOOKUP_PARENT, nd); if (error) return error; -@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in +@@ -1035,7 +1168,7 @@ int open_namei(const char * pathname, in dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -551,15 +698,21 @@ do_last: error = PTR_ERR(dentry); -@@ -1044,6 +1172,7 @@ do_last: +@@ -1044,10 +1177,11 @@ do_last: goto exit; } + it->it_mode = mode; /* Negative dentry, just create the file */ if (!dentry->d_inode) { - error = vfs_create(dir->d_inode, dentry, -@@ -1072,12 +1201,13 @@ do_last: +- error = vfs_create(dir->d_inode, dentry, +- mode & ~current->fs->umask); ++ error = vfs_create_it(dir->d_inode, dentry, ++ mode & ~current->fs->umask, it); + up(&dir->d_inode->i_sem); + dput(nd->dentry); + nd->dentry = dentry; +@@ -1072,7 +1206,7 @@ do_last: error = -ELOOP; if (flag & O_NOFOLLOW) goto exit_dput; @@ -568,14 +721,7 @@ } error = -ENOENT; if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || -+ dentry->d_inode->i_op->follow_link2)) - goto do_link; - - dput(nd->dentry); -@@ -1151,7 +1281,7 @@ ok: +@@ -1151,7 +1285,7 @@ ok: if (!error) { DQUOT_INIT(inode); @@ -584,39 +730,36 @@ } put_write_access(inode); if (error) -@@ -1163,8 +1293,10 @@ ok: +@@ -1163,8 +1297,10 @@ ok: return 0; exit_dput: -+ intent_release(dentry, it); ++ intent_release(it); dput(dentry); exit: -+ intent_release(nd->dentry, it); ++ intent_release(it); path_release(nd); return error; -@@ -1183,7 +1315,19 @@ do_link: +@@ -1183,7 +1319,16 @@ do_link: * are done. Procfs-like symlinks just set LAST_BIND. 
*/ UPDATE_ATIME(dentry->d_inode); - error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, nd); ++ nd->it = it; ++ error = dentry->d_inode->i_op->follow_link(dentry, nd); + if (error) { -+ intent_release(dentry, it); -+ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ error = -ENOLINK; -+ } ++ intent_release(it); ++ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { ++ /* vfs_follow_link was never called */ ++ intent_release(it); ++ path_release(nd); ++ error = -ENOLINK; ++ } dput(dentry); if (error) return error; -@@ -1205,13 +1349,20 @@ do_link: +@@ -1205,13 +1350,20 @@ do_link: } dir = nd->dentry; down(&dir->d_inode->i_sem); @@ -639,7 +782,7 @@ { struct dentry *dentry; -@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru +@@ -1219,7 +1371,7 @@ static struct dentry *lookup_create(stru dentry = ERR_PTR(-EEXIST); if (nd->last_type != LAST_NORM) goto fail; @@ -648,18 +791,15 @@ if (IS_ERR(dentry)) goto fail; if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f +@@ -1275,7 +1427,16 @@ asmlinkage long sys_mknod(const char * f error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 0); + -+ if (nd.dentry->d_inode->i_op->mknod2) { ++ if (nd.dentry->d_inode->i_op->mknod_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode, dev); ++ error = op->mknod_raw(&nd, mode, dev); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -669,7 +809,7 @@ error = PTR_ERR(dentry); mode &= ~current->fs->umask; -@@ 
-1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f +@@ -1296,6 +1457,7 @@ asmlinkage long sys_mknod(const char * f dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -677,17 +817,14 @@ path_release(&nd); out: putname(tmp); -@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p +@@ -1343,7 +1505,14 @@ asmlinkage long sys_mkdir(const char * p error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 1); -+ if (nd.dentry->d_inode->i_op->mkdir2) { ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode); ++ error = op->mkdir_raw(&nd, mode); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -696,7 +833,7 @@ error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p +@@ -1351,6 +1520,7 @@ asmlinkage long sys_mkdir(const char * p dput(dentry); } up(&nd.dentry->d_inode->i_sem); @@ -704,71 +841,49 @@ path_release(&nd); out: putname(tmp); -@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p +@@ -1451,8 +1621,16 @@ asmlinkage long sys_rmdir(const char * p error = -EBUSY; goto exit1; } -+ if (nd.dentry->d_inode->i_op->rmdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ struct dentry *last; -+ -+ down(&nd.dentry->d_inode->i_sem); -+ last = lookup_hash_it(&nd.last, nd.dentry, NULL); -+ up(&nd.dentry->d_inode->i_sem); -+ if (IS_ERR(last)) { -+ error = PTR_ERR(last); -+ goto exit1; -+ } -+ if (d_mountpoint(last)) { -+ dput(last); -+ error = -EBUSY; -+ goto exit1; -+ } -+ dput(last); ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; + -+ error = op->rmdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now 
*/ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); + dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char * +@@ -1510,8 +1688,15 @@ asmlinkage long sys_unlink(const char * error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } down(&nd.dentry->d_inode->i_sem); - dentry = lookup_hash(&nd.last, nd.dentry); + dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? 
Because we want correct error value */ -@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char * +@@ -1578,15 +1763,23 @@ asmlinkage long sys_symlink(const char * error = path_lookup(to, LOOKUP_PARENT, &nd); if (error) goto out; - dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->symlink2) { ++ if (nd.dentry->d_inode->i_op->symlink_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ from); ++ error = op->symlink_raw(&nd, from); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out2; @@ -787,17 +902,14 @@ putname(to); } putname(from); -@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol +@@ -1662,7 +1855,14 @@ asmlinkage long sys_link(const char * ol error = -EXDEV; if (old_nd.mnt != nd.mnt) goto out_release; - new_dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->link2) { ++ if (nd.dentry->d_inode->i_op->link_raw) { + struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link2(old_nd.dentry->d_inode, -+ nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); ++ error = op->link_raw(&old_nd, &nd); + /* the file system wants to use normal vfs path now */ + if (error != -EOPNOTSUPP) + goto out_release; @@ -806,62 +918,37 @@ error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1706,7 +1936,8 @@ exit: +@@ -1706,7 +1906,7 @@ exit: * locking]. 
*/ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) ++ struct inode *new_dir, struct dentry *new_dentry) { int error; struct inode *target; -@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1785,7 +2017,8 @@ out_unlock: +@@ -1785,7 +1985,7 @@ out_unlock: } int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) ++ struct inode *new_dir, struct dentry *new_dentry) { int error; -@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - double_up(&old_dir->i_zombie, &new_dir->i_zombie); - if (error) - return error; -@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - if (S_ISDIR(old_dentry->d_inode->i_mode)) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); - else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -1875,7 +2110,7 @@ static inline int do_rename(const char * +@@ -1873,9 +2073,18 @@ static inline int do_rename(const 
char * + if (newnd.last_type != LAST_NORM) + goto exit2; ++ if (old_dir->d_inode->i_op->rename_raw) { ++ lock_kernel(); ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ unlock_kernel(); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ double_lock(new_dir, old_dir); - old_dentry = lookup_hash(&oldnd.last, old_dir); @@ -869,7 +956,7 @@ error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; -@@ -1891,16 +2126,37 @@ static inline int do_rename(const char * +@@ -1891,16 +2100,16 @@ static inline int do_rename(const char * if (newnd.last.name[newnd.last.len]) goto exit4; } @@ -879,38 +966,16 @@ if (IS_ERR(new_dentry)) goto exit4; -+ if (old_dir->d_inode->i_op->rename2) { -+ lock_kernel(); -+ /* don't rename mount point. mds will take care of -+ * the rest sanity checking */ -+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) { -+ error = -EBUSY; -+ goto exit5; -+ } -+ -+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, -+ new_dir->d_inode, -+ oldnd.last.name, -+ oldnd.last.len, -+ newnd.last.name, -+ newnd.last.len); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit5; -+ } + lock_kernel(); error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); + new_dir->d_inode, new_dentry); unlock_kernel(); - -+exit5: dput(new_dentry); exit4: dput(old_dentry); -@@ -1951,20 +2207,28 @@ out: +@@ -1951,20 +2160,28 @@ out: } static inline int @@ -923,12 +988,12 @@ if (IS_ERR(link)) goto fail; -+ if (it == NULL) -+ it = nd->it; -+ else if (it != nd->it) -+ printk("it != nd->it: tell phil@clusterfs.com\n"); -+ if (it != NULL) -+ it->it_int_flags |= IT_FL_FOLLOWED; ++ if (it == NULL) ++ it = nd->it; ++ else if (it != nd->it) ++ printk("it != nd->it: tell phil@clusterfs.com\n"); ++ if (it != NULL) ++ it->it_int_flags |= IT_FL_FOLLOWED; + if (*link == '/') 
{ path_release(nd); @@ -941,7 +1006,7 @@ out: if (current->link_count || res || nd->last_type!=LAST_NORM) return res; -@@ -1986,7 +2250,13 @@ fail: +@@ -1986,7 +2203,13 @@ fail: int vfs_follow_link(struct nameidata *nd, const char *link) { @@ -956,7 +1021,7 @@ } /* get the link contents into pagecache */ -@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent +@@ -2028,7 +2251,7 @@ int page_follow_link(struct dentry *dent { struct page *page = NULL; char *s = page_getlink(dentry, &page); @@ -965,19 +1030,8 @@ if (page) { kunmap(page); page_cache_release(page); ---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c Wed May 28 01:39:18 2003 -@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); ---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/open.c Wed May 28 01:39:18 2003 +--- linux-2.4.20-ad/fs/open.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/fs/open.c 2003-07-08 13:51:14.000000000 -0600 @@ -19,6 +19,8 @@ #include @@ -999,7 +1053,7 @@ int error; struct iattr newattrs; -@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l +@@ -108,7 +111,13 @@ int do_truncate(struct dentry *dentry, l down(&inode->i_sem); newattrs.ia_size = length; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; @@ -1008,9 +1062,8 @@ + newattrs.ia_valid |= ATTR_FROM_OPEN; + if (op->setattr_raw) { + newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; + error = op->setattr_raw(inode, &newattrs); -+ } else ++ } else + error = notify_change(dentry, &newattrs); up(&inode->i_sem); return error; @@ -1035,13 +1088,13 @@ if (!error) { 
DQUOT_INIT(inode); - error = do_truncate(nd.dentry, length); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); + error = do_truncate(nd.dentry, length, 0); } put_write_access(inode); dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; @@ -1069,7 +1122,7 @@ error = -EROFS; if (IS_RDONLY(inode)) goto dput_and_out; -@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam +@@ -279,11 +294,25 @@ asmlinkage long sys_utime(char * filenam goto dput_and_out; newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; @@ -1086,10 +1139,6 @@ + goto dput_and_out; + } + -+ error = -EROFS; -+ if (IS_RDONLY(inode)) -+ goto dput_and_out; -+ + error = -EPERM; + if (!times) { if (current->fsuid != inode->i_uid && @@ -1158,7 +1207,7 @@ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) && !special_file(nd.dentry->d_inode->i_mode)) res = -EROFS; -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } @@ -1177,7 +1226,7 @@ set_fs_pwd(current->fs, nd.mnt, nd.dentry); dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; @@ -1198,7 +1247,7 @@ set_fs_altroot(); error = 0; dput_and_out: -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); out: return error; @@ -1231,7 +1280,7 @@ + + newattrs.ia_uid = user; + newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; + newattrs.ia_valid |= ATTR_RAW; + error = op->setattr_raw(inode, &newattrs); + /* the file system wants to use normal vfs path now */ @@ -1276,19 +1325,27 @@ { struct file * f; struct inode *inode; -@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry * +@@ -693,12 +776,15 @@ struct file *dentry_open(struct dentry * + } + + if (f->f_op && f->f_op->open) { ++ f->f_it = it; + error = f->f_op->open(inode,f); ++ f->f_it = NULL; + if (error) + goto cleanup_all; } f->f_flags &= 
~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); -+ intent_release(dentry, it); ++ intent_release(it); return f; cleanup_all: -@@ -713,11 +797,17 @@ cleanup_all: +@@ -713,11 +799,17 @@ cleanup_all: cleanup_file: put_filp(f); cleanup_dentry: -+ intent_release(dentry, it); ++ intent_release(it); dput(dentry); mntput(mnt); return ERR_PTR(error); @@ -1302,9 +1359,24 @@ /* * Find an empty file descriptor entry, and mark it busy. */ ---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla Thu Sep 13 19:04:43 2001 -+++ linux-2.4.20-l18-phil/fs/stat.c Wed May 28 01:39:18 2003 -@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in +--- linux-2.4.20-ad/fs/stat.c~vfs_intent-2.4.20-vanilla 2001-09-13 17:04:43.000000000 -0600 ++++ linux-2.4.20-ad-braam/fs/stat.c 2003-07-07 15:13:53.000000000 -0600 +@@ -17,10 +17,12 @@ + * Revalidate the inode. This is required for proper NFS attribute caching. + */ + static __inline__ int +-do_revalidate(struct dentry *dentry) ++do_revalidate(struct dentry *dentry, struct lookup_intent *it) + { + struct inode * inode = dentry->d_inode; +- if (inode->i_op && inode->i_op->revalidate) ++ if (inode->i_op && inode->i_op->revalidate_it) ++ return inode->i_op->revalidate_it(dentry, it); ++ else if (inode->i_op && inode->i_op->revalidate) + return inode->i_op->revalidate(dentry); + return 0; + } +@@ -135,13 +137,15 @@ static int cp_new_stat(struct inode * in asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) { struct nameidata nd; @@ -1314,14 +1386,15 @@ - error = user_path_walk(filename, &nd); + error = user_path_walk_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; -@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename +@@ -151,13 +155,15 @@ asmlinkage long 
sys_stat(char * filename asmlinkage long sys_newstat(char * filename, struct stat * statbuf) { struct nameidata nd; @@ -1331,14 +1404,15 @@ - error = user_path_walk(filename, &nd); + error = user_path_walk_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; -@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen +@@ -172,13 +178,15 @@ asmlinkage long sys_newstat(char * filen asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) { struct nameidata nd; @@ -1348,14 +1422,15 @@ - error = user_path_walk_link(filename, &nd); + error = user_path_walk_link_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; -@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam +@@ -189,13 +197,15 @@ asmlinkage long sys_lstat(char * filenam asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) { struct nameidata nd; @@ -1365,14 +1440,42 @@ - error = user_path_walk_link(filename, &nd); + error = user_path_walk_link_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; -@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena +@@ -216,7 +226,7 @@ asmlinkage long sys_fstat(unsigned int f + if (f) { + struct dentry * dentry = f->f_dentry; + +- err = do_revalidate(dentry); ++ 
err = do_revalidate(dentry, NULL); + if (!err) + err = cp_old_stat(dentry->d_inode, statbuf); + fput(f); +@@ -235,7 +245,7 @@ asmlinkage long sys_newfstat(unsigned in + if (f) { + struct dentry * dentry = f->f_dentry; + +- err = do_revalidate(dentry); ++ err = do_revalidate(dentry, NULL); + if (!err) + err = cp_new_stat(dentry->d_inode, statbuf); + fput(f); +@@ -257,7 +267,7 @@ asmlinkage long sys_readlink(const char + + error = -EINVAL; + if (inode->i_op && inode->i_op->readlink && +- !(error = do_revalidate(nd.dentry))) { ++ !(error = do_revalidate(nd.dentry, NULL))) { + UPDATE_ATIME(inode); + error = inode->i_op->readlink(nd.dentry, buf, bufsiz); + } +@@ -333,12 +343,14 @@ asmlinkage long sys_stat64(char * filena { struct nameidata nd; int error; @@ -1381,14 +1484,15 @@ - error = user_path_walk(filename, &nd); + error = user_path_walk_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; -@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen +@@ -348,12 +360,14 @@ asmlinkage long sys_lstat64(char * filen { struct nameidata nd; int error; @@ -1397,43 +1501,60 @@ - error = user_path_walk_link(filename, &nd); + error = user_path_walk_link_it(filename, &nd, &it); if (!error) { - error = do_revalidate(nd.dentry); +- error = do_revalidate(nd.dentry); ++ error = do_revalidate(nd.dentry, &it); if (!error) error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); ++ intent_release(&it); path_release(&nd); } return error; ---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla Wed Jun 4 22:53:14 2003 -+++ linux-2.4.20-l18-phil/fs/proc/base.c Wed Jun 4 22:50:35 2003 +@@ -368,7 +382,7 @@ asmlinkage long sys_fstat64(unsigned lon + if (f) { + struct dentry * dentry = f->f_dentry; 
+ +- err = do_revalidate(dentry); ++ err = do_revalidate(dentry, NULL); + if (!err) + err = cp_new_stat64(dentry->d_inode, statbuf); + fput(f); +--- linux-2.4.20-ad/fs/proc/base.c~vfs_intent-2.4.20-vanilla 2002-08-02 18:39:45.000000000 -0600 ++++ linux-2.4.20-ad-braam/fs/proc/base.c 2003-07-07 15:13:53.000000000 -0600 @@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); nd->last_type = LAST_BIND; + -+ if (nd->it != NULL) -+ nd->it->it_int_flags |= IT_FL_FOLLOWED; ++ if (nd->it != NULL) ++ nd->it->it_int_flags |= IT_FL_FOLLOWED; out: return error; } ---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/include/linux/dcache.h Sun Jun 1 22:35:10 2003 -@@ -7,6 +7,28 @@ +--- linux-2.4.20-ad/include/linux/dcache.h~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/include/linux/dcache.h 2003-07-09 01:40:11.000000000 -0600 +@@ -7,6 +7,44 @@ #include #include -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) ++#define IT_OPEN 0x0001 ++#define IT_CREAT 0x0002 ++#define IT_READDIR 0x0004 ++#define IT_GETATTR 0x0008 ++#define IT_LOOKUP 0x0010 ++#define IT_UNLINK 0x0020 ++#define IT_GETXATTR 0x0040 ++#define IT_EXEC 0x0080 ++#define IT_PIN 0x0100 ++ ++#define IT_FL_LOCKED 0x0001 ++#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ + -+#define IT_FL_LOCKED (1) -+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */ ++#define INTENT_MAGIC 0x19620323 + +struct lookup_intent { + int it_op; ++ void (*it_op_release)(struct lookup_intent *); ++ int it_magic; + int it_mode; + int it_flags; + int it_disposition; @@ -1444,34 +1565,42 @@ + void *it_data; +}; + ++static inline void intent_init(struct lookup_intent *it, int op, int flags) ++{ ++ memset(it, 0, sizeof(*it)); ++ 
it->it_magic = INTENT_MAGIC; ++ it->it_op = op; ++ it->it_flags = flags; ++} ++ ++ /* * linux/include/linux/dcache.h * -@@ -79,6 +101,7 @@ struct dentry { - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; - struct super_block * d_sb; /* The root of the dentry tree */ -+ struct lookup_intent *d_it; - unsigned long d_vfs_flags; - void * d_fsdata; /* fs-specific data */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ -@@ -91,8 +114,15 @@ struct dentry_operations { +@@ -91,8 +129,22 @@ struct dentry_operations { int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); ++ int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *); ++ void (*d_pin)(struct dentry *, struct vfsmount * , int); ++ void (*d_unpin)(struct dentry *, struct vfsmount *, int); }; ++#define PIN(de,mnt,flag) if (de->d_op && de->d_op->d_pin) \ ++ de->d_op->d_pin(de, mnt, flag); ++#define UNPIN(de,mnt,flag) if (de->d_op && de->d_op->d_unpin) \ ++ de->d_op->d_unpin(de, mnt, flag); ++ ++ +/* defined in fs/namei.c */ -+extern void intent_release(struct dentry *de, struct lookup_intent *it); ++extern void intent_release(struct lookup_intent *it); +/* defined in fs/dcache.c */ +extern void __d_rehash(struct dentry * entry, int lock); + /* the dentry parameter passed to d_hash and d_compare is the parent * directory of the entries to be compared. It is used in case these * functions need any directory specific information for determining -@@ -124,6 +154,7 @@ d_iput: no no yes +@@ -124,6 +176,7 @@ d_iput: no no yes * s_nfsd_free_path semaphore will be down */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ @@ -1479,14 +1608,15 @@ extern spinlock_t dcache_lock; ---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla Wed May 28 01:39:17 2003 -+++ linux-2.4.20-l18-phil/include/linux/fs.h Sun Jun 1 22:07:11 2003 -@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he +--- linux-2.4.20/include/linux/fs.h~vfs_intent-2.4.20-vanilla 2003-06-12 03:24:59.000000000 -0600 ++++ linux-2.4.20-braam/include/linux/fs.h 2003-06-12 03:25:00.000000000 -0600 +@@ -338,6 +338,9 @@ extern void set_bh_page(struct buffer_he #define ATTR_MTIME_SET 256 #define ATTR_FORCE 512 /* Not a change, but a change it */ #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ ++#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ ++#define ATTR_CTIME_SET 0x2000 /* * This is the Inode Attributes structure, used for notify_change(). 
It @@ -1494,7 +1624,7 @@ /* needed for tty driver, and maybe others */ void *private_data; -+ struct lookup_intent *f_intent; ++ struct lookup_intent *f_it; /* preallocated helper kiobuf to speedup O_DIRECT */ struct kiobuf *f_iobuf; @@ -1502,56 +1632,54 @@ struct qstr last; unsigned int flags; int last_type; -+ struct lookup_intent *it; ++ struct lookup_intent *it; }; #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s +@@ -794,7 +798,8 @@ extern int vfs_symlink(struct inode *, s extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *); -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it); ++ struct inode *new_dir, struct dentry *new_dentry); /* * File types -@@ -855,20 +861,33 @@ struct file_operations { +@@ -854,21 +859,32 @@ struct file_operations { + struct inode_operations { int (*create) (struct inode *,struct dentry *,int); ++ int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *); struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); ++ struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags); int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, const char *, int); ++ int (*link_raw) (struct nameidata *,struct nameidata *); int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); ++ int (*unlink_raw) (struct nameidata *); int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode *, const char 
*, int, const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); ++ int (*mkdir_raw) (struct nameidata *,int); int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir2) (struct inode *, const char *, int); ++ int (*rmdir_raw) (struct nameidata *); int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod2) (struct inode *, const char *, int,int,int); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); int (*readlink) (struct dentry *, char *,int); int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); void (*truncate) (struct inode *); int (*permission) (struct inode *, int); int (*revalidate) (struct dentry *); ++ int (*revalidate_it) (struct dentry *, struct lookup_intent *); int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); int (*getattr) (struct dentry *, struct iattr *); int (*setxattr) (struct dentry *, const char *, void *, size_t, int); ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode +@@ -1070,10 +1086,14 @@ static inline int get_lease(struct inode asmlinkage long sys_open(const char *, int, int); asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ @@ -1567,7 +1695,7 @@ extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char *); -@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip +@@ -1335,6 +1355,7 @@ 
typedef int (*read_actor_t)(read_descrip extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); @@ -1575,7 +1703,7 @@ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); extern int FASTCALL(path_walk(const char *, struct nameidata *)); extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co +@@ -1346,6 +1367,8 @@ extern struct dentry * lookup_one_len(co extern struct dentry * lookup_hash(struct qstr *, struct dentry *); #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) @@ -1584,7 +1712,7 @@ extern void iput(struct inode *); extern void force_delete(struct inode *); -@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro +@@ -1455,6 +1478,8 @@ extern struct file_operations generic_ro extern int vfs_readlink(struct dentry *, char *, int, const char *); extern int vfs_follow_link(struct nameidata *, const char *); @@ -1593,8 +1721,36 @@ extern int page_readlink(struct dentry *, char *, int); extern int page_follow_link(struct dentry *, struct nameidata *); extern struct inode_operations page_symlink_inode_operations; ---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla Wed May 28 01:39:18 2003 -+++ linux-2.4.20-l18-phil/kernel/ksyms.c Wed May 28 01:39:18 2003 +--- linux-2.4.20-ad/include/linux/fs_struct.h~vfs_intent-2.4.20-vanilla 2001-07-13 16:10:44.000000000 -0600 ++++ linux-2.4.20-ad-braam/include/linux/fs_struct.h 2003-07-07 15:13:53.000000000 -0600 +@@ -34,10 +34,12 @@ static inline void set_fs_root(struct fs + write_lock(&fs->lock); + old_root = fs->root; + old_rootmnt = fs->rootmnt; ++ PIN(dentry, mnt, 1); + fs->rootmnt = mntget(mnt); + fs->root = dget(dentry); + write_unlock(&fs->lock); + if (old_root) { ++ UNPIN(old_root, 
old_rootmnt, 1); + dput(old_root); + mntput(old_rootmnt); + } +@@ -57,10 +59,12 @@ static inline void set_fs_pwd(struct fs_ + write_lock(&fs->lock); + old_pwd = fs->pwd; + old_pwdmnt = fs->pwdmnt; ++ PIN(dentry, mnt, 0); + fs->pwdmnt = mntget(mnt); + fs->pwd = dget(dentry); + write_unlock(&fs->lock); + if (old_pwd) { ++ UNPIN(old_pwd, old_pwdmnt, 0); + dput(old_pwd); + mntput(old_pwdmnt); + } +--- linux-2.4.20-ad/kernel/ksyms.c~vfs_intent-2.4.20-vanilla 2003-07-07 15:13:52.000000000 -0600 ++++ linux-2.4.20-ad-braam/kernel/ksyms.c 2003-07-07 15:13:53.000000000 -0600 @@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page); EXPORT_SYMBOL(set_page_dirty); EXPORT_SYMBOL(vfs_readlink); @@ -1603,5 +1759,38 @@ EXPORT_SYMBOL(page_readlink); EXPORT_SYMBOL(page_follow_link); EXPORT_SYMBOL(page_symlink_inode_operations); +--- linux-2.4.20-ad/kernel/fork.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/kernel/fork.c 2003-07-07 15:13:53.000000000 -0600 +@@ -384,10 +384,13 @@ static inline struct fs_struct *__copy_f + fs->umask = old->umask; + read_lock(&old->lock); + fs->rootmnt = mntget(old->rootmnt); ++ PIN(old->pwd, old->pwdmnt, 0); ++ PIN(old->root, old->rootmnt, 1); + fs->root = dget(old->root); + fs->pwdmnt = mntget(old->pwdmnt); + fs->pwd = dget(old->pwd); + if (old->altroot) { ++ PIN(old->altroot, old->altrootmnt, 1); + fs->altrootmnt = mntget(old->altrootmnt); + fs->altroot = dget(old->altroot); + } else { +--- linux-2.4.20-ad/kernel/exit.c~vfs_intent-2.4.20-vanilla 2002-11-28 16:53:15.000000000 -0700 ++++ linux-2.4.20-ad-braam/kernel/exit.c 2003-07-07 15:13:53.000000000 -0600 +@@ -238,11 +238,14 @@ static inline void __put_fs_struct(struc + { + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { ++ UNPIN(fs->pwd, fs->pwdmnt, 0); ++ UNPIN(fs->root, fs->rootmnt, 1); + dput(fs->root); + mntput(fs->rootmnt); + dput(fs->pwd); + mntput(fs->pwdmnt); + if (fs->altroot) { ++ UNPIN(fs->altroot, 
fs->altrootmnt, 1); + dput(fs->altroot); + mntput(fs->altrootmnt); + } _ diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc index 5770132..1afa4d4 100644 --- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc +++ b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc @@ -1,3 +1,5 @@ fs/ext3/super.c +fs/ext3/file.c +fs/ext3/inode.c include/linux/ext3_fs.h include/linux/ext3_fs_sb.h diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc index 5770132..a2c3109 100644 --- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc +++ b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc @@ -1,3 +1,5 @@ fs/ext3/super.c +fs/ext3/inode.c +fs/ext3/file.c include/linux/ext3_fs.h include/linux/ext3_fs_sb.h diff --git a/lustre/kernel_patches/pc/extN-wantedi.pc b/lustre/kernel_patches/pc/extN-wantedi.pc index 31901ee..6ad2589 100644 --- a/lustre/kernel_patches/pc/extN-wantedi.pc +++ b/lustre/kernel_patches/pc/extN-wantedi.pc @@ -1,4 +1,5 @@ fs/ext3/namei.c fs/ext3/ialloc.c +fs/ext3/inode.c fs/ext3/ioctl.c include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc index b647d5a..6c80106 100644 --- a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc +++ b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc @@ -2,6 +2,7 @@ fs/ext3/ialloc.c fs/ext3/inode.c fs/ext3/namei.c fs/ext3/super.c +fs/ext3/ext3-exports.c fs/ext3/xattr.c include/linux/ext3_fs.h include/linux/ext3_jbd.h diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc index f8a99ea..f3375a3 100644 --- a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc +++ b/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc @@ -1,5 +1,6 @@ fs/exec.c fs/dcache.c +fs/namespace.c fs/namei.c fs/nfsd/vfs.c fs/open.c @@ -7,4 +8,7 @@ fs/stat.c 
fs/proc/base.c include/linux/dcache.h include/linux/fs.h +include/linux/fs_struct.h kernel/ksyms.c +kernel/fork.c +kernel/exit.c diff --git a/lustre/kernel_patches/scripts/patchfns b/lustre/kernel_patches/scripts/patchfns index 78e494b..8d3d4f0 100644 --- a/lustre/kernel_patches/scripts/patchfns +++ b/lustre/kernel_patches/scripts/patchfns @@ -78,7 +78,7 @@ check_pc_match() if [ $? != 0 ]; then echo " $1 do not match with $2 " echo " $2 will be changed to match $2" - cat $tmpfile > $P/pc/$PATCH_NAME.pc + # cat $tmpfile > $P/pc/$PATCH_NAME.pc fi rm -rf $tmpfile fi diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20 index b951209..c2cc2fa 100644 --- a/lustre/kernel_patches/series/hp-pnnl-2.4.20 +++ b/lustre/kernel_patches/series/hp-pnnl-2.4.20 @@ -2,7 +2,7 @@ dev_read_only_hp_2.4.20.patch exports_2.4.20-rh-hp.patch kmem_cache_validate_hp.patch lustre_version.patch -vfs_intent-2.4.20-vanilla.patch +vfs_intent-2.4.20-hp.patch invalidate_show.patch export-truncate.patch iod-stock-24-exports_hp.patch @@ -21,5 +21,7 @@ ext3-delete_thread-2.4.20.patch ext3-noread-2.4.20.patch extN-wantedi.patch ext3-san-2.4.20.patch +ext3-map_inode_page.patch +ext3-error-export.patch iopen-2.4.20.patch tcp-zero-copy.patch diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20 index a97c37c..970061d 100644 --- a/lustre/kernel_patches/series/rh-2.4.20 +++ b/lustre/kernel_patches/series/rh-2.4.20 @@ -15,9 +15,12 @@ ext-2.4-patch-4.patch linux-2.4.20-xattr-0.8.54-chaos.patch ext3-2.4.20-fixes.patch ext3_orphan_lock-2.4.20-rh.patch -ext3-delete_thread-2.4.20.patch +ext3_delete_thread_2.4.20_chaos.patch ext3-noread-2.4.20.patch extN-wantedi.patch ext3-san-2.4.20.patch +ext3-map_inode_page.patch +ext3-error-export.patch iopen-2.4.20.patch -tcp-zero-copy.patch +tcp_zero_copy_2.4.20_chaos.patch +gpl_header-chaos-2.4.20.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 
b/lustre/kernel_patches/series/vanilla-2.4.20 index e56cac6c..726a028 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ b/lustre/kernel_patches/series/vanilla-2.4.20 @@ -1,4 +1,4 @@ -uml-patch-2.4.20-4.patch +uml-patch-2.4.20-6.patch dev_read_only_2.4.20.patch exports_2.4.20.patch kmem_cache_validate_2.4.20.patch @@ -25,5 +25,7 @@ ext3-noread-2.4.20.patch ext3-delete_thread-2.4.20.patch extN-wantedi.patch ext3-san-2.4.20.patch +ext3-map_inode_page.patch +ext3-error-export.patch iopen-2.4.20.patch tcp-zero-copy.patch diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index 2ef001d..28e8648 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -1,13 +1,8 @@ -SERIES MEMNONIC COMMENT +SERIES MEMNONIC COMMENT ARCH -hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml -vanilla-2.4.20 linux-2.4.20 patch includes uml -chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 -rh-2.4.20 linux-rh-2.4.20-8 same as chaos-2.4.20 -rh-2.4.18-18 linux-rh-2.4.18-18 same as chaos but includes uml -chaos linux-chaos-2.4.18 same as rh-2.4.18-18 but no uml - -REVIEW: - -vanilla-2.5 linux-2.5.63 -hp-pnnl linux-2.4.19-hp2_pnnl6 +chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386 +hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64 +vanilla-2.4.20 linux-2.4.20 patch with uml-2.4.20-6 um +chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 i386 +rh-2.4.20 linux-rh-2.4.20-8 same as chaos-2.4.20 i386 +kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386 diff --git a/lustre/ldlm/.cvsignore b/lustre/ldlm/.cvsignore index e995588..e69dc6d 100644 --- a/lustre/ldlm/.cvsignore +++ b/lustre/ldlm/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.cmd diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 9b53b54..e3f8673 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -32,7 +32,7 @@ #include #include -int client_import_connect(struct lustre_handle 
*dlm_handle, +int client_import_connect(struct lustre_handle *dlm_handle, struct obd_device *obd, struct obd_uuid *cluuid) { @@ -47,7 +47,6 @@ int client_import_connect(struct lustre_handle *dlm_handle, char *tmp[] = {imp->imp_target_uuid.uuid, obd->obd_uuid.uuid, (char *)dlm_handle}; - int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT; int msg_flags; ENTRY; @@ -67,13 +66,15 @@ int client_import_connect(struct lustre_handle *dlm_handle, if (obd->obd_namespace == NULL) GOTO(out_disco, rc = -ENOMEM); - request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp); + request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp); if (!request) GOTO(out_ldlm, rc = -ENOMEM); request->rq_level = LUSTRE_CONN_NEW; request->rq_replen = lustre_msg_size(0, NULL); + lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_PEER); + imp->imp_dlm_handle = *dlm_handle; imp->imp_level = LUSTRE_CONN_CON; @@ -88,7 +89,7 @@ int client_import_connect(struct lustre_handle *dlm_handle, class_export_put(exp); msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); - if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) { + if (msg_flags & MSG_CONNECT_REPLAYABLE) { imp->imp_replayable = 1; CDEBUG(D_HA, "connected to replayable target: %s\n", imp->imp_target_uuid.uuid); @@ -130,7 +131,16 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover) RETURN(-EINVAL); } - rq_opc = obd->obd_type->typ_ops->o_brw ? 
OST_DISCONNECT:MDS_DISCONNECT; + switch (imp->imp_connect_op) { + case OST_CONNECT: rq_opc = OST_DISCONNECT; break; + case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break; + case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break; + default: + CERROR("don't know how to disconnect from %s (connect_op %d)\n", + imp->imp_target_uuid.uuid, imp->imp_connect_op); + RETURN(-EINVAL); + } + down(&cli->cl_sem); if (!cli->cl_conn_count) { CERROR("disconnecting disconnected device (%s)\n", @@ -229,36 +239,31 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) struct obd_uuid remote_uuid; struct list_head *p; char *str, *tmp; - int rc, i, abort_recovery; + int rc = 0, abort_recovery; ENTRY; LASSERT_REQSWAB (req, 0); - str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1); + str = lustre_msg_string(req->rq_reqmsg, 0, sizeof(tgtuuid) - 1); if (str == NULL) { CERROR("bad target UUID for connect\n"); GOTO(out, rc = -EINVAL); } + obd_str2uuid (&tgtuuid, str); + target = class_uuid2obd(&tgtuuid); + if (!target || target->obd_stopping || !target->obd_set_up) { + CERROR("UUID '%s' is not available for connect\n", str); + GOTO(out, rc = -ENODEV); + } LASSERT_REQSWAB (req, 1); - str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1); + str = lustre_msg_string(req->rq_reqmsg, 1, sizeof(cluuid) - 1); if (str == NULL) { CERROR("bad client UUID for connect\n"); GOTO(out, rc = -EINVAL); } - obd_str2uuid (&cluuid, str); - i = class_uuid2dev(&tgtuuid); - if (i == -1) { - CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid); - GOTO(out, rc = -ENODEV); - } - - target = &obd_dev[i]; - if (!target || target->obd_stopping || !target->obd_set_up) { - CERROR("UUID '%s' is not available for connect\n", str); - GOTO(out, rc = -ENODEV); - } + obd_str2uuid (&cluuid, str); /* XXX extract a nettype and format accordingly */ snprintf(remote_uuid.uuid, sizeof remote_uuid, @@ -491,8 +496,7 @@ static void reset_recovery_timer(struct obd_device *obd) if 
(!recovering) return; - CDEBUG(D_ERROR, "timer will expire in %ld seconds\n", - OBD_RECOVERY_TIMEOUT / HZ); + CERROR("timer will expire in %ld seconds\n", OBD_RECOVERY_TIMEOUT / HZ); mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT); } @@ -568,7 +572,8 @@ static void process_recovery_queue(struct obd_device *obd) DEBUG_REQ(D_ERROR, req, "processing: "); (void)obd->obd_recovery_handler(req); reset_recovery_timer(obd); -#warning FIXME: mds_fsync_super(mds->mds_sb); + /* bug 1580: decide how to properly sync() in recovery */ + //mds_fsync_super(mds->mds_sb); class_export_put(req->rq_export); OBD_FREE(req->rq_reqmsg, req->rq_reqlen); OBD_FREE(req, sizeof *req); @@ -715,8 +720,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) if (recovery_done) { struct list_head *tmp, *n; ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace); - CDEBUG(D_ERROR, - "%s: all clients recovered, sending delayed replies\n", + CERROR("%s: all clients recovered, sending delayed replies\n", obd->obd_name); obd->obd_recovering = 0; list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 2dc60cf..3995e95 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -71,6 +71,8 @@ char *ldlm_it2str(int it) return "lookup"; case IT_UNLINK: return "unlink"; + case IT_GETXATTR: + return "getxattr"; default: CERROR("Unknown intent %d\n", it); return "UNKNOWN"; @@ -954,8 +956,8 @@ int ldlm_run_ast_work(struct list_head *rpc_list) if (rc == -ERESTART) retval = rc; else if (rc) - CERROR("Failed AST - should clean & disconnect " - "client\n"); + CDEBUG(D_DLMTRACE, "Failed AST - should clean & " + "disconnect client\n"); LDLM_LOCK_PUT(w->w_lock); list_del(&w->w_list); OBD_FREE(w, sizeof(*w)); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index de304d4..50bc96a 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -243,8 +243,7 @@ int 
ldlm_del_waiting_lock(struct ldlm_lock *lock) #endif /* __KERNEL__ */ -static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc, - char *ast_type) +static void ldlm_failed_ast(struct ldlm_lock *lock, int rc, char *ast_type) { CERROR("%s AST failed (%d) for res "LPU64"/"LPU64 ", mode %s: evicting client %s@%s NID "LPU64"\n", @@ -347,10 +346,19 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, RETURN(rc); } +/* XXX copied from ptlrpc/service.c */ +static long timeval_sub(struct timeval *large, struct timeval *small) +{ + return (large->tv_sec - small->tv_sec) * 1000000 + + (large->tv_usec - small->tv_usec); +} + int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) { struct ldlm_request *body; struct ptlrpc_request *req; + struct timeval granted_time; + long total_enqueue_wait; int rc = 0, size = sizeof(*body); ENTRY; @@ -359,6 +367,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) RETURN(-EINVAL); } + do_gettimeofday(&granted_time); + total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time); + + if (total_enqueue_wait / 1000000 > obd_timeout) + LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait); + req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import, LDLM_CP_CALLBACK, 1, &size, NULL); if (!req) @@ -370,7 +384,8 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) body->lock_flags = flags; ldlm_lock2desc(lock, &body->lock_desc); - LDLM_DEBUG(lock, "server preparing completion AST"); + LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)", + total_enqueue_wait); req->rq_replen = lustre_msg_size(0, NULL); req->rq_level = LUSTRE_CONN_RECOVER; @@ -447,6 +462,7 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, if (!lock) GOTO(out, err = -ENOMEM); + do_gettimeofday(&lock->l_enqueued_time); memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1, sizeof(lock->l_remote_handle)); LDLM_DEBUG(lock, "server-side enqueue 
handler, new lock created"); @@ -640,22 +656,10 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; LDLM_DEBUG(lock, "completion AST, new lock mode"); } - if (lock->l_resource->lr_type == LDLM_EXTENT) { + if (lock->l_resource->lr_type == LDLM_EXTENT) memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent, sizeof(lock->l_extent)); - if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) { - /* XXX Old versions of BA OST code have a fencepost bug - * which will cause them to grant a lock that's one - * byte too large. This can be safely removed after BA - * ships their next release -phik (02 Apr 2003) */ - lock->l_extent.end--; - } else if ((lock->l_extent.start & ~PAGE_MASK) == - ~PAGE_MASK) { - lock->l_extent.start++; - } - } - ldlm_resource_unlink_lock(lock); if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, &lock->l_resource->lr_name, @@ -961,7 +965,7 @@ static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf) return rc; } -static int ldlm_cleanup(struct obd_device *obddev, int force, int failover) +static int ldlm_cleanup(struct obd_device *obddev, int flags) { struct ldlm_obd *ldlm = &obddev->u.ldlm; ENTRY; @@ -973,7 +977,7 @@ static int ldlm_cleanup(struct obd_device *obddev, int force, int failover) } #ifdef __KERNEL__ - if (force) { + if (flags & OBD_OPT_FORCE) { ptlrpc_put_ldlm_hooks(); } else if (ptlrpc_ldlm_hooks_referenced()) { CERROR("Some connections weren't cleaned up; run lconf with " @@ -1084,6 +1088,7 @@ EXPORT_SYMBOL(ldlm_replay_locks); EXPORT_SYMBOL(ldlm_resource_foreach); EXPORT_SYMBOL(ldlm_namespace_foreach); EXPORT_SYMBOL(ldlm_namespace_foreach_res); +EXPORT_SYMBOL(ldlm_change_cbdata); /* ldlm_lockd.c */ EXPORT_SYMBOL(ldlm_server_blocking_ast); diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index e6a8229..75e6dbd 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -273,6 +273,7 @@ int ldlm_cli_enqueue(struct 
lustre_handle *connh, /* Set a flag to prevent us from sending a CANCEL (bug 407) */ l_lock(&ns->ns_lock); lock->l_flags |= LDLM_FL_LOCAL_ONLY; + LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); l_unlock(&ns->ns_lock); ldlm_lock_decref_and_cancel(lockh, mode); @@ -295,7 +296,7 @@ int ldlm_cli_enqueue(struct lustre_handle *connh, CERROR ("Can't unpack ldlm_reply\n"); GOTO (out_req, rc = -EPROTO); } - + memcpy(&lock->l_remote_handle, &reply->lock_handle, sizeof(lock->l_remote_handle)); *flags = reply->lock_flags; @@ -309,17 +310,6 @@ int ldlm_cli_enqueue(struct lustre_handle *connh, body->lock_desc.l_extent.end, reply->lock_extent.start, reply->lock_extent.end); - if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) { - /* XXX Old versions of BA OST code have a fencepost bug - * which will cause them to grant a lock that's one - * byte too large. This can be safely removed after BA - * ships their next release -phik (02 Apr 2003) */ - reply->lock_extent.end--; - } else if ((reply->lock_extent.start & ~PAGE_MASK) == - ~PAGE_MASK) { - reply->lock_extent.start++; - } - cookie = &reply->lock_extent; /* FIXME bug 267 */ cookielen = sizeof(reply->lock_extent); } @@ -454,7 +444,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) CERROR ("Can't unpack ldlm_reply\n"); GOTO (out, rc = -EPROTO); } - + res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags); if (res != NULL) ldlm_reprocess_all(res); @@ -535,11 +525,11 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) local_cancel: ldlm_lock_cancel(lock); } else { - LDLM_DEBUG(lock, "client-side local cancel"); if (lock->l_resource->lr_namespace->ns_client) { - CERROR("Trying to cancel local lock\n"); + LDLM_ERROR(lock, "Trying to cancel local lock\n"); LBUG(); } + LDLM_DEBUG(lock, "client-side local cancel"); ldlm_lock_cancel(lock); ldlm_reprocess_all(lock->l_resource); LDLM_DEBUG(lock, "client-side local cancel handler END"); @@ -631,9 +621,8 @@ static int ldlm_cli_cancel_unused_resource(struct 
ldlm_namespace *ns, lock = list_entry(tmp, struct ldlm_lock, l_res_link); if (opaque != NULL && lock->l_data != opaque) { - LDLM_ERROR(lock, "data %p doesn't match opaque %p res" - LPU64":"LPU64, lock->l_data, opaque, - res_id.name[0], res_id.name[1]); + LDLM_ERROR(lock, "data %p doesn't match opaque %p", + lock->l_data, opaque); //LBUG(); continue; } @@ -797,12 +786,12 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, ldlm_res_iterator_t iter, void *closure) { int i, rc = LDLM_ITER_CONTINUE; - + l_lock(&ns->ns_lock); for (i = 0; i < RES_HASH_SIZE; i++) { struct list_head *tmp, *next; list_for_each_safe(tmp, next, &(ns->ns_hash[i])) { - struct ldlm_resource *res = + struct ldlm_resource *res = list_entry(tmp, struct ldlm_resource, lr_hash); ldlm_resource_getref(res); @@ -817,6 +806,34 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, RETURN(rc); } +/* non-blocking function to manipulate a lock whose cb_data is being put away.*/ +void ldlm_change_cbdata(struct ldlm_namespace *ns, + struct ldlm_res_id *res_id, + ldlm_iterator_t iter, + void *data) +{ + struct ldlm_resource *res; + int rc = 0; + ENTRY; + + if (ns == NULL) { + CERROR("must pass in namespace"); + LBUG(); + } + + res = ldlm_resource_get(ns, NULL, *res_id, 0, 0); + if (res == NULL) { + EXIT; + return; + } + + l_lock(&ns->ns_lock); + rc = ldlm_resource_foreach(res, iter, data); + l_unlock(&ns->ns_lock); + ldlm_resource_putref(res); + EXIT; +} + /* Lock replay */ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) @@ -858,7 +875,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT; else flags = LDLM_FL_REPLAY; - + size = sizeof(*body); req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL); if (!req) @@ -866,7 +883,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) /* We're part of recovery, so don't wait for it. 
*/ req->rq_level = LUSTRE_CONN_RECOVER; - + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); ldlm_lock2desc(lock, &body->lock_desc); body->lock_flags = flags; @@ -879,14 +896,14 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) rc = ptlrpc_queue_wait(req); if (rc != ELDLM_OK) GOTO(out, rc); - + reply = lustre_swab_repbuf(req, 0, sizeof (*reply), lustre_swab_ldlm_reply); if (reply == NULL) { CERROR("Can't unpack ldlm_reply\n"); GOTO (out, rc = -EPROTO); } - + memcpy(&lock->l_remote_handle, &reply->lock_handle, sizeof(lock->l_remote_handle)); LDLM_DEBUG(lock, "replayed lock:"); @@ -901,7 +918,7 @@ int ldlm_replay_locks(struct obd_import *imp) struct list_head list, *pos, *next; struct ldlm_lock *lock; int rc = 0; - + ENTRY; INIT_LIST_HEAD(&list); diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 84fdecc..4449c79 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -114,12 +114,10 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) if (!ns) RETURN(NULL); - ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE); + OBD_VMALLOC(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE); if (!ns->ns_hash) GOTO(out_ns, NULL); - atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); - OBD_ALLOC(ns->ns_name, strlen(name) + 1); if (!ns->ns_name) GOTO(out_hash, NULL); @@ -152,8 +150,7 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) out_hash: POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); - vfree(ns->ns_hash); - atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); + OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE); out_ns: OBD_FREE(ns, sizeof(*ns)); return NULL; @@ -186,6 +183,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, lock->l_flags |= LDLM_FL_CBPENDING; /* ... without sending a CANCEL message. 
*/ lock->l_flags |= LDLM_FL_LOCAL_ONLY; + LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY"); /* ... and without calling the cancellation callback */ lock->l_flags |= LDLM_FL_CANCEL; LDLM_LOCK_PUT(lock); @@ -272,8 +270,7 @@ int ldlm_namespace_free(struct ldlm_namespace *ns) ldlm_namespace_cleanup(ns, 0); POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); - vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */); - atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); + OBD_VFREE(ns->ns_hash, sizeof(*ns->ns_hash) * RES_HASH_SIZE); OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1); OBD_FREE(ns, sizeof(*ns)); diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 8344af5..88af047 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -145,7 +145,7 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode) it = dentry->d_it; - rc = ll_it_open_error(IT_OPEN_CREATE, it); + rc = ll_it_open_error(DISP_OPEN_CREATE, it); if (rc) { LL_GET_INTENT(dentry, it); ptlrpc_req_finished(it->it_data); @@ -317,7 +317,7 @@ static int llu_file_open(struct inode *inode) #if 0 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); LL_GET_INTENT(file->f_dentry, it); - rc = ll_it_open_error(IT_OPEN_OPEN, it); + rc = ll_it_open_error(DISP_OPEN_OPEN, it); if (rc) RETURN(rc); #endif @@ -477,7 +477,7 @@ static int llu_file_release(struct inode *inode) oa.o_id = lsm->lsm_object_id; oa.o_mode = S_IFREG; oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - + memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); oa.o_valid |= OBD_MD_FLHANDLE; diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 0e88933..0939352 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -715,7 +715,7 @@ llu_fsswop_mount(const char *source, /* XXX do we need this?? 
memset(&osfs, 0, sizeof(osfs)); - rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); + rc = obd_statfs(class_conn2obd(&sbi->ll_mdc_conn),&osfs,jiffies-100*HZ); */ /* fetch attr of root inode */ err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, @@ -765,9 +765,9 @@ out_inode: out_request: ptlrpc_req_finished(request); out_osc: - obd_disconnect(&sbi->ll_osc_conn); + obd_disconnect(&sbi->ll_osc_conn, 0); out_mdc: - obd_disconnect(&sbi->ll_mdc_conn); + obd_disconnect(&sbi->ll_mdc_conn, 0); out_free: OBD_FREE(sbi, sizeof(*sbi)); return err; diff --git a/lustre/llite/.cvsignore b/lustre/llite/.cvsignore index e530020..49c6100 100644 --- a/lustre/llite/.cvsignore +++ b/lustre/llite/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am index b6fc501..9ef9b7f 100644 --- a/lustre/llite/Makefile.am +++ b/lustre/llite/Makefile.am @@ -9,8 +9,8 @@ MODULE = llite modulefs_DATA = llite.o EXTRA_PROGRAMS = llite -llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c -llite_SOURCES += file.c dir.c sysctl.c symlink.c -llite_SOURCES += namei.c lproc_llite.c llite_internal.h +llite_SOURCES = dcache.c commit_callback.c rw.c super25.c +llite_SOURCES += file.c dir.c sysctl.c symlink.c llite_lib.c +llite_SOURCES += namei.c lproc_llite.c super.c iod.c llite_internal.h include $(top_srcdir)/Rules diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 0684968..8c55b3d 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -31,22 +31,19 @@ #include #include +#include "llite_internal.h" + /* should NOT be called with the dcache lock, see fs/dcache.c */ -void ll_release(struct dentry *de) +static void ll_release(struct dentry *de) { + struct ll_dentry_data *lld = ll_d2d(de); ENTRY; + + LASSERT(lld->lld_cwd_count == 0); + LASSERT(lld->lld_mnt_count == 0); OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data)); - EXIT; -} -int ll_delete(struct dentry *de) -{ - if (de->d_it != 0) { - CERROR("%s 
put dentry %p+%p with d_it %p\n", current->comm, - de, de->d_fsdata, de->d_it); - LBUG(); - } - return 0; + EXIT; } void ll_set_dd(struct dentry *de) @@ -55,23 +52,20 @@ void ll_set_dd(struct dentry *de) LASSERT(de != NULL); lock_kernel(); - if (de->d_fsdata == NULL) { OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data)); - sema_init(&ll_d2d(de)->lld_it_sem, 1); } - unlock_kernel(); EXIT; } -void ll_intent_release(struct dentry *de, struct lookup_intent *it) +void ll_intent_release(struct lookup_intent *it) { struct lustre_handle *handle; ENTRY; - if (it->it_lock_mode) { + if (it->it_op && it->it_lock_mode) { handle = (struct lustre_handle *)it->it_lock_handle; CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64 " from it %p\n", @@ -83,84 +77,73 @@ void ll_intent_release(struct dentry *de, struct lookup_intent *it) lock (see bug 494) */ it->it_lock_mode = 0; } + it->it_magic = 0; + it->it_op_release = 0; + EXIT; +} - if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) { - EXIT; +void ll_unhash_aliases(struct inode *inode) +{ + struct dentry *dentry = NULL; + struct list_head *tmp; + struct ll_sb_info *sbi; + ENTRY; + + if (inode == NULL) { + CERROR("unexpected NULL inode, tell phil\n"); return; } - if (de->d_it == it) - LL_GET_INTENT(de, it); - else - CDEBUG(D_INODE, "STRANGE intent release: %p %p\n", - de->d_it, it); + sbi = ll_i2sbi(inode); + + CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n", + inode->i_ino, inode->i_generation); + spin_lock(&dcache_lock); + list_for_each(tmp, &inode->i_dentry) { + dentry = list_entry(tmp, struct dentry, d_alias); + + list_del_init(&dentry->d_hash); + dentry->d_flags |= DCACHE_LUSTRE_INVALID; + list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list); + } + + spin_unlock(&dcache_lock); EXIT; } extern struct dentry *ll_find_alias(struct inode *, struct dentry *); -static int revalidate2_finish(int flag, struct ptlrpc_request *request, +static int revalidate_it_finish(struct ptlrpc_request *request, struct inode *parent, 
struct dentry **de, struct lookup_intent *it, int offset, obd_id ino) { struct ll_sb_info *sbi = ll_i2sbi(parent); - struct mds_body *body; - struct lov_stripe_md *lsm = NULL; - struct lov_mds_md *lmm; - int lmmsize; + struct lustre_md md; int rc = 0; ENTRY; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ - if ((flag & LL_LOOKUP_NEGATIVE) != 0) - GOTO (out, rc = -ENOENT); + if (it_disposition(it, DISP_LOOKUP_NEG)) + RETURN(-ENOENT); - /* We only get called if the mdc_enqueue() called from - * ll_intent_lock() was successful. Therefore the mds_body is - * present and correct, and the eadata is present (but still - * opaque, so only obd_unpackmd() can check the size) */ - body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body)); - LASSERT (body != NULL); - LASSERT_REPSWABBED (request, offset); + /* ll_intent_lock was successful, now prepare the lustre_md) */ + rc = mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md); + if (rc) + RETURN(rc); - if (body->valid & OBD_MD_FLEASIZE) { - /* Only bother with this if inodes's LSM not set? */ - - if (body->eadatasize == 0) { - CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n"); - GOTO (out, rc = -EPROTO); - } - lmmsize = body->eadatasize; - lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize); - LASSERT (lmm != NULL); - LASSERT_REPSWABBED (request, offset + 1); - - rc = obd_unpackmd (&sbi->ll_osc_conn, - &lsm, lmm, lmmsize); - if (rc < 0) { - CERROR ("Error %d unpacking eadata\n", rc); - LBUG(); - /* XXX don't know if I should do this... 
*/ - GOTO (out, rc); - /* or skip the ll_update_inode but still do - * mdc_lock_set_inode() */ - } - LASSERT (rc >= sizeof (*lsm)); - rc = 0; - } + ll_update_inode((*de)->d_inode, md.body, md.lsm); - ll_update_inode((*de)->d_inode, body, lsm); + if (md.lsm != NULL && ll_i2info((*de)->d_inode)->lli_smd != md.lsm) + obd_free_memmd (&sbi->ll_osc_conn, &md.lsm); - if (lsm != NULL && - ll_i2info((*de)->d_inode)->lli_smd != lsm) - obd_free_memmd (&sbi->ll_osc_conn, &lsm); - - ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle, - (*de)->d_inode); - out: + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", + (*de)->d_inode, (*de)->d_inode->i_ino, + (*de)->d_inode->i_generation); + ldlm_lock_set_data((struct lustre_handle *)it->it_lock_handle, + (*de)->d_inode); RETURN(rc); } @@ -197,20 +180,26 @@ int ll_have_md_lock(struct dentry *de) RETURN(0); } -int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) +int ll_revalidate_it(struct dentry *de, int flags, struct lookup_intent *it) { int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name, LL_IT2STR(it)); - /* We don't want to cache negative dentries, so return 0 immediately. 
- * We believe that this is safe, that negative dentries cannot be - * pinned by someone else */ - if (de->d_inode == NULL) { - CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n"); + /* Cached negative dentries are unsafe for now - look them up again */ + if (de->d_inode == NULL) RETURN(0); - } + + /* + * never execute intents for mount points + * - attrs will be fixed up in ll_revalidate_inode + */ + if (d_mountpoint(de)) + RETURN(1); + + if (it) + it->it_op_release = ll_intent_release; if (it == NULL || it->it_op == IT_GETATTR) { /* We could just return 1 immediately, but since we should only @@ -233,7 +222,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) memcpy(it->it_lock_handle, &lockh, sizeof(lockh)); it->it_lock_mode = LCK_PR; - LL_SAVE_INTENT(de, it); } else { ldlm_lock_decref(&lockh, LCK_PR); } @@ -248,7 +236,6 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) memcpy(it->it_lock_handle, &lockh, sizeof(lockh)); it->it_lock_mode = LCK_PW; - LL_SAVE_INTENT(de, it); } else { ldlm_lock_decref(&lockh, LCK_PW); } @@ -256,31 +243,123 @@ int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) } if (S_ISDIR(de->d_inode->i_mode)) ll_invalidate_inode_pages(de->d_inode); - d_unhash_aliases(de->d_inode); + ll_unhash_aliases(de->d_inode); RETURN(0); } - rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish); + rc = ll_intent_lock(de->d_parent->d_inode, &de, it, flags, + revalidate_it_finish); if (rc < 0) { if (rc != -ESTALE) { CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc, it->it_status); } + ll_unhash_aliases(de->d_inode); RETURN(0); } /* unfortunately ll_intent_lock may cause a callback and revoke our dentry */ spin_lock(&dcache_lock); - list_del_init(&de->d_hash); + hlist_del_init(&de->d_hash); __d_rehash(de, 0); spin_unlock(&dcache_lock); RETURN(1); } +static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag) +{ + struct inode *inode= 
de->d_inode; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct ll_dentry_data *ldd = ll_d2d(de); + struct obd_client_handle *handle; + int rc = 0; + ENTRY; + LASSERT(ldd); + + lock_kernel(); + /* Strictly speaking this introduces an additional race: the + * increments should wait until the rpc has returned. + * However, given that at present the function is void, this + * issue is moot. */ + if (flag == 1 && (++ldd->lld_mnt_count) > 1) { + unlock_kernel(); + EXIT; + return; + } + + if (flag == 0 && (++ldd->lld_cwd_count) > 1) { + unlock_kernel(); + EXIT; + return; + } + unlock_kernel(); + + handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och; + rc = obd_pin(&sbi->ll_mdc_conn, inode->i_ino, inode->i_generation, + inode->i_mode & S_IFMT, handle, flag); + + if (rc) { + lock_kernel(); + memset(handle, 0, sizeof(*handle)); + if (flag == 0) + ldd->lld_cwd_count--; + else + ldd->lld_mnt_count--; + unlock_kernel(); + } + + EXIT; + return; +} + +static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag) +{ + struct ll_sb_info *sbi = ll_i2sbi(de->d_inode); + struct ll_dentry_data *ldd = ll_d2d(de); + struct obd_client_handle handle; + int count, rc = 0; + ENTRY; + LASSERT(ldd); + + lock_kernel(); + /* Strictly speaking this introduces an additional race: the + * increments should wait until the rpc has returned. + * However, given that at present the function is void, this + * issue is moot. */ + handle = (flag) ? 
ldd->lld_mnt_och : ldd->lld_cwd_och; + if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) { + /* the "pin" failed */ + unlock_kernel(); + EXIT; + return; + } + + if (flag) + count = --ldd->lld_mnt_count; + else + count = --ldd->lld_cwd_count; + unlock_kernel(); + + if (count != 0) { + EXIT; + return; + } + + rc = obd_unpin(&sbi->ll_mdc_conn, &handle, flag); + EXIT; + return; +} + struct dentry_operations ll_d_ops = { - .d_revalidate2 = ll_revalidate2, - .d_intent_release = ll_intent_release, +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + .d_revalidate_nd = ll_revalidate_nd, +#else + .d_revalidate_it = ll_revalidate_it, +#endif .d_release = ll_release, - .d_delete = ll_delete, +#if 0 + .d_pin = ll_pin, + .d_unpin = ll_unpin, +#endif }; diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 115ed4e..a81a7d4 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -54,14 +54,6 @@ typedef struct ext2_dir_entry_2 ext2_dirent; #define PageChecked(page) test_bit(PG_checked, &(page)->flags) #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) - -static int ll_dir_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - return 0; -} - /* returns the page unlocked, but with a reference */ static int ll_dir_readpage(struct file *file, struct page *page) { @@ -98,7 +90,7 @@ static int ll_dir_readpage(struct file *file, struct page *page) &lockh); if (!rc) { ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0); - + rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR, &data, &lockh, NULL, 0, ldlm_completion_ast, ll_mdc_blocking_ast, @@ -137,39 +129,14 @@ static int ll_dir_readpage(struct file *file, struct page *page) SetPageUptodate(page); unlock_page(page); - ll_unlock(LCK_PR, &lockh); - if (rc != ELDLM_OK) - CERROR("ll_unlock: err: %d\n", rc); + ldlm_lock_decref(&lockh, LCK_PR); return rc; } struct address_space_operations ll_dir_aops = { readpage: ll_dir_readpage, - 
prepare_write: ll_dir_prepare_write }; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3)) -int waitfor_one_page(struct page *page) -{ - int error = 0; - struct buffer_head *bh, *head = page->buffers; - - bh = head; - do { - wait_on_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - error = -EIO; - } while ((bh = bh->b_this_page) != head); - return error; -} -#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int waitfor_one_page(struct page *page) -{ - wait_on_page_locked(page); - return 0; -} -#endif - /* * ext2 uses block-sized chunks. Arguably, sector-sized ones would be * more robust, but we have what we have @@ -190,27 +157,6 @@ static inline unsigned long dir_pages(struct inode *inode) return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; } -extern void set_page_clean(struct page *page); - -static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) -{ - struct inode *dir = page->mapping->host; - loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to; - int err = 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - dir->i_version = ++event; -#endif - if (new_size > dir->i_size) - dir->i_size = new_size; - SetPageUptodate(page); - set_page_clean(page); - - //page->mapping->a_ops->commit_write(NULL, page, from, to); - //if (IS_SYNC(dir)) - // err = waitfor_one_page(page); - return err; -} static void ext2_check_page(struct page *page) { @@ -324,20 +270,6 @@ fail: return ERR_PTR(-EIO); } -/* - * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. - * - * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. 
- */ -static inline int ext2_match (int len, const char * const name, - struct ext2_dir_entry_2 * de) -{ - if (len != de->name_len) - return 0; - if (!de->inode) - return 0; - return !memcmp(name, de->name, len); -} /* * p is at least 6 bytes before the end of page @@ -368,33 +300,6 @@ static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { [EXT2_FT_SYMLINK] DT_LNK, }; -static unsigned int ll_dt2fmt[DT_WHT + 1] = { - [EXT2_FT_UNKNOWN] 0, - [EXT2_FT_REG_FILE] S_IFREG, - [EXT2_FT_DIR] S_IFDIR, - [EXT2_FT_CHRDEV] S_IFCHR, - [EXT2_FT_BLKDEV] S_IFBLK, - [EXT2_FT_FIFO] S_IFIFO, - [EXT2_FT_SOCK] S_IFSOCK, - [EXT2_FT_SYMLINK] S_IFLNK -}; - -#define S_SHIFT 12 -static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] EXT2_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] EXT2_FT_DIR, - [S_IFCHR >> S_SHIFT] EXT2_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] EXT2_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] EXT2_FT_FIFO, - [S_IFSOCK >> S_SHIFT] EXT2_FT_SOCK, - [S_IFLNK >> S_SHIFT] EXT2_FT_SYMLINK, -}; - -static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) -{ - mode_t mode = inode->i_mode; - de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} int ll_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -437,7 +342,7 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir) } de = (ext2_dirent *)(kaddr+offset); limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1); - for ( ;(char*)de <= limit; de = ext2_next_entry(de)) + for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { if (de->inode) { int over; unsigned char d_type = DT_UNKNOWN; @@ -454,334 +359,31 @@ int ll_readdir(struct file * filp, void * dirent, filldir_t filldir) GOTO(done,0); } } + } ext2_put_page(page); } done: filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; filp->f_version = inode->i_version; - UPDATE_ATIME(inode); + update_atime(inode); RETURN(0); } -/* - * ext2_find_entry() - * - * finds an entry in the specified directory with the wanted name. 
It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. - */ -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page) -{ - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned reclen = EXT2_DIR_REC_LEN(namelen); - unsigned long start, n; - unsigned long npages = dir_pages(dir); - struct page *page = NULL; - ext2_dirent * de; - - /* OFFSET_CACHE */ - *res_page = NULL; - - // start = dir->u.ext2_i.i_dir_start_lookup; - start = 0; - if (start >= npages) - start = 0; - n = start; - do { - char *kaddr; - page = ll_get_dir_page(dir, n); - if (!IS_ERR(page)) { - kaddr = page_address(page); - de = (ext2_dirent *) kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; - while ((char *) de <= kaddr) { - if (ext2_match (namelen, name, de)) - goto found; - de = ext2_next_entry(de); - } - ext2_put_page(page); - } - if (++n >= npages) - n = 0; - } while (n != start); - return NULL; - -found: - *res_page = page; - // dir->u.ext2_i.i_dir_start_lookup = n; - return de; -} - -struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) -{ - struct page *page = ll_get_dir_page(dir, 0); - ext2_dirent *de = NULL; - - if (!IS_ERR(page)) { - de = ext2_next_entry((ext2_dirent *) page_address(page)); - *p = page; - } - return de; -} - -obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *type) -{ - obd_id res = 0; - struct ext2_dir_entry_2 * de; - struct page *page; - - de = ext2_find_entry (dir, dentry, &page); - if (de) { - res = le32_to_cpu(de->inode); - *type = ll_dt2fmt[de->file_type]; - kunmap(page); - page_cache_release(page); - } - return res; -} - -/* Releases the page */ -void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode) -{ - unsigned from = (char *) de - (char *) page_address(page); - 
unsigned to = from + le16_to_cpu(de->rec_len); - int err; - - lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - LBUG(); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - unlock_page(page); - ext2_put_page(page); -} - -/* - * Parent is locked. - */ -int ll_add_link (struct dentry *dentry, struct inode *inode) -{ - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned reclen = EXT2_DIR_REC_LEN(namelen); - unsigned short rec_len, name_len; - struct page *page = NULL; - ext2_dirent * de; - unsigned long npages = dir_pages(dir); - unsigned long n; - char *kaddr; - unsigned from, to; - int err; - - /* We take care of directory expansion in the same loop */ - for (n = 0; n <= npages; n++) { - page = ll_get_dir_page(dir, n); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; - kaddr = page_address(page); - de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; - while ((char *)de <= kaddr) { - err = -EEXIST; - if (ext2_match (namelen, name, de)) - goto out_page; - name_len = EXT2_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); - if ( n==npages && rec_len == 0) { - CERROR("Fatal dir behaviour\n"); - goto out_page; - } - if (!de->inode && rec_len >= reclen) - goto got_it; - if (rec_len >= name_len + reclen) - goto got_it; - de = (ext2_dirent *) ((char *) de + rec_len); - } - ext2_put_page(page); - } - LBUG(); - return -EINVAL; - -got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - goto out_unlock; - if (de->inode) { - ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); - de = de1; - } - 
de->name_len = namelen; - memcpy (de->name, name, namelen); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - CDEBUG(D_INODE, "type set to %o\n", de->file_type); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - - // change_inode happens with the commit_chunk - /* XXX OFFSET_CACHE */ - -out_unlock: - unlock_page(page); -out_page: - ext2_put_page(page); -out: - return err; -} - -/* - * ext2_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. - */ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) -{ - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - char *kaddr = page_address(page); - unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); - ext2_dirent * pde = NULL; - ext2_dirent * de = (ext2_dirent *) (kaddr + from); - int err; - - while ((char*)de < (char*)dir) { - pde = de; - de = ext2_next_entry(de); - } - if (pde) - from = (char*)pde - (char*)page_address(page); - lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - LBUG(); - if (pde) - pde->rec_len = cpu_to_le16(to-from); - dir->inode = 0; - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - unlock_page(page); - ext2_put_page(page); - return err; -} - -/* - * Set the first fragment of directory. 
- */ -int ext2_make_empty(struct inode *inode, struct inode *parent) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); - unsigned chunk_size = ext2_chunk_size(inode); - struct ext2_dir_entry_2 * de; - char *base; - int err; - ENTRY; - - if (!page) - return -ENOMEM; - base = kmap(page); - if (!base) - return -ENOMEM; - - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); - if (err) - goto fail; - - de = (struct ext2_dir_entry_2 *) base; - de->name_len = 1; - de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); - memcpy (de->name, ".\0\0", 4); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - - de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1)); - de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); - de->inode = cpu_to_le32(parent->i_ino); - memcpy (de->name, "..\0", 4); - ext2_set_de_type (de, inode); - - err = ext2_commit_chunk(page, 0, chunk_size); -fail: - kunmap(page); - unlock_page(page); - page_cache_release(page); - ENTRY; - return err; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -int ext2_empty_dir (struct inode * inode) -{ - struct page *page = NULL; - unsigned long i, npages = dir_pages(inode); - - for (i = 0; i < npages; i++) { - char *kaddr; - ext2_dirent * de; - page = ll_get_dir_page(inode, i); - - if (IS_ERR(page)) - continue; - - kaddr = page_address(page); - de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1); - - while ((char *)de <= kaddr) { - if (de->inode != 0) { - /* check for . and .. 
*/ - if (de->name[0] != '.') - goto not_empty; - if (de->name_len > 2) - goto not_empty; - if (de->name_len < 2) { - if (de->inode != - cpu_to_le32(inode->i_ino)) - goto not_empty; - } else if (de->name[1] != '.') - goto not_empty; - } - de = ext2_next_entry(de); - } - ext2_put_page(page); - } - return 1; - -not_empty: - ext2_put_page(page); - return 0; -} - static int ll_dir_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct ll_sb_info *sbi = ll_i2sbi(inode); struct obd_ioctl_data *data; ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino, inode->i_generation, inode, cmd); if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */ return -ENOTTY; + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); switch(cmd) { case IOC_MDC_LOOKUP: { struct ptlrpc_request *request = NULL; @@ -834,9 +436,61 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, obd_ioctl_freedata(buf, len); return rc; } - default: - CERROR("unrecognized ioctl %#x\n", cmd); + case LL_IOC_LOV_SETSTRIPE: + case LL_IOC_LOV_GETSTRIPE: RETURN(-ENOTTY); + case IOC_MDC_GETSTRIPE: { + struct ptlrpc_request *request = NULL; + struct ll_fid fid; + struct mds_body *body; + struct lov_mds_md *lmm; + char *filename; + int rc, lmmsize; + + filename = getname((const char *)arg); + if (IS_ERR(filename)) + RETURN(PTR_ERR(filename)); + + ll_inode2fid(&fid, inode); + rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, filename, + strlen(filename)+1, OBD_MD_FLEASIZE, + obd_size_diskmd(&sbi->ll_osc_conn, NULL), + &request); + if (rc < 0) { + CERROR("mdc_getattr_name: failed on %s: rc %d\n", + filename, rc); + GOTO(out_name, rc); + } + + body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); + LASSERT(body != NULL); /* checked by mdc_getattr_name */ + LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */ + + lmmsize = body->eadatasize; + + if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0) + GOTO(out_req, rc = -ENODATA); + + if 
(lmmsize > 4096) + GOTO(out_req, rc = -EFBIG); + + lmm = lustre_msg_buf(request->rq_repmsg, 1, lmmsize); + LASSERT(lmm != NULL); + LASSERT_REPSWABBED(request, 1); + + rc = copy_to_user((struct lov_mds_md *)arg, lmm, lmmsize); + if (rc) + GOTO(out_req, rc = -EFAULT); + + EXIT; + out_req: + ptlrpc_req_finished(request); + out_name: + putname(filename); + return rc; + } + default: + return obd_iocontrol(cmd,&sbi->ll_osc_conn,0,NULL,(void *)arg); } } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 943ba1b..67d18fd 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -32,8 +32,7 @@ #include #endif -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc); -extern int ll_setattr(struct dentry *de, struct iattr *attr); +#include "llite_internal.h" static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode, struct file *file) @@ -135,28 +134,21 @@ int ll_file_release(struct inode *inode, struct file *file) lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE); fd = (struct ll_file_data *)file->private_data; if (!fd) /* no process opened the file after an mcreate */ - RETURN(rc = 0); + RETURN(0); /* we might not be able to get a valid handle on this file * again so we really want to flush our write cache.. 
*/ - if (S_ISREG(inode->i_mode)) { - filemap_fdatasync(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); - - if (lsm != NULL) { - memset(&oa, 0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - - memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; + if (S_ISREG(inode->i_mode) && lsm) { + write_inode_now(inode, 0); + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + memcpy(obdo_handle(&oa), &fd->fd_ost_och, FD_OSTDATA_SIZE); + oa.o_valid |= OBD_MD_FLHANDLE; - rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (rc) - CERROR("inode %lu object close failed: rc = " - "%d\n", inode->i_ino, rc); - } + rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); + if (rc) + CERROR("inode %lu object close failed: rc %d\n", + inode->i_ino, rc); } rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file); @@ -206,16 +198,16 @@ static int ll_osc_open(struct lustre_handle *conn, struct inode *inode, RETURN(-ENOMEM); oa->o_id = lsm->lsm_object_id; oa->o_mode = S_IFREG; - oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE); rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och); if (rc) GOTO(out, rc); file->f_flags &= ~O_LOV_DELAY_CREATE; - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - + obdo_refresh_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME); EXIT; out: obdo_free(oa); @@ -236,24 +228,33 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode, struct obdo *oa; struct iattr iattr; struct mdc_op_data op_data; - int rc, err, lmm_size = 0;; + struct obd_trans_info oti = { 0 }; + int rc, err, lmm_size = 0; ENTRY; oa = obdo_alloc(); if (!oa) RETURN(-ENOMEM); + LASSERT(S_ISREG(inode->i_mode)); 
oa->o_mode = S_IFREG | 0600; oa->o_id = inode->i_ino; + oa->o_generation = inode->i_generation; /* Keep these 0 for now, because chown/chgrp does not change the * ownership on the OST, and we don't want to allow BA OST NFS * users to access these objects by mistake. */ oa->o_uid = 0; oa->o_gid = 0; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLUID | OBD_MD_FLGID; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLTYPE | + OBD_MD_FLMODE | OBD_MD_FLUID | OBD_MD_FLGID; +#ifdef ENABLE_ORPHANS + oa->o_valid |= OBD_MD_FLCOOKIE; +#endif - rc = obd_create(conn, oa, &lsm, NULL); + obdo_from_inode(oa, inode, OBD_MD_FLTYPE|OBD_MD_FLATIME|OBD_MD_FLMTIME| + OBD_MD_FLCTIME | (inode->i_size ? OBD_MD_FLSIZE : 0)); + + rc = obd_create(conn, oa, &lsm, &oti); if (rc) { CERROR("error creating objects for inode %lu: rc = %d\n", inode->i_ino, rc); @@ -263,7 +264,7 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode, } GOTO(out_oa, rc); } - obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ); + obdo_refresh_inode(inode, oa, OBD_MD_FLBLKSZ); LASSERT(lsm && lsm->lsm_object_id); rc = obd_packmd(conn, &lmm, lsm); @@ -278,11 +279,18 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode, ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, - &iattr, lmm, lmm_size, &req); +#if 0 +#warning FIXME: next line is for debugging purposes only + obd_log_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, oti.oti_numcookies, + oti.oti_logcookies, OBD_LLOG_FL_SENDNOW); +#endif + + rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, &iattr, + lmm, lmm_size, oti.oti_logcookies, + oti.oti_numcookies * sizeof(oti.oti_onecookie), &req); ptlrpc_req_finished(req); - obd_free_diskmd (conn, &lmm); + obd_free_diskmd(conn, &lmm); /* If we couldn't complete mdc_open() and store the stripe MD on the * MDS, we need to destroy the objects now or they will be leaked. 
@@ -297,13 +305,21 @@ static int ll_create_obj(struct lustre_handle *conn, struct inode *inode, EXIT; out_oa: + oti_free_cookies(&oti); obdo_free(oa); return rc; out_destroy: - obdo_from_inode(oa, inode, OBD_MD_FLTYPE); oa->o_id = lsm->lsm_object_id; - oa->o_valid |= OBD_MD_FLID; + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE); +#if 0 + err = obd_log_cancel(conn, lsm, oti.oti_numcookies, oti.oti_logcookies, + OBD_LLOG_FL_SENDNOW); + if (err) + CERROR("error cancelling inode %lu log cookies: rc %d\n", + inode->i_ino, err); +#endif err = obd_destroy(conn, oa, lsm, NULL); obd_free_memmd(conn, &lsm); if (err) @@ -327,8 +343,6 @@ out_destroy: * before returning in the O_LOV_DELAY_CREATE case and dropping it here * or in ll_file_release(), but I'm not sure that is desirable/necessary. */ -extern int ll_it_open_error(int phase, struct lookup_intent *it); - int ll_file_open(struct inode *inode, struct file *file) { struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -346,9 +360,10 @@ int ll_file_open(struct inode *inode, struct file *file) if (inode->i_sb->s_root == file->f_dentry) RETURN(0); + it = file->f_it; lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - LL_GET_INTENT(file->f_dentry, it); - rc = ll_it_open_error(IT_OPEN_OPEN, it); + + rc = ll_it_open_error(DISP_OPEN_OPEN, it); if (rc) RETURN(rc); @@ -363,7 +378,8 @@ int ll_file_open(struct inode *inode, struct file *file) lsm = lli->lli_smd; if (lsm == NULL) { - if (file->f_flags & O_LOV_DELAY_CREATE) { + if (file->f_flags & O_LOV_DELAY_CREATE || + !(file->f_mode & FMODE_WRITE)) { CDEBUG(D_INODE, "delaying object creation\n"); RETURN(0); } @@ -418,7 +434,7 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm, OBD_MD_FLCTIME; if (ostdata != NULL) { - memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE); + memcpy(obdo_handle(&oa), ostdata, FD_OSTDATA_SIZE); oa.o_valid |= OBD_MD_FLHANDLE; } @@ -455,8 +471,8 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md 
*lsm, (aft != 0 || after < before) && oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT); - obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLMTIME | OBD_MD_FLCTIME)); + obdo_refresh_inode(inode, &oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); if (inode->i_blksize < PAGE_CACHE_SIZE) inode->i_blksize = PAGE_CACHE_SIZE; @@ -477,102 +493,6 @@ int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm, RETURN(0); } -/* - * some callers, notably truncate, really don't want i_size set based - * on the the size returned by the getattr, or lock acquisition in - * the future. - */ -int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, - int mode, struct ldlm_extent *extent, - struct lustre_handle *lockh) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc, flags = 0; - ENTRY; - - LASSERT(lockh->cookie == 0); - - /* XXX phil: can we do this? won't it screw the file size up? */ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", - inode->i_ino, extent->start, extent->end); - - rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent, - sizeof(extent), mode, &flags, ll_extent_lock_callback, - inode, lockh); - - RETURN(rc); -} - -/* - * this grabs a lock and manually implements behaviour that makes it look like - * the OST is returning the file size with each lock acquisition. 
- */ -int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct ldlm_extent *extent, struct lustre_handle *lockh) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ldlm_extent size_lock; - struct lustre_handle match_lockh = {0}; - int flags, rc, matched; - ENTRY; - - rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); - if (rc != ELDLM_OK) - RETURN(rc); - - if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags)) - RETURN(0); - - rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL); - if (rc) { - ll_extent_unlock(fd, inode, lsm, mode, lockh); - RETURN(rc); - } - - size_lock.start = inode->i_size; - size_lock.end = OBD_OBJECT_EOF; - - /* XXX I bet we should be checking the lock ignore flags.. */ - flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; - matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT, - &size_lock, sizeof(size_lock), LCK_PR, &flags, - inode, &match_lockh); - - /* hey, alright, we hold a size lock that covers the size we - * just found, its not going to change for a while.. */ - if (matched == 1) { - set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags); - obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR, - &match_lockh); - } - - RETURN(0); -} - -int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockh) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc; - ENTRY; - - /* XXX phil: can we do this? won't it screw the file size up? 
*/ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh); - - RETURN(rc); -} - static inline void ll_remove_suid(struct inode *inode) { unsigned int mode; @@ -591,22 +511,10 @@ static inline void ll_remove_suid(struct inode *inode) #if 0 static void ll_update_atime(struct inode *inode) { -#ifdef USE_ATIME - struct iattr attr; - - attr.ia_atime = LTIME_S(CURRENT_TIME); - attr.ia_valid = ATTR_ATIME; - - if (inode->i_atime == attr.ia_atime) return; if (IS_RDONLY(inode)) return; - if (IS_NOATIME(inode)) return; - /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ - ll_inode_setattr(inode, &attr, 0); -#else /* update atime, but don't explicitly write it out just this change */ inode->i_atime = CURRENT_TIME; -#endif } #endif @@ -676,19 +584,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, /* start writeback on dirty pages in the extent when its PW */ for (i = start, j = start % count; - lock->l_granted_mode == LCK_PW && i < end; j++, i++) { + lock->l_granted_mode == LCK_PW && i < end; j++, i++) { if (j == count) { i += skip; j = 0; } /* its unlikely, but give us a chance to bail when we're out */ - PGCACHE_WRLOCK(inode->i_mapping); + ll_pgcache_lock(inode->i_mapping); if (list_empty(&inode->i_mapping->dirty_pages)) { CDEBUG(D_INODE, "dirty list empty\n"); - PGCACHE_WRUNLOCK(inode->i_mapping); + ll_pgcache_unlock(inode->i_mapping); break; } - PGCACHE_WRUNLOCK(inode->i_mapping); + ll_pgcache_unlock(inode->i_mapping); if (need_resched()) schedule(); @@ -702,10 +610,10 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, } if (PageDirty(page)) { CDEBUG(D_INODE, "writing page %p\n", page); - PGCACHE_WRLOCK(inode->i_mapping); + ll_pgcache_lock(inode->i_mapping); list_del(&page->list); list_add(&page->list, &inode->i_mapping->locked_pages); - PGCACHE_WRUNLOCK(inode->i_mapping); + 
ll_pgcache_unlock(inode->i_mapping); /* this writepage might write out pages outside * this extent, but that's ok, the pages are only @@ -730,19 +638,19 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0); LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0); for (i = start, j = start % count ; i < end ; j++, i++) { - if ( j == count ) { + if (j == count) { i += skip; j = 0; } - PGCACHE_WRLOCK(inode->i_mapping); + ll_pgcache_lock(inode->i_mapping); if (list_empty(&inode->i_mapping->dirty_pages) && list_empty(&inode->i_mapping->clean_pages) && list_empty(&inode->i_mapping->locked_pages)) { CDEBUG(D_INODE, "nothing left\n"); - PGCACHE_WRUNLOCK(inode->i_mapping); + ll_pgcache_unlock(inode->i_mapping); break; } - PGCACHE_WRUNLOCK(inode->i_mapping); + ll_pgcache_unlock(inode->i_mapping); if (need_resched()) schedule(); page = find_get_page(inode->i_mapping, i); @@ -755,15 +663,16 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, truncate_complete_page(page); #else truncate_complete_page(page->mapping, page); -#endif +#endif unlock_page(page); page_cache_release(page); } EXIT; } -int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, - void *data, int flag) +static int ll_extent_lock_callback(struct ldlm_lock *lock, + struct ldlm_lock_desc *new, void *data, + int flag) { struct inode *inode = data; struct ll_inode_info *lli = ll_i2info(inode); @@ -771,7 +680,10 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, int rc; ENTRY; - LASSERT(inode != NULL); + if ((unsigned long)inode < 0x1000) { + LDLM_ERROR(lock, "cancelling lock with bad data %p", data); + LBUG(); + } switch (flag) { case LDLM_CB_BLOCKING: @@ -785,9 +697,15 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, * could know to write-back or simply throw away the pages * based on if the cancel comes from a desire to, 
say, * read or truncate.. */ - LASSERT((unsigned long)inode > 0x1000); - LASSERT((unsigned long)lli > 0x1000); - LASSERT((unsigned long)lli->lli_smd > 0x1000); + if ((unsigned long)lli->lli_smd < 0x1000) { + /* note that lli is part of the inode itself, so it + * is valid if as checked the inode pointer above. */ + CERROR("inode %lu, sb %p, lli %p, lli_smd %p\n", + inode->i_ino, inode->i_sb, lli, lli->lli_smd); + LDLM_ERROR(lock, "cancel lock on bad inode %p", inode); + LBUG(); + } + ll_pgcache_remove_extent(inode, lli->lli_smd, lock); break; default: @@ -797,6 +715,102 @@ int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, RETURN(0); } +/* + * some callers, notably truncate, really don't want i_size set based + * on the the size returned by the getattr, or lock acquisition in + * the future. + */ +int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, + int mode, struct ldlm_extent *extent, + struct lustre_handle *lockh) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc, flags = 0; + ENTRY; + + LASSERT(lockh->cookie == 0); + + /* XXX phil: can we do this? won't it screw the file size up? */ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", + inode->i_ino, extent->start, extent->end); + + rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent, + sizeof(extent), mode, &flags, ll_extent_lock_callback, + inode, lockh); + + RETURN(rc); +} + +/* + * this grabs a lock and manually implements behaviour that makes it look like + * the OST is returning the file size with each lock acquisition. 
+ */ +int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, int mode, + struct ldlm_extent *extent, struct lustre_handle *lockh) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct ldlm_extent size_lock; + struct lustre_handle match_lockh = {0}; + int flags, rc, matched; + ENTRY; + + rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); + if (rc != ELDLM_OK) + RETURN(rc); + + if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags)) + RETURN(0); + + rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL); + if (rc) { + ll_extent_unlock(fd, inode, lsm, mode, lockh); + RETURN(rc); + } + + size_lock.start = inode->i_size; + size_lock.end = OBD_OBJECT_EOF; + + /* XXX I bet we should be checking the lock ignore flags.. */ + flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; + matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT, + &size_lock, sizeof(size_lock), LCK_PR, &flags, + inode, &match_lockh); + + /* hey, alright, we hold a size lock that covers the size we + * just found, its not going to change for a while.. */ + if (matched == 1) { + set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags); + obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR, + &match_lockh); + } + + RETURN(0); +} + +int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, + struct lov_stripe_md *lsm, int mode, + struct lustre_handle *lockh) +{ + struct ll_sb_info *sbi = ll_i2sbi(inode); + int rc; + ENTRY; + + /* XXX phil: can we do this? won't it screw the file size up? 
*/ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh); + + RETURN(rc); +} + static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos) { @@ -819,6 +833,10 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, count); + + if (!lsm) + RETURN(0); + /* grab a -> eof extent to push extending writes out of node's caches * so we can see them at the getattr after lock acquisition. this will * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt @@ -852,8 +870,8 @@ static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, /* * Write to a file (through the page cache). */ -static ssize_t -ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, + loff_t *ppos) { struct ll_file_data *fd = file->private_data; struct inode *inode = file->f_dentry->d_inode; @@ -868,6 +886,7 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", inode->i_ino, inode->i_generation, inode, count, *ppos); + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ /* * sleep doing some writeback work of this mount's dirty data * if the VM thinks we're low on memory.. other dirtying code @@ -875,12 +894,14 @@ ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) * careful not to hold locked pages while they do so. like * ll_prepare_write. 
*cough* */ - LL_CHECK_DIRTY(inode->i_sb); + ll_check_dirty(inode->i_sb); /* POSIX, but surprised the VFS doesn't check this already */ if (count == 0) RETURN(0); + LASSERT(lsm); + if (file->f_flags & O_APPEND) { extent.start = 0; extent.end = OBD_OBJECT_EOF; @@ -943,7 +964,8 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file, lsm = lli->lli_smd; if (lsm) { up(&lli->lli_open_sem); - CERROR("stripe already exists for ino %lu\n", inode->i_ino); + CDEBUG(D_IOCTL, "stripe already exists for ino %lu\n", + inode->i_ino); /* If we haven't already done the open, do so now */ if (file->f_flags & O_LOV_DELAY_CREATE) { int rc2 = ll_osc_open(conn, inode, file, lsm); @@ -987,6 +1009,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, struct ll_file_data *fd = file->private_data; struct lustre_handle *conn; int flags; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino, inode->i_generation, inode, cmd); @@ -1077,8 +1100,8 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) int ll_fsync(struct file *file, struct dentry *dentry, int data) { - int ret; struct inode *inode = dentry->d_inode; + int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); @@ -1090,17 +1113,17 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data) * still holding the PW lock that covered the dirty pages. XXX we * should probably get a reference on it, though, just to be clear. 
*/ - ret = filemap_fdatasync(dentry->d_inode->i_mapping); - if ( ret == 0 ) - ret = filemap_fdatawait(dentry->d_inode->i_mapping); + rc = filemap_fdatasync(inode->i_mapping); + if (rc == 0) + rc = filemap_fdatawait(inode->i_mapping); - RETURN(ret); + RETURN(rc); } -int ll_inode_revalidate(struct dentry *dentry) +int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) { struct inode *inode = dentry->d_inode; - struct lov_stripe_md *lsm = NULL; + struct lov_stripe_md *lsm; ENTRY; if (!inode) { @@ -1118,70 +1141,41 @@ int ll_inode_revalidate(struct dentry *dentry) below when the lock is marked CB_PENDING. That RPC may not go out because someone else may be in another RPC waiting for that lock*/ - if (!(dentry->d_it && dentry->d_it->it_lock_mode) && - !ll_have_md_lock(dentry)) { + if (!(it && it->it_lock_mode) && !ll_have_md_lock(dentry)) { + struct lustre_md md; struct ptlrpc_request *req = NULL; struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); struct ll_fid fid; - struct mds_body *body; - struct lov_mds_md *lmm; unsigned long valid = 0; - int eadatalen = 0, rc; + int rc; + int ealen = 0; - /* Why don't we update all valid MDS fields here, if we're - * doing an RPC anyways? 
-phil */ if (S_ISREG(inode->i_mode)) { - eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL); + ealen = obd_size_diskmd(&sbi->ll_osc_conn, NULL); valid |= OBD_MD_FLEASIZE; } ll_inode2fid(&fid, inode); - rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, - valid, eadatalen, &req); + rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, ealen, &req); if (rc) { CERROR("failure %d inode %lu\n", rc, inode->i_ino); RETURN(-abs(rc)); } - - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (req, 0); /* swabbed by mdc_getattr() */ - - if (S_ISREG(inode->i_mode) && - (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) { - CERROR("MDS sent back size for regular file\n"); - body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - } + rc = mdc_req2lustre_md(req, 0, &sbi->ll_osc_conn, &md); /* XXX Too paranoid? */ - if ((body->valid ^ valid) & OBD_MD_FLEASIZE) + if ((md.body->valid ^ valid) & OBD_MD_FLEASIZE) CERROR("Asked for %s eadata but got %s\n", (valid & OBD_MD_FLEASIZE) ? "some" : "no", - (body->valid & OBD_MD_FLEASIZE) ? "some":"none"); - - if (S_ISREG(inode->i_mode) && - (body->valid & OBD_MD_FLEASIZE)) { - if (body->eadatasize == 0) { /* no EA data */ - CERROR("OBD_MD_FLEASIZE set but no data\n"); - RETURN(-EPROTO); - } - /* Only bother with this if inode's lsm not set? */ - lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize); - LASSERT(lmm != NULL); /* mdc_getattr() checked */ - LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */ - - rc = obd_unpackmd (&sbi->ll_osc_conn, - &lsm, lmm, body->eadatasize); - if (rc < 0) { - CERROR("Error %d unpacking eadata\n", rc); - ptlrpc_req_finished(req); - RETURN(rc); - } - LASSERT(rc >= sizeof(*lsm)); + (md.body->valid & OBD_MD_FLEASIZE) ? 
"some": + "none"); + if (rc) { + ptlrpc_req_finished(req); + RETURN(rc); } - ll_update_inode(inode, body, lsm); - if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm) - obd_free_memmd(&sbi->ll_osc_conn, &lsm); + ll_update_inode(inode, md.body, md.lsm); + if (md.lsm != NULL && ll_i2info(inode)->lli_smd != md.lsm) + obd_free_memmd(&sbi->ll_osc_conn, &md.lsm); ptlrpc_req_finished(req); } @@ -1211,19 +1205,20 @@ int ll_inode_revalidate(struct dentry *dentry) } #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -static int ll_getattr(struct vfsmount *mnt, struct dentry *de, +int ll_getattr(struct vfsmount *mnt, struct dentry *de, + struct lookup_intent *it, struct kstat *stat) { int res = 0; struct inode *inode = de->d_inode; + res = ll_inode_revalidate_it(de, it); lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); - res = ll_inode_revalidate(de); + if (res) return res; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - stat->dev = inode->i_dev; -#endif + + stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; @@ -1234,6 +1229,8 @@ static int ll_getattr(struct vfsmount *mnt, struct dentry *de, stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; stat->size = inode->i_size; + stat->blksize = inode->i_blksize; + stat->blocks = inode->i_blocks; return 0; } #endif @@ -1254,9 +1251,9 @@ struct inode_operations ll_file_inode_operations = { setattr: ll_setattr, truncate: ll_truncate, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - getattr: ll_getattr, + getattr_it: ll_getattr, #else - revalidate: ll_inode_revalidate, + revalidate_it: ll_inode_revalidate_it, #endif }; @@ -1264,8 +1261,8 @@ struct inode_operations ll_special_inode_operations = { setattr_raw: ll_setattr_raw, setattr: ll_setattr, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - getattr: ll_getattr, + getattr_it: ll_getattr, #else - revalidate: ll_inode_revalidate, + revalidate_it: ll_inode_revalidate_it, #endif }; diff --git 
a/lustre/llite/iod.c b/lustre/llite/iod.c index e3fabe6..c30ef8a 100644 --- a/lustre/llite/iod.c +++ b/lustre/llite/iod.c @@ -38,7 +38,6 @@ #include #include #include -#include "llite_internal.h" /* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */ #ifdef PG_inactive_clean @@ -47,6 +46,7 @@ #define DEBUG_SUBSYSTEM S_LLITE #include +#include "llite_internal.h" #ifndef list_for_each_prev_safe #define list_for_each_prev_safe(pos, n, head) \ @@ -56,11 +56,6 @@ extern spinlock_t inode_lock; -struct ll_writeback_pages { - obd_count npgs, max; - struct brw_page *pga; -}; - /* * check to see if we're racing with truncate and put the page in * the brw_page array. returns 0 if there is more room and 1 @@ -139,13 +134,13 @@ static void ll_get_dirty_pages(struct inode *inode, list_del(&page->list); list_add(&page->list, &mapping->locked_pages); - if ( ! PageDirty(page) ) { + if (!PageDirty(page)) { unlock_page(page); continue; } ClearPageDirty(page); - if ( llwp_consume_page(llwp, inode, page) != 0) + if (llwp_consume_page(llwp, inode, page) != 0) break; } @@ -153,26 +148,31 @@ static void ll_get_dirty_pages(struct inode *inode, EXIT; } -static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp) +static void ll_writeback(struct inode *inode, struct obdo *oa, + struct ll_writeback_pages *llwp) { - int rc, i; struct ptlrpc_request_set *set; + int rc, i; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n", inode->i_ino, inode->i_generation, inode, ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count); + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ set = ptlrpc_prep_set(); if (set == NULL) { CERROR ("Can't create request set\n"); rc = -ENOMEM; } else { - rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), + rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), oa, ll_i2info(inode)->lli_smd, llwp->npgs, llwp->pga, set, NULL); if (rc == 0) - rc = ptlrpc_set_wait (set); + rc = ptlrpc_set_wait(set); + if (rc == 0) + 
obdo_refresh_inode(inode, oa, + oa->o_valid & ~OBD_MD_FLSIZE); ptlrpc_set_destroy (set); } /* @@ -278,6 +278,7 @@ int ll_check_dirty(struct super_block *sb) unsigned long old_flags; /* hack? */ int making_progress; struct inode *inode; + struct obdo oa; int rc = 0; ENTRY; @@ -328,12 +329,18 @@ int ll_check_dirty(struct super_block *sb) llwp.npgs = 0; ll_get_dirty_pages(inode, &llwp); if (llwp.npgs) { - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_WB_PRESSURE, - llwp.npgs); - ll_writeback(inode, &llwp); - rc += llwp.npgs; - making_progress = 1; + oa.o_id = + ll_i2info(inode)->lli_smd->lsm_object_id; + oa.o_valid = OBD_MD_FLID; + obdo_from_inode(&oa, inode, + OBD_MD_FLTYPE | OBD_MD_FLATIME| + OBD_MD_FLMTIME| OBD_MD_FLCTIME); + lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, + LPROC_LL_WB_PRESSURE, + llwp.npgs); + ll_writeback(inode, &oa, &llwp); + rc += llwp.npgs; + making_progress = 1; } } while (llwp.npgs && should_writeback()); @@ -382,13 +389,14 @@ cleanup: } #endif /* linux 2.5 */ -int ll_batch_writepage(struct inode *inode, struct page *page) +int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page) { unsigned long old_flags; /* hack? 
*/ struct ll_writeback_pages llwp; int rc = 0; ENTRY; + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ old_flags = current->flags; current->flags |= PF_MEMALLOC; rc = ll_alloc_brw(inode, &llwp); @@ -401,7 +409,7 @@ int ll_batch_writepage(struct inode *inode, struct page *page) if (llwp.npgs) { lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WB_WRITEPAGE, llwp.npgs); - ll_writeback(inode, &llwp); + ll_writeback(inode, oa, &llwp); } kfree(llwp.pga); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 4684383..fd37709 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -10,11 +10,49 @@ #ifndef LLITE_INTERNAL_H #define LLITE_INTERNAL_H + +struct ll_sb_info; struct lustre_handle; struct lov_stripe_md; +extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); +extern struct proc_dir_entry *proc_lustre_fs_root; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define hlist_del_init list_del_init +#endif + +static inline struct inode *ll_info2i(struct ll_inode_info *lli) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + return &lli->lli_vfs_inode; +#else + return list_entry(lli, struct inode, u.generic_ip); +#endif +} + +/* llite/commit_callback.c */ +int ll_commitcbd_setup(struct ll_sb_info *); +int ll_commitcbd_cleanup(struct ll_sb_info *); + +/* lproc_llite.c */ +int lprocfs_register_mountpoint(struct proc_dir_entry *parent, + struct super_block *sb, char *osc, char *mdc); +void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); + +/* llite/namei.c */ +struct inode *ll_iget(struct super_block *sb, ino_t hash, + struct lustre_md *lic); +struct dentry *ll_find_alias(struct inode *, struct dentry *); +int ll_it_open_error(int phase, struct lookup_intent *it); int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, int flags, void *opaque); + +/* llite/rw.c */ +void ll_end_writeback(struct inode *, struct page *); + +void ll_remove_dirty(struct inode *inode, unsigned long 
start, + unsigned long end); int ll_rd_dirty_pages(char *page, char **start, off_t off, int count, int *eof, void *data); int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count, @@ -26,4 +64,96 @@ int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm, unsigned long index); +/* llite/file.c */ +extern int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *); + +/* llite/super.c */ +int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc); +int ll_setattr(struct dentry *de, struct iattr *attr); + +/* iod.c */ +#define IO_STAT_ADD(FIS, STAT, VAL) do { \ + struct file_io_stats *_fis_ = (FIS); \ + spin_lock(&_fis_->fis_lock); \ + _fis_->fis_##STAT += VAL; \ + spin_unlock(&_fis_->fis_lock); \ +} while (0) + +#define INODE_IO_STAT_ADD(INODE, STAT, VAL) \ + IO_STAT_ADD(&ll_i2sbi(INODE)->ll_iostats, STAT, VAL) + +#define PAGE_IO_STAT_ADD(PAGE, STAT, VAL) \ + INODE_IO_STAT_ADD((PAGE)->mapping, STAT, VAL) + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +/* XXX lliod needs more work in 2.5 before being proven and brought back + * to 2.4, it'll at least require a patch to introduce page->private */ +int lliod_start(struct ll_sb_info *sbi, struct inode *inode); +void lliod_stop(struct ll_sb_info *sbi); +#else +#define lliod_start(sbi, inode) ({int _ret = 0; (void)sbi, (void)inode; _ret;}) +#define lliod_stop(sbi) do { (void)sbi; } while (0) +#endif +void lliod_wakeup(struct inode *inode); +void lliod_give_plist(struct inode *inode, struct plist *plist, int rw); +void lliod_give_page(struct inode *inode, struct page *page, int rw); +void plist_init(struct plist *plist); /* for lli initialization.. 
*/ + +void ll_lldo_init(struct ll_dirty_offsets *lldo); +void ll_record_dirty(struct inode *inode, unsigned long offset); +void ll_remove_dirty(struct inode *inode, unsigned long start, + unsigned long end); +int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start, + unsigned long *end); +int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest); + + +/* llite/super25.c */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +int ll_getattr(struct vfsmount *mnt, struct dentry *de, + struct lookup_intent *it, + struct kstat *stat); +#endif + + +/* llite/dcache.c */ +void ll_intent_release(struct lookup_intent *); +extern void ll_set_dd(struct dentry *de); +void ll_unhash_aliases(struct inode *); + +/* llite/rw.c */ +void ll_truncate(struct inode *inode); +void ll_end_writeback(struct inode *inode, struct page *page); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +int ll_check_dirty(struct super_block *sb); +int ll_batch_writepage(struct inode *inode, struct obdo *oa, struct page *page); +#else +#define ll_check_dirty(SB) do { (void)SB; } while (0) +#endif + +/* llite/llite_lib.c */ + +extern struct super_operations ll_super_operations; + +char *ll_read_opt(const char *opt, char *data); +int ll_set_opt(const char *opt, char *data, int fl); +void ll_options(char *options, char **ost, char **mds, int *flags); +void ll_lli_init(struct ll_inode_info *lli); +int ll_fill_super(struct super_block *sb, void *data, int silent); +void ll_put_super(struct super_block *sb); +void ll_clear_inode(struct inode *inode); +int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc); +int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc); +int ll_setattr_raw(struct inode *inode, struct iattr *attr); +int ll_setattr(struct dentry *de, struct iattr *attr); +int ll_statfs(struct super_block *sb, struct kstatfs *sfs); +void ll_update_inode(struct inode *inode, struct mds_body *body, + struct lov_stripe_md *lsm); +int 
it_disposition(struct lookup_intent *it, int flag); +void it_set_disposition(struct lookup_intent *it, int flag); +void ll_read_inode2(struct inode *inode, void *opaque); +void ll_umount_begin(struct super_block *sb); + + + #endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 42fea4b..8908d44 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -22,15 +22,13 @@ #define DEBUG_SUBSYSTEM S_LLITE #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif #include #include #include "llite_internal.h" /* /proc/lustre/llite mount point registration */ +struct proc_dir_entry *proc_lustre_fs_root; #ifndef LPROCFS int lprocfs_register_mountpoint(struct proc_dir_entry *parent, @@ -41,36 +39,113 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){} #else -#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) \ -{ \ - struct statfs sfs; \ - int rc; \ - LASSERT(data != NULL); \ - rc = get_statfs_fct((struct super_block*)data, &sfs); \ - return (rc==0 \ - ? 
lprocfs_##fct_name (page, start, off, count, eof, &sfs) \ - : rc); \ +long long mnt_instance; + +static int ll_rd_blksize(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; + + LASSERT(sb != NULL); + rc = ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, "%u\n", osfs.os_bsize); + } + + return rc; } -long long mnt_instance; +static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; + + LASSERT(sb != NULL); + rc = ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_blocks; + + while (blk_size >>= 1) + result <<= 1; + + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; + +} + +static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; -LPROC_LLITE_STAT_FCT(rd_blksize, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_kbytesfree, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filestotal, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filesfree, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filegroups, vfs_statfs); + LASSERT(sb != NULL); + rc = ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_bfree; + + while (blk_size >>= 1) + result <<= 1; + + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; +} + +static int ll_rd_filestotal(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; + + LASSERT(sb != NULL); + rc = 
ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, LPU64"\n", osfs.os_files); + } + return rc; +} -int rd_path(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int ll_rd_filesfree(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct super_block *sb = (struct super_block *)data; + struct obd_statfs osfs; + int rc; + + LASSERT(sb != NULL); + rc = ll_statfs_internal(sb, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, LPU64"\n", osfs.os_ffree); + } + return rc; + +} + +#if 0 +static int ll_rd_path(char *page, char **start, off_t off, int count, int *eof, + void *data) { return 0; } +#endif -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int ll_rd_fstype(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct super_block *sb = (struct super_block*)data; @@ -79,8 +154,8 @@ int rd_fstype(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, "%s\n", sb->s_type->name); } -int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int ll_rd_sb_uuid(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct super_block *sb = (struct super_block *)data; @@ -89,18 +164,20 @@ int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid); } -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", rd_sb_uuid, 0, 0 }, - { "mntpt_path", rd_path, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "dirty_pages", ll_rd_dirty_pages, 0, 0}, +static struct lprocfs_vars 
lprocfs_obd_vars[] = { + { "uuid", ll_rd_sb_uuid, 0, 0 }, + //{ "mntpt_path", ll_rd_path, 0, 0 }, + { "fstype", ll_rd_fstype, 0, 0 }, + { "blocksize", ll_rd_blksize, 0, 0 }, + { "kbytestotal", ll_rd_kbytestotal, 0, 0 }, + { "kbytesfree", ll_rd_kbytesfree, 0, 0 }, + { "filestotal", ll_rd_filestotal, 0, 0 }, + { "filesfree", ll_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + { "dirty_pages", ll_rd_dirty_pages, 0, 0}, { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0}, +#endif { 0 } }; diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index da6e670..b9223e8 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -41,74 +41,29 @@ #include #include #include - -/* from dcache.c */ -extern void ll_set_dd(struct dentry *de); - -/* from super.c */ -extern void ll_change_inode(struct inode *inode); -extern int ll_setattr(struct dentry *de, struct iattr *attr); - -/* from dir.c */ -extern int ll_add_link (struct dentry *dentry, struct inode *inode); -obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ); -int ext2_make_empty(struct inode *inode, struct inode *parent); -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page); -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ); -int ext2_empty_dir (struct inode * inode); -struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p); -void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode); - -/* - * Couple of helper functions - make the code slightly cleaner. 
- */ -static inline void ext2_inc_count(struct inode *inode) -{ - inode->i_nlink++; -} - -/* postpone the disk update until the inode really goes away */ -static inline void ext2_dec_count(struct inode *inode) -{ - inode->i_nlink--; -} -static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) -{ - int err; - err = ll_add_link(dentry, inode); - if (!err) { - d_instantiate(dentry, inode); - return 0; - } - ext2_dec_count(inode); - iput(inode); - return err; -} +#include "llite_internal.h" /* methods */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque) +static int ll_test_inode(struct inode *inode, unsigned long ino, void *opaque) #else static int ll_test_inode(struct inode *inode, void *opaque) #endif { - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; + struct lustre_md *md = opaque; - if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) + if (!(md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) CERROR("invalid generation\n"); - CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n", - inode, inode->i_ino, inode->i_generation, ino, - lic->lic_body->generation); + CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %u/%u\n", + inode, inode->i_ino, inode->i_generation, + md->body->ino, md->body->generation); - if (inode->i_generation != lic->lic_body->generation) + if (inode->i_generation != md->body->generation) return 0; /* Apply the attributes in 'opaque' to this inode */ - ll_update_inode(inode, body, lic->lic_lsm); + ll_update_inode(inode, md->body, md->lsm); return 1; } @@ -127,16 +82,21 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh) * Returns inode or NULL */ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -extern int ll_read_inode2(struct inode *inode, void *opaque); +int ll_set_inode(struct inode *inode, void *opaque) +{ + ll_read_inode2(inode, opaque); + return 0; +} struct inode 
*ll_iget(struct super_block *sb, ino_t hash, - struct ll_read_inode2_cookie *lic) + struct lustre_md *md) { struct inode *inode; LASSERT(hash != 0); - inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic); - if (inode == NULL) - return NULL; /* removed ERR_PTR(-ENOMEM) -eeb */ + inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md); + + if (!inode) + return (NULL); /* removed ERR_PTR(-ENOMEM) -eeb */ if (inode->i_state & I_NEW) unlock_new_inode(inode); @@ -146,11 +106,11 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, } #else struct inode *ll_iget(struct super_block *sb, ino_t hash, - struct ll_read_inode2_cookie *lic) + struct lustre_md *md) { struct inode *inode; LASSERT(hash != 0); - inode = iget4(sb, hash, ll_find_inode, lic); + inode = iget4(sb, hash, ll_test_inode, md); CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); return inode; @@ -171,36 +131,37 @@ static int ll_intent_to_lock_mode(struct lookup_intent *it) int ll_it_open_error(int phase, struct lookup_intent *it) { - if (it->it_disposition & IT_OPEN_OPEN) { - if (phase == IT_OPEN_OPEN) + if (it_disposition(it, DISP_OPEN_OPEN)) { + if (phase == DISP_OPEN_OPEN) return it->it_status; else return 0; } - if (it->it_disposition & IT_OPEN_CREATE) { - if (phase == IT_OPEN_CREATE) + if (it_disposition(it, DISP_OPEN_CREATE)) { + if (phase == DISP_OPEN_CREATE) return it->it_status; else return 0; } - if (it->it_disposition & IT_OPEN_LOOKUP) { - if (phase == IT_OPEN_LOOKUP) + if (it_disposition(it, DISP_LOOKUP_EXECD)) { + if (phase == DISP_LOOKUP_EXECD) return it->it_status; else return 0; } + CERROR("it disp: %X, status: %d\n", it->it_disposition, it->it_status); LBUG(); return 0; } -int ll_mdc_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, +int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag) { int rc; struct lustre_handle lockh; + struct inode *inode = lock->l_data; ENTRY; 
switch (flag) { @@ -214,9 +175,13 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, break; case LDLM_CB_CANCELING: { /* Invalidate all dentries associated with this inode */ - struct inode *inode = lock->l_data; - LASSERT(inode != NULL); - + if (inode == NULL) + break; + if (lock->l_resource->lr_name.name[0] != inode->i_ino || + lock->l_resource->lr_name.name[1] != inode->i_generation) { + LDLM_ERROR(lock, "data mismatch with ino %lu/%u", + inode->i_ino, inode->i_generation); + } if (S_ISDIR(inode->i_mode)) { CDEBUG(D_INODE, "invalidating inode %lu\n", inode->i_ino); @@ -227,7 +192,7 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, #warning FIXME: we should probably free this inode if there are no aliases if (inode->i_sb->s_root && inode != inode->i_sb->s_root->d_inode) - d_unhash_aliases(inode); + ll_unhash_aliases(inode); break; } default: @@ -237,17 +202,6 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, RETURN(0); } -void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - ENTRY; - - LASSERT(lock != NULL); - lock->l_data = inode; - LDLM_LOCK_PUT(lock); - EXIT; -} - int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, int flags, void *opaque) { @@ -287,35 +241,74 @@ void ll_prepare_mdc_op_data(struct mdc_op_data *data, data->mode = mode; } -#define IT_ENQ_COMPLETE (1<<16) - +/* + *This long block is all about fixing up the local state so that it is + *correct as of the moment _before_ the operation was applied; that + *way, the VFS will think that everything is normal and call Lustre's + *regular VFS methods. + * + * If we're performing a creation, that means that unless the creation + * failed with EEXIST, we should fake up a negative dentry. + * + * For everything else, we want to lookup to succeed. 
+ * + * One additional note: if CREATE or OPEN succeeded, we add an extra + * reference to the request because we need to keep it around until + * ll_create/ll_open gets called. + * + * The server will return to us, in it_disposition, an indication of + * exactly what it_status refers to. + * + * If DISP_OPEN_OPEN is set, then it_status refers to the open() call, + * otherwise if DISP_OPEN_CREATE is set, then it status is the + * creation failure mode. In either case, one of DISP_LOOKUP_NEG or + * DISP_LOOKUP_POS will be set, indicating whether the child lookup + * was successful. + * + * Else, if DISP_LOOKUP_EXECD then it_status is the rc of the child + * lookup. + */ int ll_intent_lock(struct inode *parent, struct dentry **de, - struct lookup_intent *it, intent_finish_cb intent_finish) + struct lookup_intent *it, int flags, intent_finish_cb intent_finish) { struct dentry *dentry = *de; struct inode *inode = dentry->d_inode; struct ll_sb_info *sbi = ll_i2sbi(parent); struct lustre_handle lockh; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; - struct ptlrpc_request *request = NULL; - int rc = 0, offset, flag = 0; + struct ptlrpc_request *request; + int rc = 0; + struct mds_body *mds_body; + int mode; obd_id ino = 0; ENTRY; #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - if (it && it->it_op == 0) - *it = lookup_it; + if (it && it->it_magic != INTENT_MAGIC) { + CERROR("WARNING: uninitialized intent\n"); + LBUG(); + intent_init(it, IT_LOOKUP, 0); + } + if (it->it_op == IT_GETATTR || + it->it_op == 0) + it->it_op = IT_LOOKUP; + #endif - if (it == NULL) + if (!it ||it->it_op == IT_GETXATTR) it = &lookup_it; + it->it_op_release = ll_intent_release; + CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len, dentry->d_name.name, ldlm_it2str(it->it_op)); - + if (dentry->d_name.len > EXT2_NAME_LEN) RETURN(-ENAMETOOLONG); - if (!(it->it_disposition & IT_ENQ_COMPLETE)) { + /* This function may be called twice, we only once want to + execute the request 
associated with the intent. If it was + done already, we skip past this and use the results. */ + if (!it_disposition(it, DISP_ENQ_COMPLETE)) { struct mdc_op_data op_data; ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode, @@ -325,174 +318,73 @@ int ll_intent_lock(struct inode *parent, struct dentry **de, rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it, ll_intent_to_lock_mode(it), &op_data, &lockh, NULL, 0, ldlm_completion_ast, - ll_mdc_blocking_ast, parent); + ll_mdc_blocking_ast, NULL); if (rc < 0) RETURN(rc); memcpy(it->it_lock_handle, &lockh, sizeof(lockh)); } - - request = (struct ptlrpc_request *)it->it_data; + request = it->it_data; + LASSERT(request != NULL); /* non-zero it_disposition indicates that the server performed the * intent on our behalf. */ - if (it->it_disposition) { - struct mds_body *mds_body; - int mode; - - /* This long block is all about fixing up the local - * state so that it is correct as of the moment - * _before_ the operation was applied; that way, the - * VFS will think that everything is normal and call - * Lustre's regular FS function. - * - * If we're performing a creation, that means that unless the - * creation failed with EEXIST, we should fake up a negative - * dentry. Likewise for the target of a hard link. - * - * For everything else, we want to lookup to succeed. */ - - /* One additional note: if CREATE/MKDIR/etc succeeded, - * we add an extra reference to the request because we - * need to keep it around until ll_create gets called. - * For anything else which results in - * LL_LOOKUP_POSITIVE, we can do the iget() - * immediately with the contents of the reply (in the - * intent_finish callback). In the create case, - * however, we need to wait until ll_create_node to do - * the iget() or the VFS will abort with -EEXISTS. 
- */ - - offset = 1; - mds_body = lustre_msg_buf(request->rq_repmsg, offset, - sizeof(*mds_body)); - LASSERT (mds_body != NULL); /* mdc_enqueue checked */ - LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */ - - ino = mds_body->fid1.id; - mode = mds_body->mode; - - /*We were called from revalidate2: did we find the same inode?*/ - if (inode && (ino != inode->i_ino || - mds_body->fid1.generation != inode->i_generation)) { - it->it_disposition |= IT_ENQ_COMPLETE; - RETURN(-ESTALE); - } + LASSERT(it_disposition(it, DISP_IT_EXECD)); + + + mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body)); + LASSERT(mds_body != NULL); /* mdc_enqueue checked */ + LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */ + + /* XXX everything with fids please, no ino's inode's etc */ + ino = mds_body->fid1.id; + mode = mds_body->mode; + + /*We were called from revalidate2: did we find the same inode?*/ + if (inode && + (ino != inode->i_ino || + mds_body->fid1.generation != inode->i_generation)) { + it_set_disposition(it, DISP_ENQ_COMPLETE); + RETURN(-ESTALE); + } - /* If we're doing an IT_OPEN which did not result in an actual - * successful open, then we need to remove the bit which saves - * this request for unconditional replay. */ - if (it->it_op & IT_OPEN && - (!(it->it_disposition & IT_OPEN_OPEN) || - it->it_status != 0)) { + /* If we're doing an IT_OPEN which did not result in an actual + * successful open, then we need to remove the bit which saves + * this request for unconditional replay. */ + if (it->it_op & IT_OPEN) { + if (!it_disposition(it, DISP_OPEN_OPEN) || + it->it_status != 0) { unsigned long flags; - + spin_lock_irqsave (&request->rq_lock, flags); request->rq_replay = 0; spin_unlock_irqrestore (&request->rq_lock, flags); } - - if (it->it_op & IT_CREAT) { - mdc_store_inode_generation(request, 2, 1); - /* The server will return to us, in it_disposition, an - * indication of exactly what it_status refers to. 
- * - * If IT_OPEN_OPEN is set, then it_status refers to the - * open() call, otherwise if IT_OPEN_CREATE is set, then - * it status is the creation failure mode. In either - * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set, - * indicating whether the child lookup was successful. - * - * Else, if IT_OPEN_LOOKUP then it_status is the rc - * of the child lookup. - * - * Finally, if none of the bits are set, then the - * failure occurred while looking up the parent. */ - rc = ll_it_open_error(IT_OPEN_LOOKUP, it); - if (rc) - GOTO(drop_req, rc); - - if (it->it_disposition & IT_OPEN_CREATE) - ptlrpc_request_addref(request); - if (it->it_disposition & IT_OPEN_OPEN) - ptlrpc_request_addref(request); - - if (it->it_disposition & IT_OPEN_NEG) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else if (it->it_op == IT_OPEN) { - LASSERT(!(it->it_disposition & IT_OPEN_CREATE)); - - rc = ll_it_open_error(IT_OPEN_LOOKUP, it); - if (rc) - GOTO(drop_req, rc); - - if (it->it_disposition & IT_OPEN_OPEN) - ptlrpc_request_addref(request); - - if (it->it_disposition & IT_OPEN_NEG) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { - /* For check ops, we want the lookup to succeed */ - it->it_data = NULL; - if (it->it_status) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else - LBUG(); - } else { - struct ll_fid fid; - obd_flag valid; - int eadatalen; - int mode; - - LBUG(); /* For the moment, no non-intent locks */ - - /* it_disposition == 0 indicates that it just did a simple lock - * request, for which we are very thankful. move along with - * the local lookup then. 
*/ - - //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh)); - offset = 0; - - ino = ll_inode_by_name(parent, dentry, &mode); - if (!ino) { - CERROR("inode %*s not found by name\n", - dentry->d_name.len, dentry->d_name.name); - GOTO(drop_lock, rc = -ENOENT); - } - - valid = OBD_MD_FLNOTOBD; - - if (S_ISREG(mode)) { - eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL), - valid |= OBD_MD_FLEASIZE; - } else { - eadatalen = 0; - valid |= OBD_MD_FLBLOCKS; - } - - fid.id = ino; - fid.generation = 0; - fid.f_type = mode; - rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, - eadatalen, &request); - if (rc) { - CERROR("failure %d inode "LPX64"\n", rc, ino); - GOTO(drop_lock, rc = -abs(rc)); - } } - LASSERT (request != NULL); + rc = ll_it_open_error(DISP_LOOKUP_EXECD, it); + if (rc) + GOTO(drop_req, rc); + + /* keep requests around for the multiple phases of the call + * this shows the DISP_XX must guarantee we make it into the call + */ + if (it_disposition(it, DISP_OPEN_CREATE)) + ptlrpc_request_addref(request); + if (it_disposition(it, DISP_OPEN_OPEN)) + ptlrpc_request_addref(request); + + if (it->it_op & IT_CREAT) { + /* XXX this belongs in ll_create_iit */ + } else if (it->it_op == IT_OPEN) { + LASSERT(!it_disposition(it, DISP_OPEN_CREATE)); + } else + LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP)); if (intent_finish != NULL) { struct lustre_handle old_lock; struct ldlm_lock *lock; - rc = intent_finish(flag, request, parent, de, it, offset, ino); + rc = intent_finish(request, parent, de, it, 1, ino); dentry = *de; /* intent_finish may change *de */ inode = dentry->d_inode; if (rc != 0) @@ -525,29 +417,16 @@ int ll_intent_lock(struct inode *parent, struct dentry **de, } ptlrpc_req_finished(request); - /* This places the intent in the dentry so that the vfs_xxx - * operation can lay its hands on it; but that is not always - * needed... 
(we need to save it in the GETATTR case for the - * benefit of ll_inode_revalidate -phil) */ - /* Ignore trying to save the intent for "special" inodes as - * they have special semantics that can cause deadlocks on - * the intent semaphore. -mmex */ - if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || - S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR))) - LL_SAVE_INTENT(dentry, it); - else - CDEBUG(D_DENTRY, - "D_IT dentry %p fsdata %p intent: %s status %d\n", - dentry, ll_d2d(dentry), ldlm_it2str(it->it_op), - it->it_status); - + CDEBUG(D_DENTRY, "D_IT dentry %p intent: %s status %d disp %x\n", + dentry, ldlm_it2str(it->it_op), it->it_status, it->it_disposition); + + /* drop IT_LOOKUP locks */ if (it->it_op == IT_LOOKUP) - ll_intent_release(dentry, it); - + ll_intent_release(it); RETURN(rc); drop_lock: - ll_intent_release(dentry, it); + ll_intent_release(it); drop_req: ptlrpc_req_finished(request); RETURN(rc); @@ -582,7 +461,7 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) if (!list_empty(&dentry->d_lru)) list_del_init(&dentry->d_lru); - list_del_init(&dentry->d_hash); + hlist_del_init(&dentry->d_hash); __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */ spin_unlock(&dcache_lock); atomic_inc(&dentry->d_count); @@ -597,68 +476,34 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) } static int -lookup2_finish(int flag, struct ptlrpc_request *request, +lookup2_finish(struct ptlrpc_request *request, struct inode *parent, struct dentry **de, struct lookup_intent *it, int offset, obd_id ino) { struct ll_sb_info *sbi = ll_i2sbi(parent); struct dentry *dentry = *de, *saved = *de; struct inode *inode = NULL; - struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL}; + int rc; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ - - if (!(flag & LL_LOOKUP_NEGATIVE)) { + if (!it_disposition(it, DISP_LOOKUP_NEG)) { + struct lustre_md md; ENTRY; - 
/* We only get called if the mdc_enqueue() called from - * ll_intent_lock() was successful. Therefore the mds_body - * is present and correct, and the eadata is present if - * body->eadatasize != 0 (but still opaque, so only - * obd_unpackmd() can check the size) */ - lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset, - sizeof (*lic.lic_body)); - LASSERT(lic.lic_body != NULL); - LASSERT_REPSWABBED(request, offset); - - if (S_ISREG(lic.lic_body->mode) && - (lic.lic_body->valid & OBD_MD_FLEASIZE)) { - struct lov_mds_md *lmm; - int lmm_size; - int rc; - - lmm_size = lic.lic_body->eadatasize; - if (lmm_size == 0) { - CERROR("OBD_MD_FLEASIZE set but " - "eadatasize 0\n"); - RETURN(-EPROTO); - } - lmm = lustre_msg_buf(request->rq_repmsg, offset + 1, - lmm_size); - LASSERT(lmm != NULL); - LASSERT_REPSWABBED(request, offset + 1); - - rc = obd_unpackmd(&sbi->ll_osc_conn, - &lic.lic_lsm, lmm, lmm_size); - if (rc < 0) { - CERROR("Error %d unpacking eadata\n", rc); - RETURN(rc); - } - LASSERT(rc >= sizeof(*lic.lic_lsm)); - } + rc =mdc_req2lustre_md(request, offset, &sbi->ll_osc_conn, &md); + if (rc) + RETURN(rc); - /* Both ENOMEM and an RPC timeout are possible in ll_iget; which - * to pick? A more generic EIO? 
-phik */ - inode = ll_iget(dentry->d_sb, ino, &lic); + inode = ll_iget(dentry->d_sb, ino, &md); if (!inode) { /* free the lsm if we allocated one above */ - if (lic.lic_lsm != NULL) - obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm); + if (md.lsm != NULL) + obd_free_memmd(&sbi->ll_osc_conn, &md.lsm); RETURN(-ENOMEM); - } else if (lic.lic_lsm != NULL && - ll_i2info(inode)->lli_smd != lic.lic_lsm) { - obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm); + } else if (md.lsm != NULL && + ll_i2info(inode)->lli_smd != md.lsm) { + obd_free_memmd(&sbi->ll_osc_conn, &md.lsm); } /* If this is a stat, get the authoritative file size */ @@ -685,8 +530,10 @@ lookup2_finish(int flag, struct ptlrpc_request *request, /* We asked for a lock on the directory, and may have been * granted a lock on the inode. Just in case, fixup the data * pointer. */ - ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle, - inode); + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", + inode, inode->i_ino, inode->i_generation); + ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle, + inode); } else { ENTRY; } @@ -700,8 +547,8 @@ lookup2_finish(int flag, struct ptlrpc_request *request, RETURN(0); } -static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry, - struct lookup_intent *it) +static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, + struct lookup_intent *it, int flags) { struct dentry *save = dentry, *retval; int rc; @@ -711,7 +558,11 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry, dentry->d_name.name, parent->i_ino, parent->i_generation, parent, LL_IT2STR(it)); - rc = ll_intent_lock(parent, &dentry, it, lookup2_finish); + if (d_mountpoint(dentry)) { + CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); + } + + rc = ll_intent_lock(parent, &dentry, it, flags, lookup2_finish); if (rc < 0) { CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc); GOTO(out, retval = ERR_PTR(rc)); @@ -725,167 
+576,136 @@ static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry, return retval; } -/* We depend on "mode" being set with the proper file type/umask by now */ -static struct inode *ll_create_node(struct inode *dir, const char *name, - int namelen, const void *data, int datalen, - int mode, __u64 extra, - struct lookup_intent *it) +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, + struct nameidata *nd) { - struct inode *inode; - struct ptlrpc_request *request = NULL; - struct mds_body *body; - time_t time = LTIME_S(CURRENT_TIME); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_read_inode2_cookie lic; + struct dentry *de; ENTRY; - if (it && it->it_disposition) { - ll_invalidate_inode_pages(dir); - request = it->it_data; - body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* checked already */ - LASSERT_REPSWABBED (request, 1); /* swabbed already */ - } else { - struct mdc_op_data op_data; - int gid = current->fsgid; - int rc; - - if (dir->i_mode & S_ISGID) { - gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } - - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0); - rc = mdc_create(&sbi->ll_mdc_conn, &op_data, - data, datalen, mode, current->fsuid, gid, - time, extra, &request); - if (rc) { - inode = ERR_PTR(rc); - GOTO(out, rc); - } - body = lustre_swab_repbuf(request, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); - GOTO (out, inode = ERR_PTR(-EPROTO)); - } - } - - lic.lic_body = body; - lic.lic_lsm = NULL; - - inode = ll_iget(dir->i_sb, body->ino, &lic); - if (!inode || is_bad_inode(inode)) { - /* XXX might need iput() for bad inode */ - int rc = -EIO; - CERROR("new_inode -fatal: rc %d\n", rc); - LBUG(); - GOTO(out, rc); - } - - if (!list_empty(&inode->i_dentry)) { - CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n", - body->ino, 
atomic_read(&inode->i_count), - inode->i_nlink); - iput(inode); - LBUG(); - inode = ERR_PTR(-EIO); - GOTO(out, -EIO); - } - - if (it && it->it_disposition) { - /* We asked for a lock on the directory, but were - * granted a lock on the inode. Since we finally have - * an inode pointer, stuff it in the lock. */ - ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle, - inode); - } + if (nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST)) + de = ll_lookup_it(parent, dentry, &nd->it, nd->flags); + else + de = ll_lookup_it(parent, dentry, NULL, 0); - EXIT; - out: - ptlrpc_req_finished(request); - return inode; + RETURN(de); } +#endif static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode, const char *name, int len) { struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dir); struct mds_body *body; struct lov_mds_md *eadata; struct lov_stripe_md *lsm = NULL; - struct lustre_handle lockh; - struct lookup_intent it = { .it_op = IT_UNLINK }; - struct obdo *oa; - int err; + struct obd_trans_info oti = { 0 }; struct mdc_op_data op_data; + struct obdo *oa; + int rc; ENTRY; ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode); - - err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX, - &op_data, &lockh, NULL, 0, - ldlm_completion_ast, ll_mdc_blocking_ast, - dir); - request = (struct ptlrpc_request *)it.it_data; - if (err < 0) - GOTO(out, err); - if (it.it_status) - GOTO(out, err = it.it_status); - err = 0; - - body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_enqueue() */ - LASSERT_REPSWABBED (request, 1); /* swabbed by mdc_enqueue() */ + rc = mdc_unlink(&ll_i2sbi(dir)->ll_mdc_conn, &op_data, &request); + if (rc) + GOTO(out, rc); + /* req is swabbed so this is safe */ + body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); if (!(body->valid & OBD_MD_FLEASIZE)) - GOTO(out, 0); + GOTO(out, rc = 0); if (body->eadatasize == 0) { - CERROR 
("OBD_MD_FLEASIZE set but eadatasize zero\n"); - GOTO (out, err = -EPROTO); + CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n"); + GOTO(out, rc = -EPROTO); } /* The MDS sent back the EA because we unlinked the last reference * to this file. Use this EA to unlink the objects on the OST. - * Note that mdc_enqueue() has already checked there _is_ some EA - * data, but this data is opaque to both mdc_enqueue() and the MDS. - * We have to leave it to obd_unpackmd() to check it is complete - * and sensible. */ - eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize); - LASSERT (eadata != NULL); - LASSERT_REPSWABBED (request, 2); - - err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, - body->eadatasize); - if (err < 0) { - CERROR("obd_unpackmd: %d\n", err); - GOTO (out_unlock, err); + * It's opaque so we don't swab here; we leave it to obd_unpackmd() to + * check it is complete and sensible. */ + eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL); + LASSERT(eadata != NULL); + if (eadata == NULL) { + CERROR("Can't unpack MDS EA data\n"); + GOTO(out, rc = -EPROTO); } - LASSERT (err >= sizeof (*lsm)); + + rc = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, body->eadatasize); + if (rc < 0) { + CERROR("obd_unpackmd: %d\n", rc); + GOTO(out, rc); + } + LASSERT(rc >= sizeof(*lsm)); oa = obdo_alloc(); if (oa == NULL) - GOTO(out_free_memmd, err = -ENOMEM); + GOTO(out_free_memmd, rc = -ENOMEM); oa->o_id = lsm->lsm_object_id; oa->o_mode = body->mode & S_IFMT; oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL); + if (body->valid & OBD_MD_FLCOOKIE) { + oa->o_valid |= OBD_MD_FLCOOKIE; + oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3, + body->eadatasize); + } + + rc = obd_destroy(ll_i2obdconn(dir), oa, lsm, &oti); obdo_free(oa); - if (err) + if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", - lsm->lsm_object_id, err); + lsm->lsm_object_id, rc); out_free_memmd: obd_free_memmd(ll_i2obdconn(dir), 
&lsm); - out_unlock: - ldlm_lock_decref_and_cancel(&lockh, LCK_EX); out: ptlrpc_req_finished(request); - return err; + return rc; +} + +/* We depend on "mode" being set with the proper file type/umask by now */ +static struct inode *ll_create_node(struct inode *dir, const char *name, + int namelen, const void *data, int datalen, + int mode, __u64 extra, + struct lookup_intent *it) +{ + struct inode *inode; + struct ptlrpc_request *request = NULL; + struct ll_sb_info *sbi = ll_i2sbi(dir); + struct lustre_md md; + int rc; + ENTRY; + + LASSERT(it && it->it_disposition); + + ll_invalidate_inode_pages(dir); + + request = it->it_data; + rc = mdc_req2lustre_md(request, 1, &sbi->ll_osc_conn, &md); + if (rc) { + GOTO(out, inode = ERR_PTR(rc)); + } + + inode = ll_iget(dir->i_sb, md.body->ino, &md); + if (!inode || is_bad_inode(inode)) { + /* XXX might need iput() for bad inode */ + int rc = -EIO; + CERROR("new_inode -fatal: rc %d\n", rc); + LBUG(); + GOTO(out, rc); + } + LASSERT(list_empty(&inode->i_dentry)); + + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", + inode, inode->i_ino, inode->i_generation); + ldlm_lock_set_data((struct lustre_handle*)it->it_lock_handle, + inode); + + EXIT; + out: + ptlrpc_req_finished(request); + return inode; } /* @@ -902,54 +722,46 @@ static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode, * If the create succeeds, we fill in the inode information * with d_instantiate(). 
*/ -static int ll_create(struct inode *dir, struct dentry *dentry, int mode) +static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, struct lookup_intent *it) { - struct lookup_intent *it; struct inode *inode; + struct ptlrpc_request *request = it->it_data; int rc = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", dentry->d_name.name, dir->i_ino, dir->i_generation, dir, - LL_IT2STR(dentry->d_it)); - - it = dentry->d_it; + LL_IT2STR(it)); - rc = ll_it_open_error(IT_OPEN_CREATE, it); + rc = ll_it_open_error(DISP_OPEN_CREATE, it); if (rc) { - LL_GET_INTENT(dentry, it); - ptlrpc_req_finished(it->it_data); + ptlrpc_req_finished(request); RETURN(rc); } + mdc_store_inode_generation(request, 2, 1); inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, NULL, 0, mode, 0, it); - if (IS_ERR(inode)) { - LL_GET_INTENT(dentry, it); RETURN(PTR_ERR(inode)); } - /* no directory data updates when intents rule */ - if (it && it->it_disposition) { - d_instantiate(dentry, inode); - RETURN(0); - } - - rc = ext2_add_nondir(dentry, inode); - RETURN(rc); + d_instantiate(dentry, inode); + RETURN(0); } -static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode, - int rdev) +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - LBUG(); - return -ENOSYS; + return ll_create_it(dir, dentry, mode, &nd->it); } +#endif -static int ll_mknod2(struct inode *dir, const char *name, int len, int mode, - int rdev) +static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) { + struct inode *dir = nd->dentry->d_inode; + const char *name = nd->last.name; + int len = nd->last.len; struct ptlrpc_request *request = NULL; time_t time = LTIME_S(CURRENT_TIME); struct ll_sb_info *sbi = ll_i2sbi(dir); @@ -966,10 +778,13 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode, mode &= ~current->fs->umask; switch 
(mode & S_IFMT) { - case 0: case S_IFREG: + case 0: + case S_IFREG: mode |= S_IFREG; /* for mode = 0 case, fallthrough */ - case S_IFCHR: case S_IFBLK: - case S_IFIFO: case S_IFSOCK: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, time, @@ -985,16 +800,11 @@ static int ll_mknod2(struct inode *dir, const char *name, int len, int mode, RETURN(err); } -static int ll_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_symlink2(struct inode *dir, const char *name, int len, - const char *tgt) +static int ll_symlink_raw(struct nameidata *nd, const char *tgt) { + struct inode *dir = nd->dentry->d_inode; + const char *name = nd->last.name; + int len = nd->last.len; struct ptlrpc_request *request = NULL; time_t time = LTIME_S(CURRENT_TIME); struct ll_sb_info *sbi = ll_i2sbi(dir); @@ -1016,16 +826,12 @@ static int ll_symlink2(struct inode *dir, const char *name, int len, RETURN(err); } -static int ll_link(struct dentry *old_dentry, struct inode * dir, - struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_link2(struct inode *src, struct inode *dir, - const char *name, int len) +static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { + struct inode *src = srcnd->dentry->d_inode; + struct inode *dir = tgtnd->dentry->d_inode; + const char *name = tgtnd->last.name; + int len = tgtnd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; int err; @@ -1043,14 +849,12 @@ static int ll_link2(struct inode *src, struct inode *dir, RETURN(err); } -static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - LBUG(); - return -ENOSYS; -} -static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode) +static int ll_mkdir_raw(struct nameidata *nd, int mode) { + 
struct inode *dir = nd->dentry->d_inode; + const char *name = nd->last.name; + int len = nd->last.len; struct ptlrpc_request *request = NULL; time_t time = LTIME_S(CURRENT_TIME); struct ll_sb_info *sbi = ll_i2sbi(dir); @@ -1066,14 +870,16 @@ static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode) mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - time, 0, &request); + current->fsuid, current->fsgid, time, 0, &request); ptlrpc_req_finished(request); RETURN(err); } -static int ll_rmdir2(struct inode *dir, const char *name, int len) +static int ll_rmdir_raw(struct nameidata *nd) { + struct inode *dir = nd->dentry->d_inode; + const char *name = nd->last.name; + int len = nd->last.len; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", @@ -1083,8 +889,11 @@ static int ll_rmdir2(struct inode *dir, const char *name, int len) RETURN(rc); } -static int ll_unlink2(struct inode *dir, const char *name, int len) +static int ll_unlink_raw(struct nameidata *nd) { + struct inode *dir = nd->dentry->d_inode; + const char *name = nd->last.name; + int len = nd->last.len; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", @@ -1094,29 +903,14 @@ static int ll_unlink2(struct inode *dir, const char *name, int len) RETURN(rc); } -static int ll_unlink(struct inode *dir, struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rmdir(struct inode *dir, struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rename(struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rename2(struct inode *src, struct inode *tgt, - const char *oldname, int oldlen, - const char *newname, int newlen) +static int ll_rename_raw(struct nameidata *oldnd, 
struct nameidata *newnd) { + struct inode *src = oldnd->dentry->d_inode; + struct inode *tgt = newnd->dentry->d_inode; + const char *oldname = oldnd->last.name; + int oldlen = oldnd->last.len; + const char *newname = newnd->last.name; + int newlen = newnd->last.len; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(src); struct mdc_op_data op_data; @@ -1134,27 +928,23 @@ static int ll_rename2(struct inode *src, struct inode *tgt, RETURN(err); } -extern int ll_inode_revalidate(struct dentry *dentry); struct inode_operations ll_dir_inode_operations = { - create: ll_create, - lookup2: ll_lookup2, - link: ll_link, /* LBUG() */ - link2: ll_link2, - unlink: ll_unlink, /* LBUG() */ - unlink2: ll_unlink2, - symlink: ll_symlink, /* LBUG() */ - symlink2: ll_symlink2, - mkdir: ll_mkdir, /* LBUG() */ - mkdir2: ll_mkdir2, - rmdir: ll_rmdir, /* LBUG() */ - rmdir2: ll_rmdir2, - mknod: ll_mknod, /* LBUG() */ - mknod2: ll_mknod2, - rename: ll_rename, /* LBUG() */ - rename2: ll_rename2, + link_raw: ll_link_raw, + unlink_raw: ll_unlink_raw, + symlink_raw: ll_symlink_raw, + mkdir_raw: ll_mkdir_raw, + rmdir_raw: ll_rmdir_raw, + mknod_raw: ll_mknod_raw, + rename_raw: ll_rename_raw, setattr: ll_setattr, setattr_raw: ll_setattr_raw, #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - revalidate: ll_inode_revalidate, + create_it: ll_create_it, + lookup_it: ll_lookup_it, + revalidate_it: ll_inode_revalidate_it, +#else + lookup_it: ll_lookup_nd, + create_nd: ll_create_nd, + getattr_it: ll_getattr, #endif }; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 98f6086..b4004b5 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -32,11 +32,12 @@ #include #include #include -#include "llite_internal.h" #include #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) #include +#include +#include #else #include #endif @@ -51,7 +52,7 @@ #include #include -#include +#include "llite_internal.h" #include /* @@ -90,7 +91,8 @@ void set_page_clean(struct page *page) } /* 
SYNCHRONOUS I/O to object storage for an inode */ -static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags) +static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, + struct page *page, int flags) { struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; @@ -124,8 +126,8 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags) else lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_BRW_READ, pg.count); - rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL); - if (rc) + rc = obd_brw(cmd, ll_i2obdconn(inode), oa, lsm, 1, &pg, NULL); + if (rc != 0 && rc != -EIO) CERROR("error from obd_brw: rc = %d\n", rc); RETURN(rc); @@ -142,6 +144,7 @@ static int ll_readpage(struct file *file, struct page *first_page) struct page *page = first_page; struct list_head *pos; struct brw_page *pgs; + struct obdo *oa; unsigned long end_index, extent_end = 0; struct ptlrpc_request_set *set; int npgs = 0, rc = 0, max_pages; @@ -276,19 +279,33 @@ static int ll_readpage(struct file *file, struct page *first_page) } while (page); - set = ptlrpc_prep_set(); - if (set == NULL) { + if ((oa = obdo_alloc()) == NULL) { + CERROR("ENOMEM allocing obdo\n"); + rc = -ENOMEM; + } else if ((set = ptlrpc_prep_set()) == NULL) { CERROR("ENOMEM allocing request set\n"); + obdo_free(oa); rc = -ENOMEM; } else { - rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), + struct ll_file_data *fd = file->private_data; + + oa->o_id = lli->lli_smd->lsm_object_id; + memcpy(obdo_handle(oa), &fd->fd_ost_och.och_fh, + sizeof(fd->fd_ost_och.och_fh)); + oa->o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME); + + rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), oa, ll_i2info(inode)->lli_smd, npgs, pgs, set, NULL); if (rc == 0) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); + if (rc == 0) + obdo_refresh_inode(inode, oa, oa->o_valid); if (rc && rc != -EIO) CERROR("error 
from obd_brw_async: rc = %d\n", rc); + obdo_free(oa); } while (npgs-- > 0) { @@ -310,15 +327,15 @@ static int ll_readpage(struct file *file, struct page *first_page) void ll_truncate(struct inode *inode) { struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa = {0}; + struct obdo oa; int err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); + /* object not yet allocated */ if (!lsm) { - /* object not yet allocated */ - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + CERROR("truncate on inode %lu with no objects\n", inode->i_ino); EXIT; return; } @@ -331,8 +348,9 @@ void ll_truncate(struct inode *inode) ~0); oa.o_id = lsm->lsm_object_id; - oa.o_mode = inode->i_mode; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; + oa.o_valid = OBD_MD_FLID; + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME| + OBD_MD_FLMTIME | OBD_MD_FLCTIME); CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n", oa.o_id, inode->i_size); @@ -343,7 +361,9 @@ void ll_truncate(struct inode *inode) if (err) CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino); else - obdo_to_inode(inode, &oa, oa.o_valid); + obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME); EXIT; return; @@ -356,9 +376,11 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from, { struct inode *inode = page->mapping->host; struct ll_inode_info *lli = ll_i2info(inode); + struct ll_file_data *fd = file->private_data; struct lov_stripe_md *lsm = lli->lli_smd; obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; struct brw_page pg; + struct obdo oa; int rc = 0; ENTRY; @@ -375,7 +397,7 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from, pg.off = offset; pg.count = PAGE_SIZE; pg.flag = 0; - rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL); + rc = obd_brw(OBD_BRW_CHECK, 
ll_i2obdconn(inode), NULL, lsm, 1,&pg,NULL); if (rc) RETURN(rc); @@ -393,7 +415,15 @@ static int ll_prepare_write(struct file *file, struct page *page, unsigned from, GOTO(prepare_done, rc = 0); } - rc = ll_brw(OBD_BRW_READ, inode, page, 0); + oa.o_id = lsm->lsm_object_id; + oa.o_mode = inode->i_mode; + memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh, + sizeof(fd->fd_ost_och.och_fh)); + oa.o_valid = OBD_MD_FLID |OBD_MD_FLMODE |OBD_MD_FLTYPE |OBD_MD_FLHANDLE; + + rc = ll_brw(OBD_BRW_READ, inode, &oa, page, 0); + if (rc == 0) + obdo_refresh_inode(inode, &oa, oa.o_valid); EXIT; prepare_done: @@ -544,15 +574,19 @@ int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm, static int ll_writepage(struct page *page) { struct inode *inode = page->mapping->host; + struct obdo oa; ENTRY; CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page, - PageLaunder(page), inode); + PageLaunder(page), inode); LASSERT(PageLocked(page)); - /* XXX should obd_brw errors trickle up? */ - ll_batch_writepage(inode, page); - RETURN(0); + oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id; + oa.o_valid = OBD_MD_FLID; + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + RETURN(ll_batch_writepage(inode, &oa, page)); } /* @@ -567,6 +601,7 @@ static int ll_commit_write(struct file *file, struct page *page, int rc = 0; ENTRY; + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ LASSERT(inode == file->f_dentry->d_inode); LASSERT(PageLocked(page)); @@ -595,7 +630,18 @@ static int ll_commit_write(struct file *file, struct page *page, /* This means that we've hit either the local cache limit or the limit * of the OST's grant. 
*/ if (rc == -EDQUOT) { - int rc = ll_batch_writepage(inode, page); + struct ll_file_data *fd = file->private_data; + struct obdo oa; + int rc; + + oa.o_id = ll_i2info(inode)->lli_smd->lsm_object_id; + memcpy(obdo_handle(&oa), &fd->fd_ost_och.och_fh, + sizeof(fd->fd_ost_och.och_fh)); + oa.o_valid = OBD_MD_FLID | OBD_MD_FLHANDLE; + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + rc = ll_batch_writepage(inode, &oa, page); lock_page(page); /* caller expects to unlock */ RETURN(rc); } @@ -624,12 +670,13 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, struct lov_stripe_md *lsm = lli->lli_smd; struct brw_page *pga; struct ptlrpc_request_set *set; + struct obdo oa; int length, i, flags, rc = 0; loff_t offset; ENTRY; if (!lsm || !lsm->lsm_object_id) - RETURN(-ENOMEM); + RETURN(-EBADF); if ((iobuf->offset & (blocksize - 1)) || (iobuf->length & (blocksize - 1))) @@ -663,6 +710,11 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, } } + oa.o_id = lsm->lsm_object_id; + oa.o_valid = OBD_MD_FLID; + obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + if (rw == WRITE) lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRECT_WRITE, iobuf->length); @@ -670,8 +722,8 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRECT_READ, iobuf->length); rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, - ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set, - NULL); + ll_i2obdconn(inode), &oa, lsm, iobuf->nr_pages, pga, + set, NULL); if (rc) { CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, "error from obd_brw_async: rc = %d\n", rc); diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 85532f0..9a3ffa1 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -35,770 +35,18 @@ #include "llite_internal.h" #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -kmem_cache_t *ll_file_data_slab; extern struct address_space_operations ll_aops; extern struct address_space_operations ll_dir_aops; -struct super_operations ll_super_operations; - -/* /proc/lustre/llite root that tracks llite mount points */ -struct proc_dir_entry *proc_lustre_fs_root = NULL; -/* lproc_llite.c */ -extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); -extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, - char *osc, char *mdc); - -extern int ll_recover(struct recovd_data *, int); -extern int ll_commitcbd_setup(struct ll_sb_info *); -extern int ll_commitcbd_cleanup(struct ll_sb_info *); - -static char *ll_read_opt(const char *opt, char *data) -{ - char *value; - char *retval; - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(NULL); - if ((value = strchr(data, '=')) == NULL) - RETURN(NULL); - - value++; - OBD_ALLOC(retval, strlen(value) + 1); - if (!retval) { - CERROR("out of memory!\n"); - RETURN(NULL); - } - - memcpy(retval, value, strlen(value)+1); - CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); - RETURN(retval); -} - -static int ll_set_opt(const char *opt, char *data, int fl) -{ - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(0); - else - RETURN(fl); -} - -static void ll_options(char *options, char **ost, char **mds, int *flags) -{ - char *this_char; - ENTRY; - - if (!options) { - EXIT; - return; - } - - for (this_char = strtok (options, ","); - this_char != NULL; - this_char = strtok (NULL, ",")) { - CDEBUG(D_SUPER, "this_char %s\n", this_char); 
- if ((!*ost && (*ost = ll_read_opt("osc", this_char)))|| - (!*mds && (*mds = ll_read_opt("mdc", this_char)))|| - (!(*flags & LL_SBI_NOLCK) && - ((*flags) = (*flags) | - ll_set_opt("nolock", this_char, LL_SBI_NOLCK)))) - continue; - } - EXIT; -} - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif static struct super_block *ll_read_super(struct super_block *sb, void *data, int silent) { - struct inode *root = 0; - struct obd_device *obd; - struct ll_sb_info *sbi; - struct obd_export *mdc_export; - char *osc = NULL; - char *mdc = NULL; int err; - struct ll_fid rootfid; - struct obd_statfs osfs; - struct ptlrpc_request *request = NULL; - struct ptlrpc_connection *mdc_conn; - struct ll_read_inode2_cookie lic; - class_uuid_t uuid; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) + err = ll_fill_super(sb, data, silent); + if (err) RETURN(NULL); - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); - generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - - sb->u.generic_sbp = sbi; - - ll_options(data, &osc, &mdc, &sbi->ll_flags); - - if (!osc) { - CERROR("no osc\n"); - GOTO(out_free, sb = NULL); - } - - if (!mdc) { - CERROR("no mdc\n"); - GOTO(out_free, sb = NULL); - } - - obd = class_name2obd(mdc); - if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); - GOTO(out_free, sb = NULL); - } - - err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); - GOTO(out_free, sb = NULL); - } - - mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection; - - obd = class_name2obd(osc); - if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, sb = NULL); - } - - err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, sb = NULL); - } - - err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - if (err) { - 
CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; - - memset(&osfs, 0, sizeof(osfs)); - mdc_export = class_conn2export(&sbi->ll_mdc_conn); - if (mdc_export == NULL) { - CERROR("null mdc_export\n"); - GOTO(out_osc, sb = NULL); - } - err = obd_statfs(mdc_export, &osfs); - class_export_put(mdc_export); - sb->s_blocksize = osfs.os_bsize; - sb->s_blocksize_bits = log2(osfs.os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = PAGE_CACHE_MAXBYTES; - - sb->s_op = &ll_super_operations; - - /* make root inode - * XXX: move this to after cbd setup? */ - err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - - /* initialize committed transaction callback daemon */ - spin_lock_init(&sbi->ll_commitcbd_lock); - init_waitqueue_head(&sbi->ll_commitcbd_waitq); - init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq); - sbi->ll_commitcbd_flags = 0; - err = ll_commitcbd_setup(sbi); - if (err) { - CERROR("failed to start commit callback daemon: rc = %d\n",err); - ptlrpc_req_finished (request); - GOTO(out_osc, sb = NULL); - } - - lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, - sizeof(*lic.lic_body)); - LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */ - - lic.lic_lsm = NULL; - - LASSERT(sbi->ll_rootino != 0); - root = iget4(sb, sbi->ll_rootino, NULL, &lic); - - ptlrpc_req_finished(request); - - if (root == NULL || is_bad_inode(root)) { - /* XXX might need iput() for bad inode */ - CERROR("lustre_lite: bad iget4 for root\n"); - GOTO(out_cbd, sb = NULL); - } - - sb->s_root = d_alloc_root(root); - - if (proc_lustre_fs_root) { - err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - osc, mdc); - if (err < 0) - CERROR("could not register mount 
in /proc/lustre"); - } - -out_dev: - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - if (osc) - OBD_FREE(osc, strlen(osc) + 1); - RETURN(sb); - -out_cbd: - ll_commitcbd_cleanup(sbi); -out_osc: - obd_disconnect(&sbi->ll_osc_conn, 0); -out_mdc: - obd_disconnect(&sbi->ll_mdc_conn, 0); -out_free: - lprocfs_unregister_mountpoint(sbi); - OBD_FREE(sbi, sizeof(*sbi)); - - goto out_dev; -} /* ll_read_super */ - -static void ll_put_super(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct list_head *tmp, *next; - struct ll_fid rootfid; - struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); - list_del(&sbi->ll_conn_chain); - ll_commitcbd_cleanup(sbi); - obd_disconnect(&sbi->ll_osc_conn, 0); - - /* NULL request to force sync on the MDS, and get the last_committed - * value to flush remaining RPCs from the sending queue on client. - * - * XXX This should be an mdc_sync() call to sync the whole MDS fs, - * which we can call for other reasons as well. 
- */ - if (!obd->obd_no_recov) - mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - - lprocfs_unregister_mountpoint(sbi); - if (sbi->ll_proc_root) { - lprocfs_remove(sbi->ll_proc_root); - sbi->ll_proc_root = NULL; - } - - obd_disconnect(&sbi->ll_mdc_conn, 0); - - spin_lock(&dcache_lock); - list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_hash); - shrink_dcache_parent(dentry); - } - spin_unlock(&dcache_lock); - - OBD_FREE(sbi, sizeof(*sbi)); - - EXIT; -} /* ll_put_super */ - -static void ll_clear_inode(struct inode *inode) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, - LDLM_FL_NO_CALLBACK, inode); - if (rc < 0) { - CERROR("ll_mdc_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - - if (atomic_read(&inode->i_count) != 0) - CERROR("clearing in-use inode %lu: count = %d\n", - inode->i_ino, atomic_read(&inode->i_count)); - - if (lli->lli_smd) { - rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, - LDLM_FL_WARN, inode); - if (rc < 0) { - CERROR("obd_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd); - lli->lli_smd = NULL; - } - - if (lli->lli_symlink_name) { - OBD_FREE(lli->lli_symlink_name, - strlen(lli->lli_symlink_name) + 1); - lli->lli_symlink_name = NULL; - } - - EXIT; -} - -#if 0 -static void ll_delete_inode(struct inode *inode) -{ - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - if (S_ISREG(inode->i_mode)) { - int err; - struct obdo *oa; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - - /* mcreate with no open */ - if (!lsm) - GOTO(out, 0); - - if (lsm->lsm_object_id == 0) { - CERROR("This really happens\n"); - /* No 
obdo was ever created */ - GOTO(out, 0); - } - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out, -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE); - - err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL); - obdo_free(oa); - if (err) - CDEBUG(D_INODE, - "inode %lu obd_destroy objid "LPX64" error %d\n", - inode->i_ino, lsm->lsm_object_id, err); - } -out: - clear_inode(inode); - EXIT; -} -#endif - -/* like inode_setattr, but doesn't mark the inode dirty */ -static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc) -{ - unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if ((ia_valid & ATTR_SIZE) && trunc) { - if (attr->ia_size > ll_file_maxbytes(inode)) { - error = -EFBIG; - goto out; - } - error = vmtruncate(inode, attr->ia_size); - if (error) - goto out; - } else if (ia_valid & ATTR_SIZE) - inode->i_size = attr->ia_size; - - if (ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; - if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; - if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; - if (ia_valid & ATTR_MODE) { - inode->i_mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - inode->i_mode &= ~S_ISGID; - } -out: - return error; -} - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - int err = 0; - ENTRY; - - /* change incore inode */ - err = ll_attr2inode(inode, attr, do_trunc); - if (err) - RETURN(err); - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid) { - struct mdc_op_data op_data; - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME; - oa.o_mtime = attr->ia_mtime; - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - } - - RETURN(err); -} - -int ll_setattr_raw(struct inode *inode, struct iattr *attr) -{ - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - int rc = 0, err; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - if ((attr->ia_valid & ATTR_SIZE)) { - struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF}; - struct lustre_handle lockh = { 0 }; - - if (attr->ia_size > ll_file_maxbytes(inode)) - RETURN(-EFBIG); - - /* writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * - * If this file doesn't have stripes yet, it is already, - * by definition, truncated. */ - if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) { - LASSERT(attr->ia_size == 0); - GOTO(skip_extent_lock, rc = 0); - } - - /* we really need to get our PW lock before we change - * inode->i_size. 
if we don't we can race with other - * i_size updaters on our node, like ll_file_read. we - * can also race with i_size propogation to other - * nodes through dirtying and writeback of final cached - * pages. this last one is especially bad for racing - * o_append users on other nodes. */ - rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW, - &extent, &lockh); - if (rc != ELDLM_OK) { - if (rc > 0) - RETURN(-ENOLCK); - RETURN(rc); - } - - rc = vmtruncate(inode, attr->ia_size); - if (rc == 0) - set_bit(LLI_F_HAVE_SIZE_LOCK, - &ll_i2info(inode)->lli_flags); - - /* unlock now as we don't mind others file lockers racing with - * the mds updates below? */ - err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh); - if (err) - CERROR("ll_extent_unlock failed: %d\n", err); - if (rc) - RETURN(rc); - } - -skip_extent_lock: - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). - */ - attr->ia_valid &= ~ATTR_SIZE; - if (!attr->ia_valid) - RETURN(0); - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - if (lsm == NULL) { - CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n"); - RETURN(err); - } - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME; - oa.o_mtime = attr->ia_mtime; - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - RETURN(err); -} - -int ll_setattr(struct dentry *de, struct iattr *attr) 
-{ - int rc = inode_change_ok(de->d_inode, attr); - CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name); - if (rc) - return rc; - lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR); - - return ll_inode_setattr(de->d_inode, attr, 1); -} - -static int ll_statfs(struct super_block *sb, struct statfs *sfs) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn); - struct obd_export *osc_exp; - struct obd_statfs osfs; - int rc; - ENTRY; - - if (mdc_exp == NULL) - RETURN(-EINVAL); - - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS); - memset(sfs, 0, sizeof(*sfs)); - rc = obd_statfs(mdc_exp, &osfs); - statfs_unpack(sfs, &osfs); - if (rc) - CERROR("mdc_statfs fails: rc = %d\n", rc); - else - CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - /* temporary until mds_statfs returns statfs info for all OSTs */ - if (!rc) { - osc_exp = class_conn2export(&sbi->ll_osc_conn); - if (osc_exp == NULL) - GOTO(out, rc = -EINVAL); - rc = obd_statfs(osc_exp, &osfs); - class_export_put(osc_exp); - if (rc) { - CERROR("obd_statfs fails: rc = %d\n", rc); - GOTO(out, rc); - } - CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - while (osfs.os_blocks > ~0UL) { - sfs->f_bsize <<= 1; - - osfs.os_blocks >>= 1; - osfs.os_bfree >>= 1; - osfs.os_bavail >>= 1; - } - - sfs->f_blocks = osfs.os_blocks; - sfs->f_bfree = osfs.os_bfree; - sfs->f_bavail = osfs.os_bavail; - - /* If we don't have as many objects free on the OST as inodes - * on the MDS, we reduce the total number of inodes to - * compensate, so that the "inodes in use" number is correct. 
- */ - if (osfs.os_ffree < (__u64)sfs->f_ffree) { - sfs->f_files = (sfs->f_files - sfs->f_ffree) + - osfs.os_ffree; - sfs->f_ffree = osfs.os_ffree; - } - } - -out: - class_export_put(mdc_exp); - RETURN(rc); -} - -void dump_lsm(int level, struct lov_stripe_md *lsm) -{ - CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, " - "stripe_size %#08x, offset %u, stripe_count %u\n", - lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic, - lsm->lsm_stripe_size, lsm->lsm_stripe_offset, - lsm->lsm_stripe_count); -} - -void ll_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (lli->lli_smd == NULL) { - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) - lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; - lli->lli_smd = lsm; - } else { - if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) { - CERROR("lsm mismatch for inode %ld\n", - inode->i_ino); - CERROR("lli_smd:\n"); - dump_lsm(D_ERROR, lli->lli_smd); - CERROR("lsm:\n"); - dump_lsm(D_ERROR, lsm); - LBUG(); - } - } - } - - if (body->valid & OBD_MD_FLID) - inode->i_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) - LTIME_S(inode->i_atime) = body->atime; - if (body->valid & OBD_MD_FLMTIME) - LTIME_S(inode->i_mtime) = body->mtime; - if (body->valid & OBD_MD_FLCTIME) - LTIME_S(inode->i_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - if (body->valid & OBD_MD_FLUID) - inode->i_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - inode->i_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = body->flags; - if (body->valid & OBD_MD_FLNLINK) - inode->i_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - 
inode->i_generation = body->generation; - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = body->rdev; - if (body->valid & OBD_MD_FLSIZE) - inode->i_size = body->size; - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; -} - -static void ll_read_inode2(struct inode *inode, void *opaque) -{ - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; - struct ll_inode_info *lli = ll_i2info(inode); - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - sema_init(&lli->lli_open_sem, 1); - spin_lock_init(&lli->lli_read_extent_lock); - INIT_LIST_HEAD(&lli->lli_read_extents); - lli->lli_flags = 0; - /* We default to 2T-4k until the LSM is created/read, at which point - * it'll be updated. */ - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - LASSERT(!lli->lli_smd); - - /* core attributes from the MDS first */ - ll_update_inode(inode, body, lic->lic_lsm); - - /* OIDEBUG(inode); */ - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ll_file_inode_operations; - inode->i_fop = &ll_file_operations; - inode->i_mapping->a_ops = &ll_aops; - EXIT; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - inode->i_mapping->a_ops = &ll_dir_aops; - EXIT; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ll_fast_symlink_inode_operations; - EXIT; - } else { - inode->i_op = &ll_special_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - EXIT; - } -} - -void ll_umount_begin(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data ioc_data = { 0 }; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - obd = class_conn2obd(&sbi->ll_mdc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data, - &ioc_data, NULL); - - obd = class_conn2obd(&sbi->ll_osc_conn); - obd->obd_no_recov = 1; - 
obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data, - &ioc_data, NULL); - - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just invalidate the requests, - * schedule, and hope. - */ - schedule(); - - EXIT; } /* exported operations */ diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 980bfcd..5ab03ff 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -34,736 +34,35 @@ #include #include "llite_internal.h" -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -kmem_cache_t *ll_file_data_slab; -extern struct address_space_operations ll_aops; -extern struct address_space_operations ll_dir_aops; -struct super_operations ll_super_operations; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -/* /proc/lustre/llite root that tracks llite mount points */ -struct proc_dir_entry *proc_lustre_fs_root = NULL; -/* lproc_llite.c */ -extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, - char *osc, char *mdc); - -extern int ll_init_inodecache(void); -extern void ll_destroy_inodecache(void); -extern int ll_recover(struct recovd_data *, int); -extern int ll_commitcbd_setup(struct ll_sb_info *); -extern int ll_commitcbd_cleanup(struct ll_sb_info *); -int ll_read_inode2(struct inode *inode, void *opaque); - -extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc); - -static char *ll_read_opt(const char *opt, char *data) -{ - char *value; - char *retval; - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(NULL); - if ((value = strchr(data, '=')) == NULL) - RETURN(NULL); - - value++; - OBD_ALLOC(retval, strlen(value) + 1); - if (!retval) { - CERROR("out of memory!\n"); - RETURN(NULL); - } - - memcpy(retval, value, strlen(value)+1); - CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); - RETURN(retval); -} - -static int ll_set_opt(const 
char *opt, char *data, int fl) -{ - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(0); - else - RETURN(fl); -} - -static void ll_options(char *options, char **ost, char **mds, int *flags) -{ - char *opt_ptr = options; - char *this_char; - ENTRY; - - if (!options) { - EXIT; - return; - } - - while ((this_char = strsep (&opt_ptr, ",")) != NULL) { - CDEBUG(D_SUPER, "this_char %s\n", this_char); - if ((!*ost && (*ost = ll_read_opt("osc", this_char)))|| - (!*mds && (*mds = ll_read_opt("mdc", this_char)))|| - (!(*flags & LL_SBI_NOLCK) && - ((*flags) = (*flags) | - ll_set_opt("nolock", this_char, LL_SBI_NOLCK)))) - continue; - } - EXIT; -} - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - - -static int ll_fill_super(struct super_block *sb, void *data, int silent) -{ - struct inode *root = 0; - struct obd_device *obd; - struct ll_sb_info *sbi; - char *osc = NULL; - char *mdc = NULL; - int err; - struct ll_fid rootfid; - struct obd_statfs osfs; - struct ptlrpc_request *request = NULL; - struct ptlrpc_connection *mdc_conn; - struct ll_read_inode2_cookie lic; - class_uuid_t uuid; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(-ENOMEM); - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); - generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - - sb->s_fs_info = sbi; - - ll_options(data, &osc, &mdc, &sbi->ll_flags); - - if (!osc) { - CERROR("no osc\n"); - GOTO(out_free, sb = NULL); - } - - if (!mdc) { - CERROR("no mdc\n"); - GOTO(out_free, sb = NULL); - } - - obd = class_name2obd(mdc); - if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); - GOTO(out_free, sb = NULL); - } - - err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); - GOTO(out_free, sb = NULL); - } - - mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection; - - 
obd = class_name2obd(osc); - if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, sb = NULL); - } - - err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, sb = NULL); - } - - err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; - - memset(&osfs, 0, sizeof(osfs)); - err = obd_statfs(&sbi->ll_mdc_conn, &osfs); - sb->s_blocksize = osfs.os_bsize; - sb->s_blocksize_bits = log2(osfs.os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = PAGE_CACHE_MAXBYTES; - - sb->s_op = &ll_super_operations; - - /* make root inode - * XXX: move this to after cbd setup? */ - err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - - /* initialize committed transaction callback daemon */ - spin_lock_init(&sbi->ll_commitcbd_lock); - init_waitqueue_head(&sbi->ll_commitcbd_waitq); - init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq); - sbi->ll_commitcbd_flags = 0; - err = ll_commitcbd_setup(sbi); - if (err) { - CERROR("failed to start commit callback daemon: rc = %d\n",err); - ptlrpc_req_finished (request); - GOTO(out_osc, sb = NULL); - } - - lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body)); - LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */ - - lic.lic_lsm = NULL; - - root = iget5_locked(sb, sbi->ll_rootino, NULL, - ll_read_inode2, &lic); - - ptlrpc_req_finished(request); - - if (root == NULL || is_bad_inode(root)) { - /* XXX might need iput() for bad inode */ - CERROR("lustre_lite: bad iget5 for root\n"); - GOTO(out_cbd, sb = NULL); - } - - sb->s_root 
= d_alloc_root(root); - root->i_state &= ~(I_LOCK | I_NEW); - printk("AMRUT 1\n"); - if (proc_lustre_fs_root) { - err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - osc, mdc); - if (err < 0) - CERROR("could not register mount in /proc/lustre"); - } - -out_dev: - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - if (osc) - OBD_FREE(osc, strlen(osc) + 1); - printk("AMRUT 2\n"); - - RETURN(0); - -out_cbd: - ll_commitcbd_cleanup(sbi); -out_osc: - obd_disconnect(&sbi->ll_osc_conn, 0); -out_mdc: - obd_disconnect(&sbi->ll_mdc_conn, 0); -out_free: - lprocfs_unregister_mountpoint(sbi); - OBD_FREE(sbi, sizeof(*sbi)); - - goto out_dev; -} /* ll_fill_super */ - - -int ll_setattr_raw(struct inode *inode, struct iattr *attr) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdc_op_data op_data; - int err = 0; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - - LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR); - if ((attr->ia_valid & ATTR_SIZE)) { - /* writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * us into zero extending the file to the newly truncated - * size, and this has bad implications for a racing o_append. - * if we're extending our size we need to flush the pages - * with the correct i_size before vmtruncate stomps on - * the new i_size. again, this can only find pages to - * purge if the PW lock that generated them is still held. - */ - if ( attr->ia_size > inode->i_size ) { - filemap_fdatasync(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); - } - err = vmtruncate(inode, attr->ia_size); - if (err) - RETURN(err); - } - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (!attr->ia_valid) - RETURN(0); - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME; - oa.o_mtime = LTIME_S(attr->ia_mtime); - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - RETURN(err); -} struct super_block * ll_get_sb(struct file_system_type *fs_type, - int flags, char *devname, void * data) + int flags, const char *devname, void * data) { + /* calls back in fill super */ return get_sb_nodev(fs_type, flags, data, ll_fill_super); } -static void ll_put_super(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct list_head *tmp, *next; - struct ll_fid rootfid; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - list_del(&sbi->ll_conn_chain); - ll_commitcbd_cleanup(sbi); - obd_disconnect(&sbi->ll_osc_conn, 0); - - /* NULL request to force sync on the MDS, and get the last_committed - * value to flush remaining RPCs from the pending queue on client. - * - * XXX This should be an mdc_sync() call to sync the whole MDS fs, - * which we can call for other reasons as well. 
- */ - mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - - lprocfs_unregister_mountpoint(sbi); - if (sbi->ll_proc_root) { - lprocfs_remove(sbi->ll_proc_root); - sbi->ll_proc_root = NULL; - } - - obd_disconnect(&sbi->ll_mdc_conn, 0); - - spin_lock(&dcache_lock); - list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){ - struct dentry *dentry = list_entry(tmp, struct dentry, d_hash); - shrink_dcache_parent(dentry); - } - spin_unlock(&dcache_lock); - - OBD_FREE(sbi, sizeof(*sbi)); - - EXIT; -} /* ll_put_super */ - -static void ll_clear_inode(struct inode *inode) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - -#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?" -#if 0 - rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK); - if (rc < 0) { - CERROR("ll_mdc_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - - if (lli->lli_smd) { - rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0); - if (rc < 0) { - CERROR("obd_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - } -#endif - - if (atomic_read(&inode->i_count) != 0) - CERROR("clearing in-use inode %lu: count = %d\n", - inode->i_ino, atomic_read(&inode->i_count)); - - if (lli->lli_smd) { - obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd); - lli->lli_smd = NULL; - } - - if (lli->lli_symlink_name) { - OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1); - lli->lli_symlink_name = NULL; - } - - EXIT; -} - -#if 0 -static void ll_delete_inode(struct inode *inode) -{ - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - if (S_ISREG(inode->i_mode)) { - int err; - struct obdo *oa; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - - /* mcreate with no open */ - if (!lsm) - GOTO(out, 0); - - if (lsm->lsm_object_id == 0) { - CERROR("This really happens\n"); - /* No obdo was ever 
created */ - GOTO(out, 0); - } - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out, -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - oa->o_mode = inode->i_mode; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - - err = obd_destroy(ll_i2obdconn(inode), oa, lsm); - obdo_free(oa); - if (err) - CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n", - lsm->lsm_object_id, err); - } -out: - clear_inode(inode); - EXIT; -} -#endif - -/* like inode_setattr, but doesn't mark the inode dirty */ -static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc) -{ - unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if ((ia_valid & ATTR_SIZE) && trunc) { - if (attr->ia_size > ll_file_maxbytes(inode)) { - error = -EFBIG; - goto out; - } - error = vmtruncate(inode, attr->ia_size); - if (error) - goto out; - } else if (ia_valid & ATTR_SIZE) - inode->i_size = attr->ia_size; - - if (ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; - if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; - if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; - if (ia_valid & ATTR_MODE) { - inode->i_mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - inode->i_mode &= ~S_ISGID; - } -out: - return error; -} - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - int err = 0; - - ENTRY; - - /* change incore inode */ - err = ll_attr2inode(inode, attr, do_trunc); - if (err) - RETURN(err); - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid) { - struct mdc_op_data op_data; - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_ERROR, "setting mtime on OST\n"); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME; - oa.o_mtime = LTIME_S(attr->ia_mtime); - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - } - - RETURN(err); -} - -int ll_setattr(struct dentry *de, struct iattr *attr) -{ - int rc = inode_change_ok(de->d_inode, attr); - CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name); - if (rc) - return rc; - - LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR); - return ll_inode_setattr(de->d_inode, attr, 1); -} - -static int ll_statfs(struct super_block *sb, struct statfs *sfs) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_statfs osfs; - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS); - memset(sfs, 0, sizeof(*sfs)); - rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); - statfs_unpack(sfs, &osfs); - if (rc) - CERROR("mdc_statfs fails: rc = %d\n", rc); - else - CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - /* temporary until mds_statfs returns statfs info for all OSTs */ - if (!rc) { - rc = obd_statfs(&sbi->ll_osc_conn, &osfs); - if (rc) { - CERROR("obd_statfs fails: rc = %d\n", rc); - GOTO(out, rc); - } - CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64 - " 
objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - while (osfs.os_blocks > ~0UL) { - sfs->f_bsize <<= 1; - - osfs.os_blocks >>= 1; - osfs.os_bfree >>= 1; - osfs.os_bavail >>= 1; - } - sfs->f_blocks = osfs.os_blocks; - sfs->f_bfree = osfs.os_bfree; - sfs->f_bavail = osfs.os_bavail; - if (osfs.os_ffree < (__u64)sfs->f_ffree) { - sfs->f_files = (sfs->f_files - sfs->f_ffree) + - osfs.os_ffree; - sfs->f_ffree = osfs.os_ffree; - } - } - -out: - RETURN(rc); -} - -void ll_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (lli->lli_smd == NULL) { - lli->lli_smd = lsm; - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) - lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; - } else { - LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm))); - } - } - - if (body->valid & OBD_MD_FLID) - inode->i_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) - LTIME_S(inode->i_atime) = body->atime; - if (body->valid & OBD_MD_FLMTIME) - LTIME_S(inode->i_mtime) = body->mtime; - if (body->valid & OBD_MD_FLCTIME) - LTIME_S(inode->i_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - if (body->valid & OBD_MD_FLUID) - inode->i_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - inode->i_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = body->flags; - if (body->valid & OBD_MD_FLNLINK) - inode->i_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - inode->i_generation = body->generation; - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = to_kdev_t(body->rdev); - if (body->valid & OBD_MD_FLSIZE) - inode->i_size = body->size; - if (body->valid 
& OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; -} - -int ll_read_inode2(struct inode *inode, void *opaque) -{ - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - - sema_init(&lli->lli_open_sem, 1); - /* these are 2.4 only, but putting them here for consistency.. */ - spin_lock_init(&lli->lli_read_extent_lock); - INIT_LIST_HEAD(&lli->lli_read_extents); - ll_lldo_init(&lli->lli_dirty); - lli->lli_flags = 0; - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - LASSERT(!lli->lli_smd); - - /* core attributes first */ - ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL); - - /* OIDEBUG(inode); */ - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ll_file_inode_operations; - inode->i_fop = &ll_file_operations; - inode->i_mapping->a_ops = &ll_aops; - EXIT; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - inode->i_mapping->a_ops = &ll_dir_aops; - EXIT; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ll_fast_symlink_inode_operations; - EXIT; - } else { - inode->i_op = &ll_special_inode_operations; - init_special_inode(inode, inode->i_mode, - kdev_t_to_nr(inode->i_rdev)); - EXIT; - } - - return rc; -} - - -void ll_umount_begin(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data ioc_data = { 0 }; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - obd = class_conn2obd(&sbi->ll_mdc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data, - &ioc_data, NULL); - - obd = class_conn2obd(&sbi->ll_osc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data, - &ioc_data, NULL); - - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. 
For now, we just invalidate the requests, - * schedule, and hope. - */ - schedule(); - - EXIT; -} - static kmem_cache_t *ll_inode_cachep; static struct inode *ll_alloc_inode(struct super_block *sb) { struct ll_inode_info *lli; - LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE); + lprocfs_counter_incr((ll_s2sbi(sb))->ll_stats, LPROC_LL_ALLOC_INODE); OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli); if (lli == NULL) return NULL; - memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli); - sema_init(&lli->lli_open_sem, 1); - init_MUTEX(&lli->lli_size_valid_sem); - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; + inode_init_once(&lli->lli_vfs_inode); + ll_lli_init(lli); return &lli->lli_vfs_inode; } static void ll_destroy_inode(struct inode *inode) { - OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode), - sizeof(struct ll_inode_info)); + struct ll_inode_info *ptr = ll_i2info(inode); + OBD_SLAB_FREE(ptr, ll_inode_cachep, sizeof(*ptr)); } static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) @@ -792,15 +91,12 @@ void ll_destroy_inodecache(void) CERROR("ll_inode_cache: not all structures were freed\n"); } - - /* exported operations */ struct super_operations ll_super_operations = { alloc_inode: ll_alloc_inode, destroy_inode: ll_destroy_inode, clear_inode: ll_clear_inode, -// delete_inode: ll_delete_inode, put_super: ll_put_super, statfs: ll_statfs, umount_begin: ll_umount_begin diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 19d234e..427f7f0 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -24,12 +24,10 @@ #include #include #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif #define DEBUG_SUBSYSTEM S_LLITE #include +#include "llite_internal.h" static int ll_readlink_internal(struct inode *inode, struct ptlrpc_request **request, char **symname) @@ -117,82 +115,46 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) RETURN(rc); } -#if 
(LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_follow_link(struct dentry *dentry, struct nameidata *nd, - struct lookup_intent *it) +static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct ll_inode_info *lli = ll_i2info(inode); + struct lookup_intent *it = ll_nd2it(nd); struct ptlrpc_request *request; - int op = 0, mode = 0, rc; + int rc; char *symname; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op\n"); if (it != NULL) { - op = it->it_op; - mode = it->it_mode; - - ll_intent_release(dentry, it); - } - - down(&lli->lli_open_sem); - rc = ll_readlink_internal(inode, &request, &symname); - up(&lli->lli_open_sem); - if (rc) - GOTO(out, rc); + int op = it->it_op; + int mode = it->it_mode; - if (it != NULL) { + ll_intent_release(it); it->it_op = op; it->it_mode = mode; } - rc = vfs_follow_link_it(nd, symname, it); - ptlrpc_req_finished(request); - out: - RETURN(rc); -} -#else -static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *inode = dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *request; - int op = 0, mode = 0, rc; - char *symname; - ENTRY; - - op = nd->it.it_op; - mode = nd->it.it_mode; - - ll_intent_release(dentry, &nd->it); - + CDEBUG(D_VFSTRACE, "VFS Op\n"); down(&lli->lli_open_sem); - rc = ll_readlink_internal(inode, &request, &symname); + up(&lli->lli_open_sem); if (rc) GOTO(out, rc); - nd->it.it_op = op; - nd->it.it_mode = mode; - rc = vfs_follow_link(nd, symname); ptlrpc_req_finished(request); out: - up(&lli->lli_open_sem); - RETURN(rc); } -#endif -extern int ll_inode_revalidate(struct dentry *dentry); -extern int ll_setattr(struct dentry *de, struct iattr *attr); struct inode_operations ll_fast_symlink_inode_operations = { readlink: ll_readlink, setattr: ll_setattr, setattr_raw: ll_setattr_raw, - follow_link2: ll_follow_link, + follow_link: ll_follow_link, #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - 
revalidate: ll_inode_revalidate + revalidate_it: ll_inode_revalidate_it +#else + getattr_it: ll_getattr #endif }; diff --git a/lustre/lov/.cvsignore b/lustre/lov/.cvsignore index e995588..e69dc6d 100644 --- a/lustre/lov/.cvsignore +++ b/lustre/lov/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.cmd diff --git a/lustre/lov/Makefile.am b/lustre/lov/Makefile.am index 879e44d..83dba1a 100644 --- a/lustre/lov/Makefile.am +++ b/lustre/lov/Makefile.am @@ -7,12 +7,12 @@ DEFS= if LIBLUSTRE lib_LIBRARIES = liblov.a -liblov_a_SOURCES = lov_obd.c lov_pack.c +liblov_a_SOURCES = lov_obd.c lov_pack.c lov_internal.h else MODULE = lov modulefs_DATA = lov.o EXTRA_PROGRAMS = lov -lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c +lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c lov_internal.h endif include $(top_srcdir)/Rules diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 2974b2a..9562a4f 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -47,6 +47,11 @@ #include #include +#include "lov_internal.h" + +static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, + int stripeno, obd_off *obd_off); + struct lov_file_handles { struct portals_handle lfh_handle; atomic_t lfh_refcount; @@ -68,7 +73,7 @@ static void lov_lfh_addref(void *lfhp) struct lov_file_handles *lfh = lfhp; atomic_inc(&lfh->lfh_refcount); - CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh, + CDEBUG(D_MALLOC, "GETting lfh %p : new refcount %d\n", lfh, atomic_read(&lfh->lfh_refcount)); } @@ -99,7 +104,7 @@ static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle) static void lov_lfh_put(struct lov_file_handles *lfh) { - CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh, + CDEBUG(D_MALLOC, "PUTting lfh %p : new refcount %d\n", lfh, atomic_read(&lfh->lfh_refcount) - 1); LASSERT(atomic_read(&lfh->lfh_refcount) > 0 && atomic_read(&lfh->lfh_refcount) < 0x5a5a); @@ -174,19 +179,18 @@ int lov_attach(struct obd_device *dev, obd_count len, void *data) 
struct proc_dir_entry *entry; int rc; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(lov, &lvars); rc = lprocfs_obd_attach(dev, lvars.obd_vars); - if (rc) + if (rc) return rc; entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry); - if (entry == NULL) + if (entry == NULL) RETURN(-ENOMEM); - entry->proc_fops = &ll_proc_target_fops; + entry->proc_fops = &lov_proc_target_fops; entry->data = dev; - + return rc; - } int lov_detach(struct obd_device *dev) @@ -214,15 +218,17 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, if (rc) RETURN(rc); + exp = class_conn2export(conn); + spin_lock_init(&exp->exp_lov_data.led_lock); + INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head); + /* We don't want to actually do the underlying connections more than * once, so keep track. */ lov->refcount++; - if (lov->refcount > 1) + if (lov->refcount > 1) { + class_export_put(exp); RETURN(0); - - exp = class_conn2export(conn); - spin_lock_init(&exp->exp_lov_data.led_lock); - INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head); + } /* retrieve LOV metadata from MDS */ rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid); @@ -248,9 +254,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are * terminated), but I still need to verify it makes overall * sense */ - mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc)); - LASSERT (mdesc != NULL); - LASSERT_REPSWABBED (req, 0); + mdesc = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*mdesc)); + LASSERT(mdesc != NULL); + LASSERT_REPSWABBED(req, 0); *desc = *mdesc; @@ -279,15 +285,15 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, * demands on memory here. 
*/ lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count; OBD_ALLOC(lov->tgts, lov->bufsize); - if (!lov->tgts) { + if (lov->tgts == NULL) { CERROR("Out of memory\n"); GOTO(out_req, rc = -ENOMEM); } uuids = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*uuids) * desc->ld_tgt_count); - LASSERT (uuids != NULL); - LASSERT_REPSWABBED (req, 1); + LASSERT(uuids != NULL); + LASSERT_REPSWABBED(req, 1); for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) { struct obd_uuid *uuid = &tgts->uuid; @@ -330,7 +336,9 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, } mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL); - ptlrpc_req_finished (req); + mdc->cl_max_mds_cookiesize = desc->ld_tgt_count * + sizeof(struct llog_cookie); + ptlrpc_req_finished(req); class_export_put(exp); RETURN (0); @@ -356,7 +364,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, RETURN (rc); } -static int lov_disconnect(struct lustre_handle *conn, int failover) +static int lov_disconnect(struct lustre_handle *conn, int flags) { struct obd_device *obd = class_conn2obd(conn); struct lov_obd *lov = &obd->u.lov; @@ -383,7 +391,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover) class_conn2obd(&lov->tgts[i].conn); osc_obd->obd_no_recov = 1; } - rc = obd_disconnect(&lov->tgts[i].conn, failover); + rc = obd_disconnect(&lov->tgts[i].conn, flags); if (rc) { if (lov->tgts[i].active) { CERROR("Target %s disconnect error %d\n", @@ -400,6 +408,7 @@ static int lov_disconnect(struct lustre_handle *conn, int failover) lov->bufsize = 0; lov->tgts = NULL; + out_local: exp = class_conn2export(conn); if (exp == NULL) { CERROR("export handle "LPU64" invalid! 
If you can reproduce, " @@ -421,7 +430,6 @@ static int lov_disconnect(struct lustre_handle *conn, int failover) spin_unlock(&exp->exp_lov_data.led_lock); class_export_put(exp); - out_local: rc = class_disconnect(conn, 0); RETURN(rc); } @@ -548,6 +556,8 @@ static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size, static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, struct lov_stripe_md *lsm, int stripeno, int *set) { + valid &= src->o_valid; + if (*set) { if (valid & OBD_MD_FLSIZE) { /* this handles sparse files properly */ @@ -566,68 +576,102 @@ static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) tgt->o_mtime = src->o_mtime; } else { - obdo_cpy_md(tgt, src, valid); + memcpy(tgt, src, sizeof(*tgt)); + tgt->o_id = lsm->lsm_object_id; if (valid & OBD_MD_FLSIZE) tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno); *set = 1; } } +#ifndef log2 +#define log2(n) ffz(~(n)) +#endif + /* the LOV expects oa->o_id to be set to the LOV object id */ -static int lov_create(struct lustre_handle *conn, struct obdo *oa, +static int lov_create(struct lustre_handle *conn, struct obdo *src_oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct obd_export *export = class_conn2export(conn); struct lov_obd *lov; struct lov_stripe_md *lsm; - struct lov_oinfo *loi; - struct obdo *tmp; + struct lov_oinfo *loi = NULL; + struct obdo *tmp_oa, *ret_oa; + struct llog_cookie *cookies = NULL; unsigned ost_count, ost_idx; - int set = 0, obj_alloc = 0; - int rc = 0, i; + int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i; ENTRY; LASSERT(ea); if (!export) - GOTO(out_exp, rc = -EINVAL); + RETURN(-EINVAL); lov = &export->exp_obd->u.lov; if (!lov->desc.ld_active_tgt_count) GOTO(out_exp, rc = -EIO); - tmp = obdo_alloc(); - if (!tmp) + ret_oa = obdo_alloc(); + if (!ret_oa) GOTO(out_exp, rc = -ENOMEM); + tmp_oa = obdo_alloc(); + if (!tmp_oa) + GOTO(out_oa, 
rc = -ENOMEM); + lsm = *ea; if (!lsm) { - rc = obd_alloc_memmd(conn, &lsm); + int stripes; + ost_count = lov_get_stripecnt(lov, 0); + + /* If the MDS file was truncated up to some size, stripe over + * enough OSTs to allow the file to be created at that size. + */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1; + do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12)); + + if (stripes > lov->desc.ld_active_tgt_count) + GOTO(out_exp, rc = -EFBIG); + if (stripes < ost_count) + stripes = ost_count; + } else + stripes = ost_count; + + rc = lov_alloc_memmd(&lsm, stripes); if (rc < 0) GOTO(out_tmp, rc); rc = 0; - lsm->lsm_magic = LOV_MAGIC; } ost_count = lov->desc.ld_tgt_count; - LASSERT(oa->o_valid & OBD_MD_FLID); - lsm->lsm_object_id = oa->o_id; + LASSERT(src_oa->o_valid & OBD_MD_FLID); + lsm->lsm_object_id = src_oa->o_id; if (!lsm->lsm_stripe_size) lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; if (!*ea || lsm->lsm_stripe_offset >= ost_count) { get_random_bytes(&ost_idx, 2); ost_idx %= ost_count; - } else + } else { ost_idx = lsm->lsm_stripe_offset; + } CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); + /* XXX LOV STACKING: need to figure out how many real OSCs */ + if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { + oti_alloc_cookies(oti, lsm->lsm_stripe_count); + if (!oti->oti_logcookies) + GOTO(out_cleanup, rc = -ENOMEM); + cookies = oti->oti_logcookies; + } + loi = lsm->lsm_oinfo; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { struct lov_stripe_md obj_md; @@ -640,14 +684,30 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, } /* create data objects with "parent" OA */ - memcpy(tmp, oa, sizeof(*tmp)); + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); + + /* XXX When we start creating objects on demand, we need to + * make sure that we always create the object on the + * stripe which holds the 
existing file size. + */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + if (lov_stripe_offset(lsm, src_oa->o_size, i, + &tmp_oa->o_size) < 0 && + tmp_oa->o_size) + tmp_oa->o_size--; + + CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", + i, tmp_oa->o_size, src_oa->o_size); + } + /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti); + err = obd_create(&lov->tgts[ost_idx].conn, tmp_oa,&obj_mdp,oti); if (err) { if (lov->tgts[ost_idx].active) { CERROR("error creating objid "LPX64" sub-object" - " on OST idx %d/%d: rc = %d\n", oa->o_id, - ost_idx, lsm->lsm_stripe_count, err); + " on OST idx %d/%d: rc = %d\n", + src_oa->o_id, ost_idx, + lsm->lsm_stripe_count, err); if (err > 0) { CERROR("obd_create returned invalid " "err %d\n", err); @@ -658,17 +718,22 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, rc = err; continue; } - loi->loi_id = tmp->o_id; + loi->loi_id = tmp_oa->o_id; loi->loi_ost_idx = ost_idx; CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n", lsm->lsm_object_id, loi->loi_id, ost_idx); if (set == 0) lsm->lsm_stripe_offset = ost_idx; - lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set); - ot_init(&loi->loi_dirty_ot_inline); + lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, + obj_alloc, &set); loi->loi_dirty_ot = &loi->loi_dirty_ot_inline; + ot_init(loi->loi_dirty_ot); + if (cookies) + ++oti->oti_logcookies; + if (tmp_oa->o_valid & OBD_MD_FLCOOKIE) + ++cookie_sent; ++obj_alloc; ++loi; @@ -677,6 +742,12 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, GOTO(out_done, rc = 0); } + /* If we were passed specific striping params, then a failure to + * meet those requirements is an error, since we can't reallocate + * that memory (it might be part of a larger array or something). + * + * We can only get here if lsm_stripe_count was originally > 1. 
+ */ if (*ea != NULL) { CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc); @@ -686,27 +757,61 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, } else { struct lov_stripe_md *lsm_new; /* XXX LOV STACKING call into osc for sizes */ - unsigned size = lov_stripe_md_size(obj_alloc); + unsigned oldsize, newsize; + + if (oti && cookies && cookie_sent) { + oldsize = lsm->lsm_stripe_count * sizeof(*cookies); + newsize = obj_alloc * sizeof(*cookies); + + oti_alloc_cookies(oti, obj_alloc); + if (oti->oti_logcookies) { + memcpy(oti->oti_logcookies, cookies, newsize); + OBD_FREE(cookies, oldsize); + cookies = oti->oti_logcookies; + } else { + CWARN("'leaking' %d bytes\n", oldsize-newsize); + } + } CERROR("reallocating LSM for objid "LPX64": old %u new %u\n", lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count); - OBD_ALLOC(lsm_new, size); - if (!lsm_new) - GOTO(out_cleanup, rc = -ENOMEM); - memcpy(lsm_new, lsm, size); - lsm_new->lsm_stripe_count = obj_alloc; - - /* XXX LOV STACKING call into osc for sizes */ - OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count)); - lsm = lsm_new; - + oldsize = lov_stripe_md_size(lsm->lsm_stripe_count); + newsize = lov_stripe_md_size(obj_alloc); + OBD_ALLOC(lsm_new, newsize); + if (lsm_new != NULL) { + memcpy(lsm_new, lsm, newsize); + lsm_new->lsm_stripe_count = obj_alloc; + OBD_FREE(lsm, newsize); + lsm = lsm_new; + } else { + CWARN("'leaking' %d bytes\n", oldsize - newsize); + } rc = 0; } out_done: *ea = lsm; + if (src_oa->o_valid & OBD_MD_FLSIZE && + ret_oa->o_size != src_oa->o_size) { + CERROR("original size "LPU64" isn't new object size "LPU64"\n", + src_oa->o_size, ret_oa->o_size); + LBUG(); + } + ret_oa->o_id = src_oa->o_id; + memcpy(src_oa, ret_oa, sizeof(*src_oa)); out_tmp: - obdo_free(tmp); + obdo_free(tmp_oa); + out_oa: + obdo_free(ret_oa); + if (oti && cookies) { + oti->oti_logcookies = cookies; + if (!cookie_sent) { + 
oti_free_cookies(oti); + src_oa->o_valid &= ~OBD_MD_FLCOOKIE; + } else { + src_oa->o_valid |= OBD_MD_FLCOOKIE; + } + } out_exp: class_export_put(export); return rc; @@ -717,15 +822,26 @@ static int lov_create(struct lustre_handle *conn, struct obdo *oa, --loi; /* destroy already created objects here */ - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL, - NULL); + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); + tmp_oa->o_id = loi->loi_id; + + if (oti && cookie_sent) { + err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn, + NULL, 1, --oti->oti_logcookies, + OBD_LLOG_FL_SENDNOW); + if (err) + CERROR("Failed to cancel objid "LPX64" subobj " + LPX64" cookie on OST idx %d: rc = %d\n", + src_oa->o_id, loi->loi_id, + loi->loi_ost_idx, err); + } + + err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa, + NULL, oti); if (err) - CERROR("Failed to uncreate objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); + CERROR("Failed to uncreate objid "LPX64" subobj "LPX64 + " on OST idx %d: rc = %d\n", src_oa->o_id, + loi->loi_id, loi->loi_ost_idx, err); } if (*ea == NULL) obd_free_memmd(conn, &lsm); @@ -779,12 +895,12 @@ static int lov_destroy(struct lustre_handle *conn, struct obdo *oa, memcpy(&tmp, oa, sizeof(tmp)); tmp.o_id = loi->loi_id; if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); + memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); else tmp.o_valid &= ~OBD_MD_FLHANDLE; err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp, - NULL, NULL); + NULL, oti); if (err && lov->tgts[loi->loi_ost_idx].active) { CERROR("error: destroying objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", @@ -839,8 +955,8 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa, memcpy(&tmp, oa, sizeof(tmp)); tmp.o_id = loi->loi_id; if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, 
- FD_OSTDATA_SIZE); + memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); else tmp.o_valid &= ~OBD_MD_FLHANDLE; @@ -867,12 +983,13 @@ static int lov_getattr(struct lustre_handle *conn, struct obdo *oa, return rc; } -static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, - struct lov_getattr_async_args *aa, int rc) +static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, + int rc) { + struct lov_getattr_async_args *aa = data; struct lov_stripe_md *lsm = aa->aa_lsm; struct obdo *oa = aa->aa_oa; - struct obdo *obdos = aa->aa_stripe_oas; + struct obdo *obdos = aa->aa_obdos; struct lov_oinfo *loi; int i; int set = 0; @@ -881,8 +998,8 @@ static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, if (rc == 0) { /* NB all stripe requests succeeded to get here */ - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++,loi++) { + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; + i++, loi++) { if (obdos[i].o_valid == 0) /* inactive stripe */ continue; @@ -955,8 +1072,8 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa, memcpy(&obdos[i], oa, sizeof(obdos[i])); obdos[i].o_id = loi->loi_id; if (lfh) - memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i, - FD_OSTDATA_SIZE); + memcpy(obdo_handle(&obdos[i]), &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); else obdos[i].o_valid &= ~OBD_MD_FLHANDLE; @@ -980,7 +1097,7 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa, aa = (struct lov_getattr_async_args *)&rqset->set_args; aa->aa_lsm = lsm; aa->aa_oa = oa; - aa->aa_stripe_oas = obdos; + aa->aa_obdos = obdos; GOTO (out, rc = 0); out_obdos: @@ -992,10 +1109,10 @@ static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa, RETURN (rc); } -static int lov_setattr(struct lustre_handle *conn, struct obdo *oa, +static int lov_setattr(struct lustre_handle *conn, struct obdo *src_oa, struct lov_stripe_md *lsm, struct 
obd_trans_info *oti) { - struct obdo *tmp; + struct obdo *tmp_oa, *ret_oa; struct obd_export *export = class_conn2export(conn); struct lov_obd *lov; struct lov_oinfo *loi; @@ -1009,18 +1126,17 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa, if (!export || !export->exp_obd) GOTO(out, rc = -ENODEV); - /* size changes should go through punch and not setattr */ - LASSERT(!(oa->o_valid & OBD_MD_FLSIZE)); - - /* for now, we only expect mtime updates here */ - LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME))); - - tmp = obdo_alloc(); - if (!tmp) + /* for now, we only expect time updates here */ + LASSERT(!(src_oa->o_valid & ~(OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLMODE| + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME))); + ret_oa = obdo_alloc(); + if (!ret_oa) GOTO(out, rc = -ENOMEM); - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); + tmp_oa = obdo_alloc(); + if (!tmp_oa) + GOTO(out_oa, rc = -ENOMEM); lov = &export->exp_obd->u.lov; for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { @@ -1031,46 +1147,54 @@ static int lov_setattr(struct lustre_handle *conn, struct obdo *oa, continue; } - obdo_cpy_md(tmp, oa, oa->o_valid); + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); if (lfh) - memcpy(obdo_handle(tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); + memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); else - tmp->o_valid &= ~OBD_MD_FLHANDLE; + tmp_oa->o_valid &= ~OBD_MD_FLHANDLE; - tmp->o_id = loi->loi_id; + tmp_oa->o_id = loi->loi_id; - err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, + err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa, NULL, NULL); if (err) { if (lov->tgts[loi->loi_ost_idx].active) { CERROR("error: setattr objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); + src_oa->o_id, loi->loi_id, + loi->loi_ost_idx, err); if (!rc) rc = err; } - } else - set = 1; + continue; 
+ } + + lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set); } - obdo_free(tmp); if (!set && !rc) rc = -EIO; if (lfh != NULL) lov_lfh_put(lfh); - GOTO(out, rc); - out: + + ret_oa->o_id = src_oa->o_id; + memcpy(src_oa, ret_oa, sizeof(*src_oa)); + GOTO(out_tmp, rc); +out_tmp: + obdo_free(tmp_oa); +out_oa: + obdo_free(ret_oa); +out: class_export_put(export); return rc; } -static int lov_open(struct lustre_handle *conn, struct obdo *oa, +static int lov_open(struct lustre_handle *conn, struct obdo *src_oa, struct lov_stripe_md *lsm, struct obd_trans_info *oti, struct obd_client_handle *och) { - struct obdo *tmp; /* on the heap here, on the stack in lov_close? */ + struct obdo *tmp_oa, *ret_oa; struct obd_export *export = class_conn2export(conn); struct lov_obd *lov; struct lov_oinfo *loi; @@ -1085,20 +1209,24 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa, if (!export || !export->exp_obd) GOTO(out_exp, rc = -ENODEV); - tmp = obdo_alloc(); - if (!tmp) + ret_oa = obdo_alloc(); + if (!ret_oa) GOTO(out_exp, rc = -ENOMEM); + tmp_oa = obdo_alloc(); + if (!tmp_oa) + GOTO(out_oa, rc = -ENOMEM); + lfh = lov_lfh_new(); if (lfh == NULL) GOTO(out_tmp, rc = -ENOMEM); - OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och); + OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof(*och)); if (!lfh->lfh_och) GOTO(out_lfh, rc = -ENOMEM); lov = &export->exp_obd->u.lov; - oa->o_size = 0; - oa->o_blocks = 0; + src_oa->o_size = 0; + src_oa->o_blocks = 0; for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { if (lov->tgts[loi->loi_ost_idx].active == 0) { CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); @@ -1106,11 +1234,11 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa, } /* create data objects with "parent" OA */ - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); + tmp_oa->o_id = loi->loi_id; - rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, - NULL, 
NULL, lfh->lfh_och + i); + rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa, + NULL, NULL, &lfh->lfh_och[i]); if (rc) { if (!lov->tgts[loi->loi_ost_idx].active) { rc = 0; @@ -1118,27 +1246,31 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa, } CERROR("error: open objid "LPX64" subobj "LPX64 " on OST idx %d: rc = %d\n", - oa->o_id, lsm->lsm_oinfo[i].loi_id, + src_oa->o_id, lsm->lsm_oinfo[i].loi_id, loi->loi_ost_idx, rc); goto out_handles; } - lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set); + lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set); } lfh->lfh_count = lsm->lsm_stripe_count; och->och_fh.cookie = lfh->lfh_handle.h_cookie; - obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie; - oa->o_valid |= OBD_MD_FLHANDLE; + obdo_handle(ret_oa)->cookie = lfh->lfh_handle.h_cookie; + ret_oa->o_valid |= OBD_MD_FLHANDLE; + ret_oa->o_id = src_oa->o_id; + memcpy(src_oa, ret_oa, sizeof(*src_oa)); - /* llfh refcount transfers to list */ + /* lfh refcount transfers to list */ spin_lock(&export->exp_lov_data.led_lock); list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head); spin_unlock(&export->exp_lov_data.led_lock); GOTO(out_tmp, rc); out_tmp: - obdo_free(tmp); + obdo_free(tmp_oa); + out_oa: + obdo_free(ret_oa); out_exp: class_export_put(export); return rc; @@ -1150,16 +1282,16 @@ static int lov_open(struct lustre_handle *conn, struct obdo *oa, if (lov->tgts[loi->loi_ost_idx].active == 0) continue; - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE); + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); + tmp_oa->o_id = loi->loi_id; + memcpy(obdo_handle(tmp_oa), &lfh->lfh_och[i], FD_OSTDATA_SIZE); - err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp, + err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp_oa, NULL, NULL); if (err && lov->tgts[loi->loi_ost_idx].active) { CERROR("error: closing objid "LPX64" subobj "LPX64 " on OST idx %d after open error: rc=%d\n", - 
oa->o_id, loi->loi_id, loi->loi_ost_idx, err); + src_oa->o_id, loi->loi_id, loi->loi_ost_idx,err); } } @@ -1189,6 +1321,8 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa, if (oa->o_valid & OBD_MD_FLHANDLE) lfh = lov_handle2lfh(obdo_handle(oa)); + if (!lfh) + LBUG(); lov = &export->exp_obd->u.lov; for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { @@ -1198,7 +1332,7 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa, memcpy(&tmp, oa, sizeof(tmp)); tmp.o_id = loi->loi_id; if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, + memcpy(obdo_handle(&tmp), &lfh->lfh_och[i], FD_OSTDATA_SIZE); else tmp.o_valid &= ~OBD_MD_FLHANDLE; @@ -1223,18 +1357,16 @@ static int lov_close(struct lustre_handle *conn, struct obdo *oa, OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE); lov_lfh_destroy(lfh); + LASSERT(atomic_read(&lfh->lfh_refcount) == 1); lov_lfh_put(lfh); /* balance handle2lfh above */ - } + } else + LBUG(); GOTO(out, rc); out: class_export_put(export); return rc; } -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - /* we have an offset in file backed by an lov and want to find out where * that offset lands in our given stripe of the file. 
for the easy * case where the offset is within the stripe, we just have to scale the @@ -1404,8 +1536,8 @@ static int lov_punch(struct lustre_handle *conn, struct obdo *oa, memcpy(&tmp, oa, sizeof(tmp)); tmp.o_id = loi->loi_id; if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); + memcpy(obdo_handle(&tmp), &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); else tmp.o_valid &= ~OBD_MD_FLHANDLE; @@ -1455,7 +1587,7 @@ static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm, return 0; } -static int lov_brw(int cmd, struct lustre_handle *conn, +static int lov_brw(int cmd, struct lustre_handle *conn, struct obdo *src_oa, struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pga, struct obd_trans_info *oti) { @@ -1467,10 +1599,12 @@ static int lov_brw(int cmd, struct lustre_handle *conn, int ost_idx; } *stripeinfo, *si, *si_last; struct obd_export *export = class_conn2export(conn); + struct obdo *ret_oa = NULL, *tmp_oa = NULL; + struct lov_file_handles *lfh = NULL; struct lov_obd *lov; struct brw_page *ioarr; struct lov_oinfo *loi; - int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count; + int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1495,6 +1629,21 @@ static int lov_brw(int cmd, struct lustre_handle *conn, if (!ioarr) GOTO(out_where, rc = -ENOMEM); + if (src_oa) { + ret_oa = obdo_alloc(); + if (!ret_oa) + GOTO(out_ioarr, rc = -ENOMEM); + + tmp_oa = obdo_alloc(); + if (!tmp_oa) + GOTO(out_oa, rc = -ENOMEM); + + if (src_oa->o_valid & OBD_MD_FLHANDLE) + lfh = lov_handle2lfh(obdo_handle(src_oa)); + else + src_oa->o_valid &= ~OBD_MD_FLHANDLE; + } + for (i = 0; i < oa_bufs; i++) { where[i] = lov_stripe_number(lsm, pga[i].off); stripeinfo[where[i]].bufct++; @@ -1524,23 +1673,46 @@ static int lov_brw(int cmd, struct lustre_handle *conn, if (lov->tgts[si->ost_idx].active == 0) { CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx); - GOTO(out_ioarr, rc = -EIO); 
+ GOTO(out_oa, rc = -EIO); } if (si->bufct) { LASSERT(shift < oa_bufs); - rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, + if (src_oa) { + memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); + if (lfh) + memcpy(obdo_handle(tmp_oa), + &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); + } + + tmp_oa->o_id = si->lsm.lsm_object_id; + rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, tmp_oa, &si->lsm, si->bufct, &ioarr[shift], oti); if (rc) GOTO(out_ioarr, rc); + + lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, + i, &set); } } - GOTO(out_ioarr, rc); + + ret_oa->o_id = src_oa->o_id; + memcpy(src_oa, ret_oa, sizeof(*src_oa)); + + GOTO(out_oa, rc); + out_oa: + if (tmp_oa) + obdo_free(tmp_oa); + if (ret_oa) + obdo_free(ret_oa); out_ioarr: OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); out_where: OBD_FREE(where, sizeof(*where) * oa_bufs); + if (lfh) + lov_lfh_put(lfh); out_sinfo: OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); out_exp: @@ -1548,18 +1720,43 @@ static int lov_brw(int cmd, struct lustre_handle *conn, return rc; } -static int lov_brw_interpret (struct ptlrpc_request_set *set, - struct lov_brw_async_args *aa, int rc) +static int lov_brw_interpret(struct ptlrpc_request_set *rqset, + struct lov_brw_async_args *aa, int rc) { - obd_count oa_bufs = aa->aa_oa_bufs; - struct brw_page *ioarr = aa->aa_ioarr; + struct lov_stripe_md *lsm = aa->aa_lsm; + obd_count oa_bufs = aa->aa_oa_bufs; + struct obdo *oa = aa->aa_oa; + struct obdo *obdos = aa->aa_obdos; + struct brw_page *ioarr = aa->aa_ioarr; + struct lov_oinfo *loi; + int i, set = 0; ENTRY; - OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs); - RETURN (rc); + if (rc == 0) { + /* NB all stripe requests succeeded to get here */ + + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; + i++, loi++) { + if (obdos[i].o_valid == 0) /* inactive stripe */ + continue; + + lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm, + i, &set); + } + + if (!set) { + CERROR("No stripes had valid attrs\n"); + rc = -EIO; + } + 
} + oa->o_id = lsm->lsm_object_id; + + OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos)); + OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); + RETURN(rc); } -static int lov_brw_async(int cmd, struct lustre_handle *conn, +static int lov_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pga, struct ptlrpc_request_set *set, struct obd_trans_info *oti) @@ -1573,7 +1770,9 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn, } *stripeinfo, *si, *si_last; struct obd_export *export = class_conn2export(conn); struct lov_obd *lov; + struct lov_file_handles *lfh = NULL; struct brw_page *ioarr; + struct obdo *obdos = NULL; struct lov_oinfo *loi; struct lov_brw_async_args *aa; int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count; @@ -1597,9 +1796,20 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn, if (!where) GOTO(out_sinfo, rc = -ENOMEM); + if (oa) { + OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count); + if (!obdos) + GOTO(out_where, rc = -ENOMEM); + + if (oa->o_valid & OBD_MD_FLHANDLE) + lfh = lov_handle2lfh(obdo_handle(oa)); + else + oa->o_valid &= ~OBD_MD_FLHANDLE; + } + OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs); if (!ioarr) - GOTO(out_where, rc = -ENOMEM); + GOTO(out_obdos, rc = -ENOMEM); for (i = 0; i < oa_bufs; i++) { where[i] = lov_stripe_number(lsm, pga[i].off); @@ -1612,6 +1822,15 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn, si->index = si_last->index + si_last->bufct; si->lsm.lsm_object_id = loi->loi_id; si->ost_idx = loi->loi_ost_idx; + + if (oa) { + memcpy(&obdos[i], oa, sizeof(*obdos)); + obdos[i].o_id = si->lsm.lsm_object_id; + if (lfh) + memcpy(obdo_handle(&obdos[i]), + &lfh->lfh_och[i].och_fh, + sizeof(lfh->lfh_och[i].och_fh)); + } } for (i = 0; i < oa_bufs; i++) { @@ -1637,24 +1856,35 @@ static int lov_brw_async(int cmd, struct lustre_handle *conn, } LASSERT(shift < oa_bufs); + rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn, 
- &si->lsm, si->bufct, &ioarr[shift], - set, oti); + &obdos[i], &si->lsm, si->bufct, + &ioarr[shift], set, oti); if (rc) GOTO(out_ioarr, rc); } - LASSERT (rc == 0); - LASSERT (set->set_interpret == NULL); - set->set_interpret = lov_brw_interpret; - LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args)); + LASSERT(rc == 0); + LASSERT(set->set_interpret == NULL); + set->set_interpret = (set_interpreter_func)lov_brw_interpret; + LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args)); aa = (struct lov_brw_async_args *)&set->set_args; - aa->aa_oa_bufs = oa_bufs; + aa->aa_lsm = lsm; + aa->aa_obdos = obdos; + aa->aa_oa = oa; aa->aa_ioarr = ioarr; + aa->aa_oa_bufs = oa_bufs; + + /* Don't free ioarr or obdos - that's done in lov_brw_interpret */ GOTO(out_where, rc); + out_ioarr: OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); + out_obdos: + OBD_FREE(obdos, stripe_count * sizeof(*obdos)); out_where: OBD_FREE(where, sizeof(*where) * oa_bufs); + if (lfh) + lov_lfh_put(lfh); out_sinfo: OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); out_exp: @@ -1980,20 +2210,16 @@ static int lov_cancel_unused(struct lustre_handle *conn, (tot) += (add); \ } while(0) -static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) +static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { - struct obd_export *tgt_export; - struct lov_obd *lov; + struct lov_obd *lov = &obd->u.lov; struct obd_statfs lov_sfs; int set = 0; int rc = 0; int i; ENTRY; - if (!export || !export->exp_obd) - RETURN(-ENODEV); - - lov = &export->exp_obd->u.lov; /* We only get block data from the OBD */ for (i = 0; i < lov->desc.ld_tgt_count; i++) { @@ -2004,14 +2230,8 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) continue; } - tgt_export = class_conn2export(&lov->tgts[i].conn); - if (!tgt_export) { - CDEBUG(D_HA, "lov idx %d NULL export\n", i); - continue; - } - - err = obd_statfs(tgt_export, &lov_sfs); - 
class_export_put(tgt_export); + err = obd_statfs(class_conn2obd(&lov->tgts[i].conn), &lov_sfs, + max_age); if (err) { if (lov->tgts[i].active) { CERROR("error: statfs OSC %s on OST idx %d: " @@ -2022,6 +2242,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) } continue; } + if (!set) { memcpy(osfs, &lov_sfs, sizeof(lov_sfs)); set = 1; @@ -2044,6 +2265,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree); } } + if (set) { __u32 expected_stripes = lov->desc.ld_default_stripe_count ? lov->desc.ld_default_stripe_count : @@ -2055,6 +2277,7 @@ static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) do_div(osfs->os_ffree, expected_stripes); } else if (!rc) rc = -EIO; + RETURN(rc); } @@ -2191,7 +2414,28 @@ static int lov_get_info(struct lustre_handle *conn, __u32 keylen, RETURN(-EINVAL); } -static int lov_mark_page_dirty(struct lustre_handle *conn, +static int lov_set_info(struct lustre_handle *conn, obd_count keylen, + void *key, obd_count vallen, void *val) +{ + struct obd_device *obddev = class_conn2obd(conn); + struct lov_obd *lov = &obddev->u.lov; + int i, rc = 0; + ENTRY; + + if (keylen < strlen("mds_conn") || + memcmp(key, "mds_conn", strlen("mds_conn")) != 0) + RETURN(-EINVAL); + + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + int er; + er = obd_set_info(&lov->tgts[i].conn, keylen, key, vallen, val); + if (!rc) + rc = er; + } + RETURN(rc); +} + +static int lov_mark_page_dirty(struct lustre_handle *conn, struct lov_stripe_md *lsm, unsigned long offset) { struct lov_obd *lov = &class_conn2obd(conn)->u.lov; @@ -2209,12 +2453,12 @@ static int lov_mark_page_dirty(struct lustre_handle *conn, RETURN(-ENOMEM); stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT); - lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, + lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, &off); off >>= PAGE_CACHE_SHIFT; loi = 
&lsm->lsm_oinfo[stripe]; - CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, + CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, (unsigned long)off, stripe); submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; @@ -2223,7 +2467,7 @@ static int lov_mark_page_dirty(struct lustre_handle *conn, RETURN(rc); } -static int lov_clear_dirty_pages(struct lustre_handle *conn, +static int lov_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, unsigned long start, unsigned long end, unsigned long *cleared) @@ -2267,11 +2511,11 @@ static int lov_clear_dirty_pages(struct lustre_handle *conn, obd_start >>= PAGE_CACHE_SHIFT; obd_end >>= PAGE_CACHE_SHIFT; - CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", - start, end, (unsigned long)obd_start, + CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", + start, end, (unsigned long)obd_start, (unsigned long)obd_end, loi->loi_ost_idx); submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; - rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, + rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, submd, obd_start, obd_end, &osc_cleared); if (rc) @@ -2310,15 +2554,14 @@ static int lov_last_dirty_offset(struct lustre_handle *conn, *offset = 0; lov = &export->exp_obd->u.lov; rc = -ENOENT; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){ count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT; skip = (lsm->lsm_stripe_count - 1) * count; submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; - err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, + err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, submd, &tmp); if (err == -ENOENT) continue; @@ -2326,7 +2569,7 @@ static int lov_last_dirty_offset(struct lustre_handle *conn, GOTO(out_exp, rc = err); rc = 0; - if (tmp != ~0) + if (tmp != ~0) tmp += (tmp/count * skip) + 
(i * count); if (tmp > *offset) *offset = tmp; @@ -2338,6 +2581,100 @@ out_exp: RETURN(rc); } +/* For LOV catalogs, we "nest" catalogs from the parent catalog. What this + * means is that the parent catalog has a bunch of log cookies that are + * pointing at one catalog for each OSC. The OSC catalogs in turn hold + * cookies for actual log files. */ +static int lov_get_catalogs(struct lov_obd *lov, struct llog_handle *cathandle) +{ + int i, rc; + + ENTRY; + for (i = 0; i < lov->desc.ld_tgt_count; i++) { + lov->tgts[i].ltd_cathandle = llog_new_log(cathandle, + &lov->tgts[i].uuid); + if (IS_ERR(lov->tgts[i].ltd_cathandle)) + continue; + rc = llog_init_catalog(cathandle, &lov->tgts[i].uuid); + if (rc) + GOTO(err_logs, rc); + } + lov->lo_catalog_loaded = 1; + RETURN(0); +err_logs: + while (i-- > 0) { + llog_delete_log(cathandle, lov->tgts[i].ltd_cathandle); + llog_close_log(cathandle, lov->tgts[i].ltd_cathandle); + } + return rc; +} + +/* Add log records for each OSC that this object is striped over, and return + * cookies for each one. We _would_ have nice abstraction here, except that + * we need to keep cookies in stripe order, even if some are NULL, so that + * the right cookies are passed back to the right OSTs at the client side. + * Unset cookies should be all-zero (which will never occur naturally). 
*/ +static int lov_log_add(struct lustre_handle *conn, + struct llog_handle *cathandle, + struct llog_trans_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies) +{ + struct obd_device *obd = class_conn2obd(conn); + struct lov_obd *lov = &obd->u.lov; + struct lov_oinfo *loi; + int i, rc = 0; + ENTRY; + + LASSERT(logcookies && numcookies >= lsm->lsm_stripe_count); + + if (unlikely(!lov->lo_catalog_loaded)) + lov_get_catalogs(lov, cathandle); + + for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { + rc += obd_log_add(&lov->tgts[loi->loi_ost_idx].conn, + lov->tgts[loi->loi_ost_idx].ltd_cathandle, + rec, NULL, logcookies + rc, numcookies - rc); + } + + RETURN(rc); +} + +static int lov_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags) +{ + struct obd_export *export = class_conn2export(conn); + struct lov_obd *lov; + struct lov_oinfo *loi; + int rc = 0, i; + ENTRY; + + LASSERT(lsm != NULL); + if (export == NULL || export->exp_obd == NULL) + GOTO(out, rc = -ENODEV); + + LASSERT(count == lsm->lsm_stripe_count); + + loi = lsm->lsm_oinfo; + lov = &export->exp_obd->u.lov; + for (i = 0; i < count; i++, cookies++, loi++) { + int err; + + err = obd_log_cancel(&lov->tgts[loi->loi_ost_idx].conn, + NULL, 1, cookies, flags); + if (err && lov->tgts[loi->loi_ost_idx].active) { + CERROR("error: objid "LPX64" subobj "LPX64 + " on OST idx %d: rc = %d\n", lsm->lsm_object_id, + loi->loi_id, loi->loi_ost_idx, err); + if (!rc) + rc = err; + } + } + GOTO(out, rc); + out: + class_export_put(export); + return rc; +} + struct obd_ops lov_obd_ops = { o_owner: THIS_MODULE, o_attach: lov_attach, @@ -2364,9 +2701,12 @@ struct obd_ops lov_obd_ops = { o_cancel_unused: lov_cancel_unused, o_iocontrol: lov_iocontrol, o_get_info: lov_get_info, - .o_mark_page_dirty = lov_mark_page_dirty, - .o_clear_dirty_pages = lov_clear_dirty_pages, - .o_last_dirty_offset = lov_last_dirty_offset, + 
o_set_info: lov_set_info, + o_log_add: lov_log_add, + o_log_cancel: lov_log_cancel, + o_mark_page_dirty: lov_mark_page_dirty, + o_clear_dirty_pages: lov_clear_dirty_pages, + o_last_dirty_offset: lov_last_dirty_offset, }; int __init lov_init(void) @@ -2374,15 +2714,13 @@ int __init lov_init(void) struct lprocfs_static_vars lvars; int rc; - printk(KERN_INFO "Lustre Logical Object Volume driver; " - "info@clusterfs.com\n"); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(lov, &lvars); rc = class_register_type(&lov_obd_ops, lvars.module_vars, OBD_LOV_DEVICENAME); RETURN(rc); } -static void __exit lov_exit(void) +static void /*__exit*/ lov_exit(void) { class_unregister_type(OBD_LOV_DEVICENAME); } diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index bbb40de..a719aac 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -34,6 +34,8 @@ #include #include +#include "lov_internal.h" + void lov_dump_lmm(int level, struct lov_mds_md *lmm) { struct lov_object_id *loi; @@ -129,14 +131,14 @@ int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) { /* XXX call down to osc_packmd() to do the packing */ LASSERT (loi->loi_id); - lmm->lmm_objects[loi->loi_ost_idx].l_object_id = + lmm->lmm_objects[loi->loi_ost_idx].l_object_id = cpu_to_le64 (loi->loi_id); } RETURN(lmm_size); } -static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count) +int lov_get_stripecnt(struct lov_obd *lov, int stripe_count) { if (!stripe_count) stripe_count = lov->desc.ld_default_stripe_count; @@ -146,6 +148,90 @@ static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count) return stripe_count; } +static int lov_verify_lmm(struct lov_mds_md *lmm, int lmm_bytes, + int *ost_count, int *stripe_count, int *ost_offset) +{ + if (lmm_bytes < sizeof(*lmm)) { + CERROR("lov_mds_md too small: %d, need at least %d\n", + lmm_bytes, (int)sizeof(*lmm)); + return -EINVAL; + } + + if (le32_to_cpu(lmm->lmm_magic) 
!= LOV_MAGIC) { + CERROR("bad disk LOV MAGIC: %#08x != %#08x\n", + le32_to_cpu(lmm->lmm_magic), LOV_MAGIC); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + *ost_count = le16_to_cpu(lmm->lmm_ost_count); + *stripe_count = le16_to_cpu(lmm->lmm_stripe_count); + *ost_offset = le32_to_cpu(lmm->lmm_stripe_offset); + + if (*ost_count == 0 || *stripe_count == 0) { + CERROR("zero OST count %d or stripe count %d\n", + *ost_count, *stripe_count); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + if (lmm_bytes < lov_mds_md_size(*ost_count)) { + CERROR("lov_mds_md too small: %d, need %d\n", + lmm_bytes, lov_mds_md_size(*ost_count)); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + if (*ost_offset > *ost_count) { + CERROR("starting OST offset %d > number of OSTs %d\n", + *ost_offset, *ost_count); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + if (*stripe_count > *ost_count) { + CERROR("stripe count %d > number of OSTs %d\n", + *stripe_count, *ost_count); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + if (lmm->lmm_object_id == 0) { + CERROR("zero object id\n"); + lov_dump_lmm(D_WARNING, lmm); + return -EINVAL; + } + + return 0; +} + +int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count) +{ + int lsm_size = lov_stripe_md_size(stripe_count); + struct lov_oinfo *loi; + int i; + + OBD_ALLOC(*lsmp, lsm_size); + if (!*lsmp) + return -ENOMEM; + + (*lsmp)->lsm_magic = LOV_MAGIC; + (*lsmp)->lsm_stripe_count = stripe_count; + (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; + + for (i = 0, loi = (*lsmp)->lsm_oinfo; i < stripe_count; i++, loi++){ + loi->loi_dirty_ot = &loi->loi_dirty_ot_inline; + ot_init(loi->loi_dirty_ot); + } + return lsm_size; +} + +void lov_free_memmd(struct lov_stripe_md **lsmp) +{ + OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count)); + *lsmp = NULL; +} + /* Unpack LOV object metadata from disk storage. It is packed in LE byte * order and is opaque to the networking layer. 
*/ @@ -156,75 +242,48 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, struct lov_obd *lov = &obd->u.lov; struct lov_stripe_md *lsm; struct lov_oinfo *loi; - int ost_count = 0; - int ost_offset = 0; + int ost_count; + int ost_offset; int stripe_count; int lsm_size; int i; ENTRY; + /* If passed an MDS struct use values from there, otherwise defaults */ if (lmm) { - if (lmm_bytes < sizeof (*lmm)) { - CERROR("lov_mds_md too small: %d, need %d\n", - lmm_bytes, (int)sizeof(*lmm)); - RETURN(-EINVAL); - } - if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) { - CERROR("bad disk LOV MAGIC: %#08x != %#08x\n", - le32_to_cpu (lmm->lmm_magic), LOV_MAGIC); - RETURN(-EINVAL); - } - - ost_count = le16_to_cpu (lmm->lmm_ost_count); - stripe_count = le16_to_cpu (lmm->lmm_stripe_count); - - if (ost_count == 0 || stripe_count == 0) { - CERROR ("zero ost %d or stripe %d count\n", - ost_count, stripe_count); - RETURN (-EINVAL); - } - - if (lmm_bytes < lov_mds_md_size (ost_count)) { - CERROR ("lov_mds_md too small: %d, need %d\n", - lmm_bytes, lov_mds_md_size (ost_count)); - RETURN (-EINVAL); - } - } else + i = lov_verify_lmm(lmm, lmm_bytes, &ost_count, &stripe_count, + &ost_offset); + if (i) + RETURN(i); + } else { + ost_count = 0; stripe_count = lov_get_stripecnt(lov, 0); + ost_offset = 0; + } - /* XXX LOV STACKING call into osc for sizes */ - lsm_size = lov_stripe_md_size(stripe_count); - + /* If we aren't passed an lsmp struct, we just want the size */ if (!lsmp) - RETURN(lsm_size); + /* XXX LOV STACKING call into osc for sizes */ + RETURN(lov_stripe_md_size(stripe_count)); + /* If we are passed an allocated struct but nothing to unpack, free */ if (*lsmp && !lmm) { - stripe_count = (*lsmp)->lsm_stripe_count; - OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count)); - *lsmp = NULL; + lov_free_memmd(lsmp); RETURN(0); } - if (!*lsmp) { - OBD_ALLOC(*lsmp, lsm_size); - if (!*lsmp) - RETURN(-ENOMEM); - } - - lsm = *lsmp; - lsm->lsm_magic = LOV_MAGIC; - 
lsm->lsm_stripe_count = stripe_count; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; + lsm_size = lov_alloc_memmd(lsmp, stripe_count); + if (lsm_size < 0) + RETURN(lsm_size); + /* If we are passed a pointer but nothing to unpack, we only alloc */ if (!lmm) RETURN(lsm_size); - lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); - lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size); - ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset); - - LMM_ASSERT(lsm->lsm_object_id); - LMM_ASSERT(ost_count); + lsm = *lsmp; + lsm->lsm_object_id = le64_to_cpu(lmm->lmm_object_id); + lsm->lsm_stripe_size = le32_to_cpu(lmm->lmm_stripe_size); + lsm->lsm_stripe_offset = ost_offset; for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) { ost_offset %= ost_count; @@ -232,17 +291,20 @@ int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, if (!lmm->lmm_objects[ost_offset].l_object_id) continue; - LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count); /* XXX LOV STACKING call down to osc_unpackmd() */ loi->loi_id = - le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id); + le64_to_cpu(lmm->lmm_objects[ost_offset].l_object_id); loi->loi_ost_idx = ost_offset; - loi->loi_dirty_ot = &loi->loi_dirty_ot_inline; - ot_init(loi->loi_dirty_ot); loi++; } - LMM_ASSERT(loi - lsm->lsm_oinfo > 0); - LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count); + + if (loi - lsm->lsm_oinfo != stripe_count) { + CERROR("missing objects in lmm struct\n"); + lov_dump_lmm(D_WARNING, lmm); + lov_free_memmd(lsmp); + RETURN(-EINVAL); + } + RETURN(lsm_size); } @@ -260,7 +322,6 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp, struct obd_device *obd = class_conn2obd(conn); struct lov_obd *lov = &obd->u.lov; struct lov_mds_md lmm; - struct lov_stripe_md *lsm; int stripe_count; int rc; ENTRY; @@ -272,7 +333,7 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp, /* Bug 1185 FIXME: struct lov_mds_md is 
little-endian everywhere else */ if (lmm.lmm_magic != LOV_MAGIC) { - CERROR("bad userland LOV MAGIC: %#08x != %#08x\n", + CDEBUG(D_IOCTL, "bad userland LOV MAGIC: %#08x != %#08x\n", lmm.lmm_magic, LOV_MAGIC); RETURN(-EINVAL); } @@ -291,32 +352,27 @@ int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp, } #endif if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) { - CERROR("stripe size %u not multiple of %lu\n", + CDEBUG(D_IOCTL, "stripe size %u not multiple of %lu\n", lmm.lmm_stripe_size, PAGE_SIZE); RETURN(-EINVAL); } stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count); if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) { - CERROR("stripe width %ux%u > %lu on 32-bit system\n", + CDEBUG(D_IOCTL, "stripe width %ux%u > %lu on 32-bit system\n", lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL); RETURN(-EINVAL); } - /* XXX LOV STACKING call into osc for sizes */ - OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count)); - if (!lsm) - RETURN(-ENOMEM); + rc = lov_alloc_memmd(lsmp, stripe_count); - lsm->lsm_magic = LOV_MAGIC; - lsm->lsm_stripe_count = stripe_count; - lsm->lsm_stripe_offset = lmm.lmm_stripe_offset; - lsm->lsm_stripe_size = lmm.lmm_stripe_size; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; + if (rc < 0) + RETURN(rc); - *lsmp = lsm; + (*lsmp)->lsm_stripe_offset = lmm.lmm_stripe_offset; + (*lsmp)->lsm_stripe_size = lmm.lmm_stripe_size; - RETURN(rc); + RETURN(0); } /* Retrieve object striping information. 
diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index e0b3adb..7b7a00c 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -30,19 +30,12 @@ #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; #else -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -int rd_stripesize(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_stripesize(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *dev = (struct obd_device *)data; struct lov_desc *desc; @@ -53,8 +46,8 @@ int rd_stripesize(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size); } -int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_stripeoffset(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *dev = (struct obd_device *)data; struct lov_desc *desc; @@ -65,8 +58,8 @@ int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset); } -int rd_stripetype(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_stripetype(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device* dev = (struct obd_device*)data; struct lov_desc *desc; @@ -77,8 +70,8 @@ int rd_stripetype(char *page, char **start, 
off_t off, int count, int *eof, return snprintf(page, count, "%u\n", desc->ld_pattern); } -int rd_stripecount(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_stripecount(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *dev = (struct obd_device *)data; struct lov_desc *desc; @@ -89,8 +82,8 @@ int rd_stripecount(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, "%u\n", desc->ld_default_stripe_count); } -int rd_numobd(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_numobd(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device *dev = (struct obd_device*)data; struct lov_desc *desc; @@ -102,8 +95,8 @@ int rd_numobd(char *page, char **start, off_t off, int count, int *eof, } -int rd_activeobd(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int lov_rd_activeobd(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device* dev = (struct obd_device*)data; struct lov_desc *desc; @@ -114,7 +107,8 @@ int rd_activeobd(char *page, char **start, off_t off, int count, int *eof, return snprintf(page, count, "%u\n", desc->ld_active_tgt_count); } -int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data) +static int lov_rd_mdc(char *page, char **start, off_t off, int count, int *eof, + void *data) { struct obd_device *dev = (struct obd_device*) data; struct lov_obd *lov; @@ -125,7 +119,7 @@ int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data) return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid); } -static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos) +static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos) { struct obd_device *dev = p->private; struct lov_obd *lov = &dev->u.lov; @@ -133,12 +127,12 @@ static void *ll_tgt_seq_start(struct 
seq_file *p, loff_t *pos) return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]); } -static void ll_tgt_seq_stop(struct seq_file *p, void *v) -{ +static void lov_tgt_seq_stop(struct seq_file *p, void *v) +{ } -static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) +static void *lov_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) { struct obd_device *dev = p->private; struct lov_obd *lov = &dev->u.lov; @@ -147,7 +141,7 @@ static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]); } -static int ll_tgt_seq_show(struct seq_file *p, void *v) +static int lov_tgt_seq_show(struct seq_file *p, void *v) { struct lov_tgt_desc *tgt = v; struct obd_device *dev = p->private; @@ -157,18 +151,18 @@ static int ll_tgt_seq_show(struct seq_file *p, void *v) tgt->active ? "" : "IN"); } -struct seq_operations ll_tgt_sops = { - .start = ll_tgt_seq_start, - .stop = ll_tgt_seq_stop, - .next = ll_tgt_seq_next, - .show = ll_tgt_seq_show, +struct seq_operations lov_tgt_sops = { + .start = lov_tgt_seq_start, + .stop = lov_tgt_seq_stop, + .next = lov_tgt_seq_next, + .show = lov_tgt_seq_show, }; -static int ll_target_seq_open(struct inode *inode, struct file *file) +static int lov_target_seq_open(struct inode *inode, struct file *file) { struct proc_dir_entry *dp = inode->u.generic_ip; struct seq_file *seq; - int rc = seq_open(file, &ll_tgt_sops); + int rc = seq_open(file, &lov_tgt_sops); if (rc) return rc; @@ -178,35 +172,36 @@ static int ll_target_seq_open(struct inode *inode, struct file *file) return 0; } + struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "stripesize", rd_stripesize, 0, 0 }, - { "stripeoffset", rd_stripeoffset, 0, 0 }, - { "stripecount", rd_stripecount, 0, 0 }, - { "stripetype", rd_stripetype, 0, 0 }, - { "numobd", rd_numobd, 0, 0 }, - { "activeobd", rd_activeobd, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { 
"filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "target_mdc", rd_mdc, 0, 0 }, + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "stripesize", lov_rd_stripesize, 0, 0 }, + { "stripeoffset", lov_rd_stripeoffset, 0, 0 }, + { "stripecount", lov_rd_stripecount, 0, 0 }, + { "stripetype", lov_rd_stripetype, 0, 0 }, + { "numobd", lov_rd_numobd, 0, 0 }, + { "activeobd", lov_rd_activeobd, 0, 0 }, + { "filestotal", lprocfs_rd_filestotal, 0, 0 }, + { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, + { "blocksize", lprocfs_rd_blksize, 0, 0 }, + { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, + { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "target_mdc", lov_rd_mdc, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; -struct file_operations ll_proc_target_fops = { - .open = ll_target_seq_open, +struct file_operations lov_proc_target_fops = { + .open = lov_target_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(lov, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/mdc/.cvsignore b/lustre/mdc/.cvsignore index e530020..49c6100 100644 --- a/lustre/mdc/.cvsignore +++ b/lustre/mdc/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 3f81507..6dca228 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -22,42 +22,32 @@ #define DEBUG_SUBSYSTEM S_CLASS #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif +#include #include #include #ifndef LPROCFS -struct lprocfs_vars 
lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -struct lprocfs_vars lprocfs_obd_vars[] = { +static struct lprocfs_vars lprocfs_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, + { "blocksize", lprocfs_rd_blksize, 0, 0 }, + { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, + { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "filestotal", lprocfs_rd_filestotal, 0, 0 }, + { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 }, { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { +static struct lprocfs_vars lprocfs_module_vars[] = { { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(mdc, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index e39a0aa..49d85ab2 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -1,24 +1,25 @@ -void mds_pack_req_body(struct ptlrpc_request *); -void mds_pack_rep_body(struct ptlrpc_request *); -void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, 
+void mdc_pack_req_body(struct ptlrpc_request *); +void mdc_pack_rep_body(struct ptlrpc_request *); +void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, obd_id ino, int type); -void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, +void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int flags, struct mdc_op_data *data); -void mds_setattr_pack(struct ptlrpc_request *req, +void mdc_setattr_pack(struct ptlrpc_request *req, struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen); -void mds_create_pack(struct ptlrpc_request *req, int offset, + struct iattr *iattr, void *ea, int ealen, + void *ea2, int ea2len); +void mdc_create_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, const void *data, int datalen); -void mds_open_pack(struct ptlrpc_request *req, int offset, +void mdc_open_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, __u32 flags, const void *data, int datalen); -void mds_unlink_pack(struct ptlrpc_request *req, int offset, +void mdc_unlink_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data); -void mds_link_pack(struct ptlrpc_request *req, int offset, +void mdc_link_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data); -void mds_rename_pack(struct ptlrpc_request *req, int offset, +void mdc_rename_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data, const char *old, int oldlen, const char *new, int newlen); diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 806a830..a17f7a1 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -28,7 +28,7 @@ #include #include -void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, +void mdc_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, obd_id ino, int type, __u64 xid) { 
struct mds_body *b; @@ -45,7 +45,7 @@ void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, b->nlink = size; /* !! */ } -static void mds_pack_body(struct mds_body *b) +static void mdc_pack_body(struct mds_body *b) { LASSERT (b != NULL); @@ -54,14 +54,14 @@ static void mds_pack_body(struct mds_body *b) b->capability = current->cap_effective; } -void mds_pack_req_body(struct ptlrpc_request *req) +void mdc_pack_req_body(struct ptlrpc_request *req) { struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b)); - mds_pack_body(b); + mdc_pack_body(b); } /* packing of MDS records */ -void mds_create_pack(struct ptlrpc_request *req, int offset, +void mdc_create_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, const void *data, int datalen) @@ -94,8 +94,9 @@ void mds_create_pack(struct ptlrpc_request *req, int offset, memcpy (tmp, data, datalen); } } + /* packing of MDS records */ -void mds_open_pack(struct ptlrpc_request *req, int offset, +void mdc_open_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, __u32 flags, const void *data, int datalen) @@ -109,8 +110,9 @@ void mds_open_pack(struct ptlrpc_request *req, int offset, rec->cr_fsuid = current->fsuid; rec->cr_fsgid = current->fsgid; rec->cr_cap = current->cap_effective; - ll_ino2fid(&rec->cr_fid, op_data->ino1, - op_data->gen1, op_data->typ1); + if (op_data != NULL) + ll_ino2fid(&rec->cr_fid, op_data->ino1, + op_data->gen1, op_data->typ1); memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); rec->cr_mode = mode; rec->cr_flags = flags; @@ -123,17 +125,22 @@ void mds_open_pack(struct ptlrpc_request *req, int offset, else rec->cr_suppgid = -1; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); - LOGL0(op_data->name, op_data->namelen, tmp); + if (op_data->name) { + tmp = lustre_msg_buf(req->rq_reqmsg, 
offset + 1, + op_data->namelen + 1); + LOGL0(op_data->name, op_data->namelen, tmp); + } if (data) { tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen); memcpy (tmp, data, datalen); } } -void mds_setattr_pack(struct ptlrpc_request *req, + +void mdc_setattr_pack(struct ptlrpc_request *req, struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen) + struct iattr *iattr, void *ea, int ealen, + void *ea2, int ea2len) { struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*rec)); @@ -163,11 +170,18 @@ void mds_setattr_pack(struct ptlrpc_request *req, rec->sa_suppgid = -1; } - if (ealen != 0) - memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen); + if (ealen == 0) + return; + + memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen); + + if (ea2len == 0) + return; + + memcpy(lustre_msg_buf(req->rq_reqmsg, 2, ea2len), ea2, ea2len); } -void mds_unlink_pack(struct ptlrpc_request *req, int offset, +void mdc_unlink_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data) { struct mds_rec_unlink *rec; @@ -194,7 +208,7 @@ void mds_unlink_pack(struct ptlrpc_request *req, int offset, LOGL0(data->name, data->namelen, tmp); } -void mds_link_pack(struct ptlrpc_request *req, int offset, +void mdc_link_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data) { struct mds_rec_link *rec; @@ -221,7 +235,7 @@ void mds_link_pack(struct ptlrpc_request *req, int offset, LOGL0(data->name, data->namelen, tmp); } -void mds_rename_pack(struct ptlrpc_request *req, int offset, +void mdc_rename_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *data, const char *old, int oldlen, const char *new, int newlen) { @@ -255,7 +269,7 @@ void mds_rename_pack(struct ptlrpc_request *req, int offset, } } -void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, +void mdc_getattr_pack(struct ptlrpc_request *req, int valid, int offset, int flags, struct mdc_op_data *data) { struct mds_body *b; diff --git 
a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 2da2fdb..4f7443e 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -35,22 +35,24 @@ #include "mdc_internal.h" /* mdc_setattr does its own semaphore handling */ -static int mdc_reint(struct ptlrpc_request *request, int level) +static int mdc_reint(struct ptlrpc_request *request, + struct mdc_rpc_lock *rpc_lock, int level) { int rc; - __u32 *opcodeptr; + - opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr)); request->rq_level = level; - if (!(*opcodeptr == REINT_SETATTR)) - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); + mdc_get_rpc_lock(rpc_lock, NULL); rc = ptlrpc_queue_wait(request); - if (!(*opcodeptr == REINT_SETATTR)) - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - + mdc_put_rpc_lock(rpc_lock, NULL); if (rc) CDEBUG(D_INFO, "error in handling %d\n", rc); + else if (!lustre_swab_repbuf(request, 0, sizeof(struct mds_body), + lustre_swab_mds_body)) { + CERROR ("Can't unpack mds_body\n"); + rc = -EPROTO; + } return rc; } @@ -60,42 +62,45 @@ static int mdc_reint(struct ptlrpc_request *request, int level) * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a * magic open-path setattr that should take the setattr semaphore and * go to the setattr portal. 
*/ -int mdc_setattr(struct lustre_handle *conn, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, +int mdc_setattr(struct lustre_handle *conn, struct mdc_op_data *data, + struct iattr *iattr, void *ea, int ealen, void *ea2, int ea2len, struct ptlrpc_request **request) { struct ptlrpc_request *req; struct mds_rec_setattr *rec; struct mdc_rpc_lock *rpc_lock; - int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen}; + int rc, bufcount = 1, size[3] = {sizeof(*rec), ealen, ea2len}; ENTRY; LASSERT(iattr != NULL); - if (ealen > 0) + if (ealen > 0) { bufcount = 2; + if (ea2len > 0) + bufcount = 3; + } req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount, size, NULL); - if (!req) + if (req == NULL) RETURN(-ENOMEM); if (iattr->ia_valid & ATTR_FROM_OPEN) { req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249 rpc_lock = &mdc_setattr_lock; - } else + } else { rpc_lock = &mdc_rpc_lock; + } - mds_setattr_pack(req, data, iattr, ea, ealen); + if (iattr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) + CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n", + iattr->ia_mtime, iattr->ia_ctime); + mdc_setattr_pack(req, data, iattr, ea, ealen, ea2, ea2len); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - mdc_get_rpc_lock(rpc_lock, NULL); - rc = mdc_reint(req, LUSTRE_CONN_FULL); - mdc_put_rpc_lock(rpc_lock, NULL); - + rc = mdc_reint(req, rpc_lock, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; @@ -103,17 +108,14 @@ int mdc_setattr(struct lustre_handle *conn, RETURN(rc); } -int mdc_create(struct lustre_handle *conn, - struct mdc_op_data *op_data, - const void *data, int datalen, - int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev, - struct ptlrpc_request **request) +int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data, + const void *data, int datalen, int mode, __u32 uid, __u32 gid, + __u64 time, __u64 rdev, struct ptlrpc_request **request) { struct ptlrpc_request *req; - int rc, 
size[3] = {sizeof(struct mds_rec_create), - op_data->namelen + 1, 0}; + int rc, size[3] = {sizeof(struct mds_rec_create), op_data->namelen + 1}; int level, bufcount = 2; -// ENTRY; + ENTRY; if (data && datalen) { size[bufcount] = datalen; @@ -122,14 +124,12 @@ int mdc_create(struct lustre_handle *conn, req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount, size, NULL); - if (!req) - return -ENOMEM; -// RETURN(-ENOMEM); + if (req == NULL) + RETURN(-ENOMEM); - /* mds_create_pack fills msg->bufs[1] with name + /* mdc_create_pack fills msg->bufs[1] with name * and msg->bufs[2] with tgt, for symlinks or lov MD data */ - mds_create_pack(req, 0, op_data, - mode, rdev, uid, gid, time, + mdc_create_pack(req, 0, op_data, mode, rdev, uid, gid, time, data, datalen); size[0] = sizeof(struct mds_body); @@ -137,7 +137,7 @@ int mdc_create(struct lustre_handle *conn, level = LUSTRE_CONN_FULL; resend: - rc = mdc_reint(req, level); + rc = mdc_reint(req, &mdc_rpc_lock, level); /* Resend if we were told to. 
*/ if (rc == -ERESTARTSYS) { level = LUSTRE_CONN_RECOVER; @@ -148,12 +148,10 @@ int mdc_create(struct lustre_handle *conn, mdc_store_inode_generation(req, 0, 0); *request = req; - return rc; -// RETURN(rc); + RETURN(rc); } -int mdc_unlink(struct lustre_handle *conn, - struct mdc_op_data *data, +int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data, struct ptlrpc_request **request) { struct obd_device *obddev = class_conn2obd(conn); @@ -162,27 +160,26 @@ int mdc_unlink(struct lustre_handle *conn, ENTRY; LASSERT(req == NULL); - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL); - if (!req) + if (req == NULL) RETURN(-ENOMEM); *request = req; size[0] = sizeof(struct mds_body); size[1] = obddev->u.cli.cl_max_mds_easize; - req->rq_replen = lustre_msg_size(2, size); + size[2] = obddev->u.cli.cl_max_mds_cookiesize; + req->rq_replen = lustre_msg_size(3, size); - mds_unlink_pack(req, 0, data); + mdc_unlink_pack(req, 0, data); - rc = mdc_reint(req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL); if (rc == -ERESTARTSYS) rc = 0; RETURN(rc); } -int mdc_link(struct lustre_handle *conn, - struct mdc_op_data *data, +int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data, struct ptlrpc_request **request) { struct ptlrpc_request *req; @@ -191,15 +188,15 @@ int mdc_link(struct lustre_handle *conn, req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, NULL); - if (!req) + if (req == NULL) RETURN(-ENOMEM); - mds_link_pack(req, 0, data); + mdc_link_pack(req, 0, data); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - rc = mdc_reint(req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; @@ -207,10 +204,8 @@ int mdc_link(struct lustre_handle *conn, RETURN(rc); } -int mdc_rename(struct lustre_handle *conn, - struct mdc_op_data *data, - const char *old, int oldlen, - const char *new, int newlen, +int 
mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data, + const char *old, int oldlen, const char *new, int newlen, struct ptlrpc_request **request) { struct ptlrpc_request *req; @@ -220,15 +215,15 @@ int mdc_rename(struct lustre_handle *conn, req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size, NULL); - if (!req) + if (req == NULL) RETURN(-ENOMEM); - mds_rename_pack(req, 0, data, old, oldlen, new, newlen); + mdc_rename_pack(req, 0, data, old, oldlen, new, newlen); size[0] = sizeof(struct mds_body); req->rq_replen = lustre_msg_size(1, size); - rc = mdc_reint(req, LUSTRE_CONN_FULL); + rc = mdc_reint(req, &mdc_rpc_lock, LUSTRE_CONN_FULL); *request = req; if (rc == -ERESTARTSYS) rc = 0; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 204a836..b205d21 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -46,6 +46,7 @@ struct mdc_rpc_lock mdc_setattr_lock; EXPORT_SYMBOL(mdc_rpc_lock); /* Helper that implements most of mdc_getstatus and signal_completed_replay. 
*/ +/* XXX this should become mdc_get_info("key"), sending MDS_GET_INFO RPC */ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, int level, int msg_flags) { @@ -62,7 +63,7 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, req->rq_level = level; req->rq_replen = lustre_msg_size(1, &size); - mds_pack_req_body(req); + mdc_pack_req_body(req); req->rq_reqmsg->flags |= msg_flags; rc = ptlrpc_queue_wait(req); @@ -88,13 +89,14 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, return rc; } -/* should become mdc_getinfo() */ +/* This should be mdc_get_info("rootfid") */ int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid) { return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON, 0); } +/* should call mdc_get_info("lovdesc") and mdc_get_info("lovtgts") */ int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, struct ptlrpc_request **request) { @@ -233,7 +235,7 @@ int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid, memcpy(&body->fid1, fid, sizeof(*fid)); body->valid = valid; body->eadatasize = ea_size; - mds_pack_req_body(req); + mdc_pack_req_body(req); rc = mdc_getattr_common (conn, ea_size, req); if (rc != 0) { @@ -263,7 +265,7 @@ int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid, memcpy(&body->fid1, fid, sizeof(*fid)); body->valid = valid; body->eadatasize = ea_size; - mds_pack_req_body(req); + mdc_pack_req_body(req); LASSERT (strnlen (filename, namelen) == namelen - 1); memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen); @@ -283,9 +285,9 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, int repoff) { struct mds_rec_create *rec = - lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec)); + lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof(*rec)); struct mds_body *body = - lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body)); + lustre_msg_buf(req->rq_repmsg, repoff, 
sizeof(*body)); LASSERT (rec != NULL); LASSERT (body != NULL); @@ -295,11 +297,49 @@ void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, rec->cr_replayfid.generation, rec->cr_replayfid.id); } +int mdc_req2lustre_md(struct ptlrpc_request *req, int offset, + struct lustre_handle *obd_import, + struct lustre_md *md) +{ + int rc; + ENTRY; + + LASSERT(md); + memset(md, 0, sizeof(*md)); + + md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body)); + LASSERT (md->body != NULL); + LASSERT_REPSWABBED (req, offset); + + if (md->body->valid & OBD_MD_FLEASIZE) { + int lmmsize; + struct lov_mds_md *lmm; + + LASSERT(S_ISREG(md->body->mode)); + + if (md->body->eadatasize == 0) { + CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n"); + RETURN(-EPROTO); + } + lmmsize = md->body->eadatasize; + lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize); + LASSERT (lmm != NULL); + LASSERT_REPSWABBED (req, offset + 1); + + rc = obd_unpackmd(obd_import, &md->lsm, lmm, lmmsize); + if (rc < 0) { + /* XXX don't know if I should do this... */ + CERROR ("Error %d unpacking eadata\n", rc); + LBUG(); + } + LASSERT (rc >= sizeof (*md->lsm)); + } + RETURN(0); +} + + /* We always reserve enough space in the reply packet for a stripe MD, because - * we don't know in advance the file type. - * - * XXX we could get that from ext2_dir_entry_2 file_type - */ + * we don't know in advance the file type. 
*/ int mdc_enqueue(struct lustre_handle *conn, int lock_type, struct lookup_intent *it, @@ -318,9 +358,10 @@ int mdc_enqueue(struct lustre_handle *conn, { .name = {data->ino1, data->gen1} }; int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)}; int rc, flags = LDLM_FL_HAS_INTENT; - int repsize[3] = {sizeof(struct ldlm_reply), + int repsize[4] = {sizeof(struct ldlm_reply), sizeof(struct mds_body), - obddev->u.cli.cl_max_mds_easize}; + obddev->u.cli.cl_max_mds_easize, + obddev->u.cli.cl_max_mds_cookiesize}; struct ldlm_reply *dlm_rep; struct ldlm_intent *lit; struct ldlm_request *lockreq; @@ -352,7 +393,7 @@ int mdc_enqueue(struct lustre_handle *conn, lit->opc = (__u64)it->it_op; /* pack the intended request */ - mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid, + mdc_open_pack(req, 2, data, it->it_mode, 0, current->fsuid, current->fsgid, LTIME_S(CURRENT_TIME), it->it_flags, tgt, tgtlen); /* get ready for the reply */ @@ -371,10 +412,10 @@ int mdc_enqueue(struct lustre_handle *conn, lit->opc = (__u64)it->it_op; /* pack the intended request */ - mds_unlink_pack(req, 2, data); + mdc_unlink_pack(req, 2, data); /* get ready for the reply */ - reply_buffers = 3; - req->rq_replen = lustre_msg_size(3, repsize); + reply_buffers = 4; + req->rq_replen = lustre_msg_size(4, repsize); } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE; size[2] = sizeof(struct mds_body); @@ -390,7 +431,7 @@ int mdc_enqueue(struct lustre_handle *conn, lit->opc = (__u64)it->it_op; /* pack the intended request */ - mds_getattr_pack(req, valid, 2, it->it_flags, data); + mdc_getattr_pack(req, valid, 2, it->it_flags, data); /* get ready for the reply */ reply_buffers = 3; req->rq_replen = lustre_msg_size(3, repsize); @@ -447,8 +488,8 @@ int mdc_enqueue(struct lustre_handle *conn, } dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); - LASSERT (dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ - LASSERT_REPSWABBED 
(req, 0); /* swabbed by ldlm_cli_enqueue() */ + LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ + LASSERT_REPSWABBED(req, 0); /* swabbed by ldlm_cli_enqueue() */ it->it_disposition = (int) dlm_rep->lock_policy_res1; it->it_status = (int) dlm_rep->lock_policy_res2; @@ -456,8 +497,8 @@ int mdc_enqueue(struct lustre_handle *conn, it->it_data = req; /* We know what to expect, so we do any byte flipping required here */ - LASSERT (reply_buffers == 3 || reply_buffers == 1); - if (reply_buffers == 3) { + LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1); + if (reply_buffers >= 3) { struct mds_body *body; body = lustre_swab_repbuf (req, 1, sizeof (*body), @@ -471,8 +512,8 @@ int mdc_enqueue(struct lustre_handle *conn, /* The eadata is opaque; just check that it is * there. Eventually, obd_unpackmd() will check * the contents */ - eadata = lustre_swab_repbuf (req, 2, body->eadatasize, - NULL); + eadata = lustre_swab_repbuf(req, 2, body->eadatasize, + NULL); if (eadata == NULL) { CERROR ("Missing/short eadata\n"); RETURN (-EPROTO); @@ -490,8 +531,7 @@ static void mdc_replay_open(struct ptlrpc_request *req) struct list_head *tmp; struct mds_body *body; - body = lustre_swab_repbuf (req, 1, sizeof (*body), - lustre_swab_mds_body); + body = lustre_swab_repbuf(req, 1, sizeof(*body), lustre_swab_mds_body); LASSERT (body != NULL); memcpy(&old, file_fh, sizeof(old)); @@ -517,15 +557,15 @@ void mdc_set_open_replay_data(struct obd_client_handle *och) { struct ptlrpc_request *req = och->och_req; struct mds_rec_create *rec = - lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec)); + lustre_msg_buf(req->rq_reqmsg, 2, sizeof(*rec)); struct mds_body *body = - lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); + lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body)); - LASSERT (rec != NULL); + LASSERT(rec != NULL); /* outgoing messages always in my byte order */ - LASSERT (body != NULL); + LASSERT(body != NULL); /* incoming message in my byte order (it's been 
swabbed) */ - LASSERT_REPSWABBED (req, 1); + LASSERT_REPSWABBED(req, 1); memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); req->rq_replay_cb = mdc_replay_open; @@ -589,7 +629,7 @@ int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, if (rc != 0) GOTO(out, rc); - mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type); + mdc_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type); req->rq_replen = lustre_msg_size(1, &size); rc = ptlrpc_queue_wait(req); @@ -622,27 +662,28 @@ static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, case OBD_IOC_CLIENT_RECOVER: RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1)); case IOC_OSC_SET_ACTIVE: - if (data->ioc_offset) { - CERROR("%s: can't reactivate MDC\n", - obddev->obd_uuid.uuid); - RETURN(-ENOTTY); - } - RETURN(ptlrpc_set_import_active(imp, 0)); + RETURN(ptlrpc_set_import_active(imp, data->ioc_offset)); default: CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd); RETURN(-ENOTTY); } } -static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs) +static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { struct ptlrpc_request *req; struct obd_statfs *msfs; int rc, size = sizeof(*msfs); ENTRY; - req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0, - NULL, NULL); + /* We could possibly pass max_age in the request (as an absolute + * timestamp or a "seconds.usec ago") so the target can avoid doing + * extra calls into the filesystem if that isn't necessary (e.g. + * during mount that would help a bit). Having relative timestamps + * is not so great if request processing is slow, while absolute + * timestamps are not ideal because they need time synchronization. 
*/ + req = ptlrpc_prep_req(obd->u.cli.cl_import, MDS_STATFS, 0, NULL, NULL); if (!req) RETURN(-ENOMEM); @@ -655,14 +696,13 @@ static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs) if (rc) GOTO(out, rc); - msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs), - lustre_swab_obd_statfs); + msfs = lustre_swab_repbuf(req, 0, sizeof(*msfs),lustre_swab_obd_statfs); if (msfs == NULL) { - CERROR ("Can't unpack obd_statfs\n"); - GOTO (out, rc = -EPROTO); + CERROR("Can't unpack obd_statfs\n"); + GOTO(out, rc = -EPROTO); } - memcpy (osfs, msfs, sizeof (*msfs)); + memcpy(osfs, msfs, sizeof (*msfs)); EXIT; out: ptlrpc_req_finished(req); @@ -670,11 +710,83 @@ out: return rc; } +static int mdc_pin(struct lustre_handle *conn, obd_id ino, __u32 gen, int type, + struct obd_client_handle *handle, int flag) +{ + struct ptlrpc_request *req; + struct mds_body *body; + int rc, size = sizeof(*body); + ENTRY; + + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_PIN, 1, &size, NULL); + if (req == NULL) + RETURN(-ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); + ll_ino2fid(&body->fid1, ino, gen, type); + body->flags = flag; + + req->rq_replen = lustre_msg_size(1, &size); + + mdc_get_rpc_lock(&mdc_rpc_lock, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_rpc_lock(&mdc_rpc_lock, NULL); + if (rc) { + CERROR("pin failed: %d\n", rc); + ptlrpc_req_finished(req); + RETURN(rc); + } + + body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_mds_body); + if (body == NULL) { + ptlrpc_req_finished(req); + RETURN(rc); + } + + memcpy(&handle->och_fh, &body->handle, sizeof(body->handle)); + handle->och_req = req; /* will be dropped by unpin */ + handle->och_magic = OBD_CLIENT_HANDLE_MAGIC; + RETURN(rc); +} + +static int mdc_unpin(struct lustre_handle *conn, + struct obd_client_handle *handle, int flag) +{ + struct ptlrpc_request *req; + struct mds_body *body; + int rc, size = sizeof(*body); + ENTRY; + + if (handle->och_magic != OBD_CLIENT_HANDLE_MAGIC) + 
RETURN(0); + + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size, + NULL); + if (req == NULL) + RETURN(-ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); + memcpy(&body->handle, &handle->och_fh, sizeof(body->handle)); + body->flags = flag; + + req->rq_replen = lustre_msg_size(0, NULL); + mdc_get_rpc_lock(&mdc_rpc_lock, NULL); + rc = ptlrpc_queue_wait(req); + mdc_put_rpc_lock(&mdc_rpc_lock, NULL); + + if (rc != 0) + CERROR("unpin failed: %d\n", rc); + + ptlrpc_req_finished(req); + ptlrpc_req_finished(handle->och_req); + RETURN(rc); +} + static int mdc_attach(struct obd_device *dev, obd_count len, void *data) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(mdc, &lvars); return lprocfs_obd_attach(dev, lvars.obd_vars); } @@ -692,7 +804,9 @@ struct obd_ops mdc_obd_ops = { o_connect: client_import_connect, o_disconnect: client_import_disconnect, o_iocontrol: mdc_iocontrol, - o_statfs: mdc_statfs + o_statfs: mdc_statfs, + o_pin: mdc_pin, + o_unpin: mdc_unpin, }; int __init mdc_init(void) @@ -700,12 +814,12 @@ int __init mdc_init(void) struct lprocfs_static_vars lvars; mdc_init_rpc_lock(&mdc_rpc_lock); mdc_init_rpc_lock(&mdc_setattr_lock); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(mdc, &lvars); return class_register_type(&mdc_obd_ops, lvars.module_vars, LUSTRE_MDC_NAME); } -static void __exit mdc_exit(void) +static void /*__exit*/ mdc_exit(void) { class_unregister_type(LUSTRE_MDC_NAME); } @@ -715,6 +829,7 @@ MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Lustre Metadata Client"); MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(mdc_req2lustre_md); EXPORT_SYMBOL(mdc_getstatus); EXPORT_SYMBOL(mdc_getlovinfo); EXPORT_SYMBOL(mdc_enqueue); diff --git a/lustre/mds/.cvsignore b/lustre/mds/.cvsignore index e530020..49c6100 100644 --- a/lustre/mds/.cvsignore +++ b/lustre/mds/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/mds/Makefile.mk b/lustre/mds/Makefile.mk index 6b712fb..0696bd7 100644 --- a/lustre/mds/Makefile.mk +++ b/lustre/mds/Makefile.mk @@ -3,8 +3,9 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include fs/lustre/portals/Kernelenv +include $(src)/../portals/Kernelenv obj-y += mds.o - -mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o +mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_open.o \ + mds_lib.o + diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index de3f2ed..756e290 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -50,19 +50,11 @@ #include #include #include +#include + #include "mds_internal.h" -extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count, - struct obd_uuid *uuidarray); -extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc); -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, int disp); -static int mds_cleanup(struct obd_device * obddev, int force, int failover); - -inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req) -{ - return &req->rq_export->exp_obd->u.mds; -} +static int mds_cleanup(struct obd_device *obd, int flags); static int mds_bulk_timeout(void *data) { @@ -188,6 +180,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); + CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n", + 
ino, generation, mds->mds_sb); + /* under ext3 this is neither supposed to return bad inodes nor NULL inodes. */ result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name)); @@ -198,9 +193,6 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, if (!inode) RETURN(ERR_PTR(-ENOENT)); - CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n", - inode->i_ino, inode->i_generation, inode->i_sb); - if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. */ CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n", @@ -341,80 +333,125 @@ void mds_mfd_destroy(struct mds_file_data *mfd) mds_mfd_put(mfd); } -/* Call with med->med_open_lock held, please. */ -static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med) +/* Close a "file descriptor" and possibly unlink an orphan from the + * PENDING directory. + * + * If we are being called from mds_disconnect() because the client has + * disappeared, then req == NULL and we do not update last_rcvd because + * there is nothing that could be recovered by the client at this stage + * (it will not even _have_ an entry in last_rcvd anymore). 
+ */ +static int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd, + struct mds_file_data *mfd) { - struct dentry *de = NULL; - -#ifdef CONFIG_SMP - LASSERT(spin_is_locked(&med->med_open_lock)); -#endif - list_del(&mfd->mfd_list); + struct dentry *dparent = mfd->mfd_dentry->d_parent; + struct inode *child_inode = mfd->mfd_dentry->d_inode; + char fidname[LL_FID_NAMELEN]; + int last_orphan, fidlen, rc = 0; + ENTRY; - if (mfd->mfd_dentry->d_parent) { - LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count)); - de = dget(mfd->mfd_dentry->d_parent); + if (dparent) { + LASSERT(atomic_read(&dparent->d_count) > 0); + dparent = dget(dparent); } - /* this is the actual "close" */ - l_dput(mfd->mfd_dentry); + fidlen = ll_fid2str(fidname, child_inode->i_ino, + child_inode->i_generation); - if (de) - l_dput(de); + last_orphan = mds_open_orphan_dec_test(child_inode) && + mds_inode_is_orphan(child_inode); + /* this is the actual "close" */ + l_dput(mfd->mfd_dentry); mds_mfd_destroy(mfd); - RETURN(0); -} -static int mds_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *export = class_conn2export(conn); - int rc; - unsigned long flags; - ENTRY; + if (dparent) + l_dput(dparent); - ldlm_cancel_locks_for_export(export); + if (last_orphan) { + struct mds_obd *mds = &obd->u.mds; + struct inode *pending_dir = mds->mds_pending_dir->d_inode; + struct dentry *pending_child = NULL; + void *handle; - spin_lock_irqsave(&export->exp_lock, flags); - export->exp_failover = failover; - spin_unlock_irqrestore(&export->exp_lock, flags); + CDEBUG(D_ERROR, "destroying orphan object %s\n", fidname); - rc = class_disconnect(conn, failover); - class_export_put(export); + /* Sadly, there is no easy way to save pending_child from + * mds_reint_unlink() into mfd, so we need to re-lookup, + * but normally it will still be in the dcache. 
+ */ + down(&pending_dir->i_sem); + pending_child = lookup_one_len(fidname, mds->mds_pending_dir, + fidlen); + if (IS_ERR(pending_child)) + GOTO(out_lock, rc = PTR_ERR(pending_child)); + LASSERT(pending_child->d_inode != NULL); + + handle = fsfilt_start(obd, pending_dir, FSFILT_OP_UNLINK, NULL); + if (IS_ERR(handle)) + GOTO(out_dput, rc = PTR_ERR(handle)); + rc = vfs_unlink(pending_dir, pending_child); + if (rc) + CERROR("error unlinking orphan %s: rc %d\n",fidname,rc); + + if (req) { + rc = mds_finish_transno(mds, pending_dir, handle, req, + rc, 0); + } else { + int err = fsfilt_commit(obd, pending_dir, handle, 0); + if (err) { + CERROR("error committing orphan unlink: %d\n", + err); + if (!rc) + rc = err; + } + } + out_dput: + dput(pending_child); + out_lock: + up(&pending_dir->i_sem); + } RETURN(rc); } -static void mds_destroy_export(struct obd_export *export) +static int mds_disconnect(struct lustre_handle *conn, int flags) { + struct obd_export *export = class_conn2export(conn); struct mds_export_data *med = &export->exp_mds_data; - struct list_head *tmp, *n; + struct obd_device *obd = export->exp_obd; + struct obd_run_ctxt saved; int rc; - ENTRY; - LASSERT(!strcmp(export->exp_obd->obd_type->typ_name, - LUSTRE_MDS_NAME)); - /* - * Close any open files. - */ + push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL); + /* Close any open files (which may also cause orphan unlinking). 
*/ spin_lock(&med->med_open_lock); - list_for_each_safe(tmp, n, &med->med_open_head) { + while (!list_empty(&med->med_open_head)) { + struct list_head *tmp = med->med_open_head.next; struct mds_file_data *mfd = list_entry(tmp, struct mds_file_data, mfd_list); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + /* bug 1579: fix force-closing for 2.5 */ struct dentry *dentry = mfd->mfd_dentry; + + list_del(&mfd->mfd_list); + spin_unlock(&med->med_open_lock); + CERROR("force closing client file handle for %*s (%s:%lu)\n", dentry->d_name.len, dentry->d_name.name, kdevname(dentry->d_inode->i_sb->s_dev), dentry->d_inode->i_ino); + rc = mds_mfd_close(NULL, obd, mfd); #endif - rc = mds_close_mfd(mfd, med); if (rc) CDEBUG(D_INODE, "Error closing file: %d\n", rc); + spin_lock(&med->med_open_lock); } spin_unlock(&med->med_open_lock); + pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL); + ldlm_cancel_locks_for_export(export); if (export->exp_outstanding_reply) { struct ptlrpc_request *req = export->exp_outstanding_reply; unsigned long flags; @@ -432,9 +469,13 @@ static void mds_destroy_export(struct obd_export *export) export->exp_outstanding_reply = NULL; } - if (!export->exp_failover) + if (!(flags & OBD_OPT_FAILOVER)) mds_client_free(export); - EXIT; + + rc = class_disconnect(conn, flags); + class_export_put(export); + + RETURN(rc); } /* @@ -448,14 +489,24 @@ static void mds_fsync_super(struct super_block *sb) { lock_kernel(); lock_super(sb); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) if (sb->s_dirt && sb->s_op && sb->s_op->write_super) sb->s_op->write_super(sb); +#else + if (sb->s_dirt && sb->s_op) { + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + else if (sb->s_op->write_super) + sb->s_op->write_super(sb); + } +#endif unlock_super(sb); unlock_kernel(); } static int mds_getstatus(struct ptlrpc_request *req) { + struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = mds_req2mds(req); struct mds_body *body; int rc, size = sizeof(*body); @@ -473,7 
+524,7 @@ static int mds_getstatus(struct ptlrpc_request *req) * requests if they have any. This would be fsync_super() if it * was exported. */ - mds_fsync_super(mds->mds_sb); + fsfilt_sync(obd, mds->mds_sb); body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1)); @@ -525,8 +576,7 @@ static int mds_getlovinfo(struct ptlrpc_request *req) memcpy(desc, &mds->mds_lov_desc, sizeof (*desc)); tgt_count = mds->mds_lov_desc.ld_tgt_count; - uuid0 = lustre_msg_buf (req->rq_repmsg, 1, - tgt_count * sizeof (*uuid0)); + uuid0 = lustre_msg_buf(req->rq_repmsg, 1, tgt_count * sizeof (*uuid0)); if (uuid0 == NULL) { CERROR("too many targets, enlarge client buffers\n"); req->rq_status = -ENOSPC; @@ -539,6 +589,8 @@ static int mds_getlovinfo(struct ptlrpc_request *req) req->rq_status = rc; RETURN(0); } + memcpy(&mds->mds_osc_uuid, &mds->mds_lov_desc.ld_uuid, + sizeof(mds->mds_osc_uuid)); RETURN(0); } @@ -616,8 +668,8 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, rc = fsfilt_get_md(obd, inode, lmm, lmm_size); if (rc < 0) { - CERROR ("Error %d reading eadata for ino %lu\n", - rc, inode->i_ino); + CERROR("Error %d reading eadata for ino %lu\n", + rc, inode->i_ino); } else if (rc > 0) { body->valid |= OBD_MD_FLEASIZE; body->eadatasize = rc; @@ -639,19 +691,22 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, if (inode == NULL) RETURN(-ENOENT); - body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body)); - LASSERT (body != NULL); /* caller prepped reply */ + body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof(*body)); + LASSERT(body != NULL); /* caller prepped reply */ mds_pack_inode2fid(&body->fid1, inode); mds_pack_inode2body(body, inode); - if (S_ISREG(inode->i_mode) && - (reqbody->valid & OBD_MD_FLEASIZE) != 0) { - rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, - body, inode); + if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) { + rc 
= mds_pack_md(obd, req->rq_repmsg, reply_off+1, body, inode); + + /* If we have LOV EA data, the OST holds size, atime, mtime */ + if (!(body->valid & OBD_MD_FLEASIZE)) + body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME); } else if (S_ISLNK(inode->i_mode) && (reqbody->valid & OBD_MD_LINKNAME) != 0) { - char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0); + char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1,0); int len; LASSERT (symname != NULL); /* caller prepped reply */ @@ -672,6 +727,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, rc = 0; } } + RETURN(rc); } @@ -684,11 +740,10 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, ENTRY; body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); - LASSERT (body != NULL); /* checked by caller */ - LASSERT_REQSWABBED (req, offset); /* swabbed by caller */ + LASSERT(body != NULL); /* checked by caller */ + LASSERT_REQSWABBED(req, offset); /* swabbed by caller */ - if (S_ISREG(inode->i_mode) && - (body->valid & OBD_MD_FLEASIZE) != 0) { + if (S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) { int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n", rc, inode->i_ino); @@ -701,14 +756,14 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, size[bufcount] = 0; CERROR("MD size %d larger than maximum possible %u\n", rc, mds->mds_max_mdsize); - } else + } else { size[bufcount] = rc; + } bufcount++; - } else if (S_ISLNK (inode->i_mode) && - (body->valid & OBD_MD_LINKNAME) != 0) { + } else if (S_ISLNK(inode->i_mode) && (body->valid & OBD_MD_LINKNAME)) { if (inode->i_size + 1 != body->eadatasize) - CERROR ("symlink size: %Lu, reply space: %d\n", - inode->i_size + 1, body->eadatasize); + CERROR("symlink size: %Lu, reply space: %d\n", + inode->i_size + 1, body->eadatasize); size[bufcount] = 
MIN(inode->i_size + 1, body->eadatasize); bufcount++; CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n", @@ -724,9 +779,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) { - CERROR("out of memoryK\n"); - req->rq_status = rc; - GOTO(out, rc); + CERROR("out of memory\n"); + GOTO(out, req->rq_status = rc); } EXIT; @@ -738,6 +792,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req, struct lustre_handle *client_lockh) { + struct mds_export_data *med = &req->rq_export->exp_mds_data; + struct mds_client_data *mcd = med->med_mcd; struct obd_device *obd = req->rq_export->exp_obd; struct mds_obd *mds = mds_req2mds(req); struct dentry *parent, *child; @@ -748,8 +804,15 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req, int namelen, rc = 0; char *name; - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); + req->rq_transno = mcd->mcd_last_transno; + req->rq_status = mcd->mcd_last_result; + + LASSERT (req->rq_export->exp_outstanding_reply); + + mds_steal_ack_locks(req->rq_export, req); + + if (req->rq_status) + return; body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); LASSERT (body != NULL); /* checked by caller */ @@ -770,6 +833,7 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req, uc.ouc_cap = body->capability; uc.ouc_suppgid1 = body->suppgid; uc.ouc_suppgid2 = -1; + push_ctxt(&saved, &mds->mds_ctxt, &uc); parent = mds_fid2dentry(mds, &body->fid1, NULL); LASSERT(!IS_ERR(parent)); @@ -785,7 +849,8 @@ static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req, } rc = mds_getattr_internal(obd, child, req, body, offset); - req->rq_status = rc; + /* XXX need to handle error here */ + LASSERT(!rc); l_dput(child); l_dput(parent); 
} @@ -795,6 +860,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; + struct ldlm_reply *rep = NULL; struct obd_run_ctxt saved; struct mds_body *body; struct dentry *de = NULL, *dchild = NULL; @@ -803,7 +869,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, struct ldlm_res_id child_res_id = { .name = {0} }; struct lustre_handle parent_lockh; int namesize; - int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent; + int flags = 0, rc = 0, cleanup_phase = 0; char *name; ENTRY; @@ -811,34 +877,39 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, /* Swab now, before anyone looks inside the request */ - body = lustre_swab_reqbuf (req, offset, sizeof (*body), - lustre_swab_mds_body); + body = lustre_swab_reqbuf(req, offset, sizeof(*body), + lustre_swab_mds_body); if (body == NULL) { - CERROR ("Can't swab mds_body\n"); - GOTO (cleanup, rc = -EFAULT); + CERROR("Can't swab mds_body\n"); + GOTO(cleanup, rc = -EFAULT); } - LASSERT_REQSWAB (req, offset + 1); - name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); + LASSERT_REQSWAB(req, offset + 1); + name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); if (name == NULL) { - CERROR ("Can't unpack name\n"); - GOTO (cleanup, rc = -EFAULT); + CERROR("Can't unpack name\n"); + GOTO(cleanup, rc = -EFAULT); } namesize = req->rq_reqmsg->buflens[offset + 1]; - req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT; - if (child_lockh->cookie) { - LASSERT(req_was_resent); - reconstruct_getattr_name(offset, req, child_lockh); - RETURN(0); - } else if (req_was_resent) { - DEBUG_REQ(D_HA, req, "no reply for RESENT req"); + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { + struct obd_export *exp = req->rq_export; + if (exp->exp_outstanding_reply && + exp->exp_outstanding_reply->rq_xid == req->rq_xid) { + reconstruct_getattr_name(offset, req, child_lockh); + 
RETURN(0); + } + DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", + exp->exp_outstanding_reply ? + exp->exp_outstanding_reply->rq_xid : (u64)0); } LASSERT (offset == 0 || offset == 2); - /* if requests were at offset 2, replies go back at 1 */ - if (offset) + /* if requests were at offset 2, the getattr reply goes back at 1 */ + if (offset) { + rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); offset = 1; + } uc.ouc_fsuid = body->fsuid; uc.ouc_fsgid = body->fsgid; @@ -847,6 +918,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, uc.ouc_suppgid2 = -1; push_ctxt(&saved, &mds->mds_ctxt, &uc); /* Step 1: Lookup/lock parent */ + intent_set_disposition(rep, DISP_LOOKUP_EXECD); de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR, &parent_lockh); if (IS_ERR(de)) @@ -868,7 +940,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, cleanup_phase = 2; /* child dentry */ if (dchild->d_inode == NULL) { + intent_set_disposition(rep, DISP_LOOKUP_NEG); GOTO(cleanup, rc = -ENOENT); + } else { + intent_set_disposition(rep, DISP_LOOKUP_POS); } /* Step 3: Lock child */ @@ -963,11 +1038,17 @@ out_pop: return rc; } + +static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) +{ + return fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs); +} + static int mds_statfs(struct ptlrpc_request *req) { struct obd_device *obd = req->rq_export->exp_obd; - struct obd_statfs *osfs; - int rc, size = sizeof(*osfs); + int rc, size = sizeof(struct obd_statfs); ENTRY; rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); @@ -976,10 +1057,10 @@ static int mds_statfs(struct ptlrpc_request *req) GOTO(out, rc); } - osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs)); - rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs); + /* We call this so that we can cache a bit - 1 jiffie worth */ + rc = obd_statfs(obd, lustre_msg_buf(req->rq_repmsg,0,size),jiffies-HZ); if (rc) { - CERROR("mds: 
statfs failed: rc %d\n", rc); + CERROR("mds_obd_statfs failed: rc %d\n", rc); GOTO(out, rc); } @@ -1006,8 +1087,10 @@ static void reconstruct_close(struct ptlrpc_request *req) static int mds_close(struct ptlrpc_request *req) { struct mds_export_data *med = &req->rq_export->exp_mds_data; + struct obd_device *obd = req->rq_export->exp_obd; struct mds_body *body; struct mds_file_data *mfd; + struct obd_run_ctxt saved; int rc; ENTRY; @@ -1028,10 +1111,20 @@ static int mds_close(struct ptlrpc_request *req) RETURN(-ESTALE); } + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc) { + CERROR("lustre_pack_msg: rc = %d\n", rc); + req->rq_status = rc; + } + spin_lock(&med->med_open_lock); - req->rq_status = mds_close_mfd(mfd, med); + list_del(&mfd->mfd_list); spin_unlock(&med->med_open_lock); + push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL); + req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd); + pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL); + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); req->rq_status = -ENOMEM; @@ -1039,12 +1132,6 @@ static int mds_close(struct ptlrpc_request *req) RETURN(-ENOMEM); } - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("mds: lustre_pack_msg: rc = %d\n", rc); - req->rq_status = rc; - } - mds_mfd_put(mfd); RETURN(0); } @@ -1073,7 +1160,7 @@ static int mds_readpage(struct ptlrpc_request *req) GOTO (out, rc = -EFAULT); /* body->size is actually the offset -eeb */ - if ((body->size & (PAGE_SIZE - 1)) != 0) { + if ((body->size & ~PAGE_MASK) != 0) { CERROR ("offset "LPU64"not on a page boundary\n", body->size); GOTO (out, rc = -EFAULT); } @@ -1306,9 +1393,10 @@ int mds_handle(struct ptlrpc_request *req) break; case MDS_REINT: { - __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp)); + __u32 *opcp = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*opcp)); __u32 opc; - int size[2] = {sizeof(struct mds_body), 
mds->mds_max_mdsize}; + int size[3] = {sizeof(struct mds_body), mds->mds_max_mdsize, + mds->mds_max_cookiesize}; int bufcount; /* NB only peek inside req now; mds_reint() will swab it */ @@ -1319,15 +1407,18 @@ int mds_handle(struct ptlrpc_request *req) } opc = *opcp; if (lustre_msg_swabbed (req->rq_reqmsg)) - __swab32s (&opc); + __swab32s(&opc); DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc, - (opc < sizeof (reint_names) / sizeof (reint_names[0]) || - reint_names[opc] == NULL) ? reint_names[opc] : "unknown opcode"); + (opc < sizeof(reint_names) / sizeof(reint_names[0]) || + reint_names[opc] == NULL) ? reint_names[opc] : + "unknown opcode"); OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0); if (opc == REINT_UNLINK) + bufcount = 3; + else if (opc == REINT_OPEN) bufcount = 2; else bufcount = 1; @@ -1348,11 +1439,23 @@ int mds_handle(struct ptlrpc_request *req) rc = mds_close(req); break; + case MDS_PIN: + DEBUG_REQ(D_INODE, req, "pin"); + OBD_FAIL_RETURN(OBD_FAIL_MDS_PIN_NET, 0); + rc = mds_pin(req); + break; + case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); rc = target_handle_ping(req); break; + case OBD_LOG_CANCEL: + CDEBUG(D_INODE, "log cancel\n"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0); + rc = -ENOTSUPP; /* la la la */ + break; + case LDLM_ENQUEUE: DEBUG_REQ(D_INODE, req, "enqueue"); OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); @@ -1385,7 +1488,7 @@ int mds_handle(struct ptlrpc_request *req) struct obd_device *obd = list_entry(mds, struct obd_device, u.mds); req->rq_repmsg->last_xid = - le64_to_cpu (med->med_mcd->mcd_last_xid); + le64_to_cpu(med->med_mcd->mcd_last_xid); if (!obd->obd_no_transno) { req->rq_repmsg->last_committed = @@ -1421,8 +1524,9 @@ int mds_handle(struct ptlrpc_request *req) * * Also assumes for mds_last_transno that we are not modifying it (no locking). 
*/ -int mds_update_server_data(struct mds_obd *mds) +int mds_update_server_data(struct obd_device *obd) { + struct mds_obd *mds = &obd->u.mds; struct mds_server_data *msd = mds->mds_server_data; struct file *filp = mds->mds_rcvd_filp; struct obd_run_ctxt saved; @@ -1433,21 +1537,16 @@ int mds_update_server_data(struct mds_obd *mds) msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno); msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count); - CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n", - (unsigned long long)mds->mds_mount_count, - (unsigned long long)mds->mds_last_transno); - rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off); + CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n", + mds->mds_mount_count, mds->mds_last_transno); + rc = fsfilt_write_record(obd, filp, (char *)msd, sizeof(*msd), &off); if (rc != sizeof(*msd)) { CERROR("error writing MDS server data: rc = %d\n", rc); if (rc > 0) rc = -EIO; GOTO(out, rc); } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - rc = fsync_dev(filp->f_dentry->d_inode->i_rdev); -#else rc = file_fsync(filp, filp->f_dentry, 1); -#endif if (rc) CERROR("error flushing MDS server data: rc = %d\n", rc); @@ -1457,10 +1556,10 @@ out: } /* mount the file system (secretly) */ -static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) +static int mds_setup(struct obd_device *obd, obd_count len, void *buf) { struct obd_ioctl_data* data = buf; - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; struct vfsmount *mnt; int rc = 0; unsigned long page; @@ -1473,9 +1572,12 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2) RETURN(rc = -EINVAL); - obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); - if (IS_ERR(obddev->obd_fsops)) - RETURN(rc = PTR_ERR(obddev->obd_fsops)); + if (data->ioc_inlbuf4) + obd_str2uuid(&mds->mds_osc_uuid, data->ioc_inlbuf4); + + obd->obd_fsops = 
fsfilt_get_ops(data->ioc_inlbuf2); + if (IS_ERR(obd->obd_fsops)) + RETURN(rc = PTR_ERR(obd->obd_fsops)); if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) { @@ -1511,73 +1613,93 @@ static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) spin_lock_init(&mds->mds_transno_lock); mds->mds_max_mdsize = sizeof(struct lov_mds_md); - rc = mds_fs_setup(obddev, mnt); + mds->mds_max_cookiesize = sizeof(struct llog_cookie); + rc = mds_fs_setup(obd, mnt); if (rc) { CERROR("MDS filesystem method init failed: rc = %d\n", rc); GOTO(err_put, rc); } - obddev->obd_namespace = - ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER); - if (obddev->obd_namespace == NULL) { - mds_cleanup(obddev, 0, 0); - GOTO(err_fs, rc = -ENOMEM); +#ifdef ENABLE_ORPHANS + rc = llog_start_commit_thread(); + if (rc < 0) + GOTO(err_fs, rc); +#endif + +#ifdef ENABLE_ORPHANS + mds->mds_catalog = mds_get_catalog(obd); + if (IS_ERR(mds->mds_catalog)) + GOTO(err_fs, rc = PTR_ERR(mds->mds_catalog)); +#endif + + obd->obd_namespace = ldlm_namespace_new("mds_server", + LDLM_NAMESPACE_SERVER); + if (obd->obd_namespace == NULL) { + mds_cleanup(obd, 0); + GOTO(err_log, rc = -ENOMEM); } ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - "mds_ldlm_client", &obddev->obd_ldlm_client); + "mds_ldlm_client", &obd->obd_ldlm_client); mds->mds_has_lov_desc = 0; + obd->obd_replayable = 1; RETURN(0); +err_log: +#ifdef ENABLE_ORPHANS + mds_put_catalog(mds->mds_catalog); + /* No extra cleanup needed for llog_init_commit_thread() */ err_fs: - mds_fs_cleanup(obddev, 0); +#endif + mds_fs_cleanup(obd, 0); err_put: unlock_kernel(); mntput(mds->mds_vfsmnt); mds->mds_sb = 0; lock_kernel(); err_ops: - fsfilt_put_ops(obddev->obd_fsops); + fsfilt_put_ops(obd->obd_fsops); return rc; } -static int mds_cleanup(struct obd_device *obddev, int force, int failover) +static int mds_cleanup(struct obd_device *obd, int flags) { - struct super_block *sb; - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = 
&obd->u.mds; ENTRY; - sb = mds->mds_sb; - if (!mds->mds_sb) + if (mds->mds_sb == NULL) RETURN(0); - mds_update_server_data(mds); - mds_fs_cleanup(obddev, failover); +#ifdef ENABLE_ORPHANS + mds_put_catalog(mds->mds_catalog); +#endif + if (mds->mds_osc_obd) + obd_disconnect(&mds->mds_osc_conn, flags); + mds_update_server_data(obd); + mds_fs_cleanup(obd, flags); unlock_kernel(); /* 2 seems normal on mds, (may_umount() also expects 2 fwiw), but we only see 1 at this point in obdfilter. */ - if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){ - CERROR("%s: mount point busy, mnt_count: %d\n", - obddev->obd_name, - atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count)); - } + if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2) + CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name, + atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count)); mntput(mds->mds_vfsmnt); mds->mds_sb = 0; - ldlm_namespace_free(obddev->obd_namespace); + ldlm_namespace_free(obd->obd_namespace); - if (obddev->obd_recovering) - target_cancel_recovery_timer(obddev); + if (obd->obd_recovering) + target_cancel_recovery_timer(obd); lock_kernel(); #ifdef CONFIG_DEV_RDONLY dev_clear_rdonly(2); #endif - fsfilt_put_ops(obddev->obd_fsops); + fsfilt_put_ops(obd->obd_fsops); RETURN(0); } @@ -1616,13 +1738,26 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, remote_hdl.cookie); } +int intent_disposition(struct ldlm_reply *rep, int flag) +{ + if (!rep) + return 0; + return (rep->lock_policy_res1 & flag); +} + +void intent_set_disposition(struct ldlm_reply *rep, int flag) +{ + if (!rep) + return; + rep->lock_policy_res1 |= flag; +} + static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp, void *req_cookie, ldlm_mode_t mode, int flags, void *data) { struct ptlrpc_request *req = req_cookie; struct ldlm_lock *lock = *lockp; - int rc = 0; ENTRY; if (!req_cookie) @@ -1632,34 +1767,33 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, /* an intent needs 
to be considered */ struct ldlm_intent *it; struct mds_obd *mds = &req->rq_export->exp_obd->u.mds; - struct mds_body *mds_body; struct ldlm_reply *rep; - struct lustre_handle lockh = { 0 }; + struct lustre_handle lockh; struct ldlm_lock *new_lock; - int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply), - sizeof(struct mds_body), - mds->mds_max_mdsize}; + int offset = 2, repsize[4] = {sizeof(struct ldlm_reply), + sizeof(struct mds_body), + mds->mds_max_mdsize, + mds->mds_max_cookiesize}; - it = lustre_swab_reqbuf (req, 1, sizeof (*it), - lustre_swab_ldlm_intent); + it = lustre_swab_reqbuf(req, 1, sizeof (*it), + lustre_swab_ldlm_intent); if (it == NULL) { CERROR ("Intent missing\n"); - rc = req->rq_status = -EFAULT; - RETURN (rc); + req->rq_status = -EFAULT; + RETURN(req->rq_status); } LDLM_DEBUG(lock, "intent policy, opc: %s", ldlm_it2str(it->opc)); - rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { - rc = req->rq_status = -ENOMEM; - RETURN(rc); - } + req->rq_status = lustre_pack_msg(it->opc == IT_UNLINK ? 
4 : 3, + repsize, NULL, &req->rq_replen, + &req->rq_repmsg); + if (req->rq_status) + RETURN(req->rq_status); rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); - rep->lock_policy_res1 = IT_INTENT_EXEC; + intent_set_disposition(rep, DISP_IT_EXECD); fixup_handle_for_resent_req(req, lock, &lockh); @@ -1667,45 +1801,28 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, switch ((long)it->opc) { case IT_OPEN: case IT_CREAT|IT_OPEN: - rc = mds_reint(req, offset, &lockh); - /* We return a dentry to the client if IT_OPEN_POS is - * set, or if we make it to the OPEN portion of the - * programme (which implies that we created) */ - if (!(rep->lock_policy_res1 & IT_OPEN_POS || - rep->lock_policy_res1 & IT_OPEN_OPEN)) { - rep->lock_policy_res2 = rc; + /* XXX swab here to assert that an mds_open reint + * packet is following */ + rep->lock_policy_res2 = mds_reint(req, offset, &lockh); + /* We abort the lock if the lookup was negative and + * we did not make it to the OPEN portion */ + if (intent_disposition(rep, DISP_LOOKUP_NEG) && + !intent_disposition(rep, DISP_OPEN_OPEN)) RETURN(ELDLM_LOCK_ABORTED); - } - break; - case IT_UNLINK: - rc = mds_reint(req, offset, &lockh); - /* Don't return a lock if the unlink failed, or if we're - * not sending back an EA */ - if (rc) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } - if (req->rq_status != 0) { - rep->lock_policy_res2 = req->rq_status; - RETURN(ELDLM_LOCK_ABORTED); - } - mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body)); - if (!(mds_body->valid & OBD_MD_FLEASIZE)) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } break; case IT_GETATTR: case IT_LOOKUP: case IT_READDIR: - rc = mds_getattr_name(offset, req, &lockh); + rep->lock_policy_res2 = mds_getattr_name(offset, req, + &lockh); /* FIXME: we need to sit down and decide on who should * set req->rq_status, who should return negative and - * positive return values, and what they all mean. 
*/ - if (rc) { - rep->lock_policy_res2 = rc; + * positive return values, and what they all mean. + * - replay: returns 0 & req->status is old status + * - otherwise: returns req->status */ + if (!intent_disposition(rep, DISP_LOOKUP_POS) || + rep->lock_policy_res2) RETURN(ELDLM_LOCK_ABORTED); - } if (req->rq_status != 0) { rep->lock_policy_res2 = req->rq_status; RETURN(ELDLM_LOCK_ABORTED); @@ -1717,10 +1834,17 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, } /* By this point, whatever function we called above must have - * filled in 'lockh' or returned an error. We want to give the - * new lock to the client instead of whatever lock it was about - * to get. */ + * either filled in 'lockh', been an intent replay, or returned + * an error. We want to allow replayed RPCs to not get a lock, + * since we would just drop it below anyways because lock replay + * is done separately by the client afterwards. For regular + * RPCs we want to give the new lock to the client instead of + * whatever lock it was about to get. 
+ */ new_lock = ldlm_handle2lock(&lockh); + if (flags & LDLM_FL_INTENT_ONLY && !new_lock) + RETURN(ELDLM_LOCK_ABORTED); + LASSERT(new_lock != NULL); /* If we've already given this lock to a client once, then we @@ -1785,14 +1909,13 @@ static int ldlm_intent_policy(struct ldlm_namespace *ns, RETURN(ELDLM_LOCK_REPLACED); } else { int size = sizeof(struct ldlm_reply); - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { + if (lustre_pack_msg(1, &size, NULL, &req->rq_replen, + &req->rq_repmsg)) { LBUG(); RETURN(-ENOMEM); } } - RETURN(rc); + RETURN(0); } int mds_attach(struct obd_device *dev, obd_count len, void *data) @@ -1906,7 +2029,7 @@ err_thread: } -static int mdt_cleanup(struct obd_device *obddev, int force, int failover) +static int mdt_cleanup(struct obd_device *obddev, int flags) { struct mds_obd *mds = &obddev->u.mds; ENTRY; @@ -1928,15 +2051,15 @@ extern int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn, /* use obd ops to offer management infrastructure */ static struct obd_ops mds_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mds_attach, - o_detach: mds_detach, - o_connect: mds_connect, - o_disconnect: mds_disconnect, - o_setup: mds_setup, - o_cleanup: mds_cleanup, - o_iocontrol: mds_iocontrol, - o_destroy_export: mds_destroy_export + o_owner: THIS_MODULE, + o_attach: mds_attach, + o_detach: mds_detach, + o_connect: mds_connect, + o_disconnect: mds_disconnect, + o_setup: mds_setup, + o_cleanup: mds_cleanup, + o_statfs: mds_obd_statfs, + o_iocontrol: mds_iocontrol }; static struct obd_ops mdt_obd_ops = { @@ -1961,7 +2084,7 @@ static int __init mds_init(void) return 0; } -static void __exit mds_exit(void) +static void /*__exit*/ mds_exit(void) { ldlm_unregister_intent(); class_unregister_type(LUSTRE_MDS_NAME); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 5d6fa57..e355415 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -37,71 +37,43 @@ struct lprocfs_vars 
lprocfs_mdt_module_vars[] = { {0} }; #else -static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs) -{ - struct obd_device* dev = (struct obd_device*) data; - struct mds_obd *mds; - - LASSERT(dev != NULL); - mds = &dev->u.mds; - return vfs_statfs(mds->mds_sb, sfs); -} - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_mds_statfs); - -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - - LASSERT(obd != NULL); - LASSERT(obd->obd_fsops != NULL); - LASSERT(obd->obd_fsops->fs_type != NULL); - return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type); -} - -int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count, + int *eof, void *data) { struct obd_device* obd = (struct obd_device *)data; LASSERT(obd != NULL); LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname); *eof = 1; - return snprintf(page, count, "%s\n", - obd->u.mds.mds_vfsmnt->mnt_devname); + + return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname); } struct lprocfs_vars lprocfs_mds_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal",rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "blocksize", lprocfs_rd_blksize, 0, 0 }, + { "kbytestotal", 
lprocfs_rd_kbytestotal, 0, 0 }, + { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "fstype", lprocfs_rd_fstype, 0, 0 }, + { "filestotal", lprocfs_rd_filestotal, 0, 0 }, + { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, + { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, { 0 } }; struct lprocfs_vars lprocfs_mds_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; struct lprocfs_vars lprocfs_mdt_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "uuid", lprocfs_rd_uuid, 0, 0 }, { 0 } }; struct lprocfs_vars lprocfs_mdt_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index cefc680..56346ca 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -37,6 +37,9 @@ #include #include #include +#include + +#include "mds_internal.h" /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define MDS_MAX_CLIENTS (PAGE_SIZE * 8) @@ -50,10 +53,10 @@ * we know its offset. 
*/ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, - struct mds_export_data *med, int cl_off) + struct mds_export_data *med, int cl_idx) { unsigned long *bitmap = mds->mds_client_bitmap; - int new_client = (cl_off == -1); + int new_client = (cl_idx == -1); LASSERT(bitmap != NULL); @@ -61,39 +64,40 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID")) RETURN(0); - /* the bitmap operations can handle cl_off > sizeof(long) * 8, so + /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so * there's no need for extra complication here */ if (new_client) { - cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS); + cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS); repeat: - if (cl_off >= MDS_MAX_CLIENTS) { + if (cl_idx >= MDS_MAX_CLIENTS) { CERROR("no room for clients - fix MDS_MAX_CLIENTS\n"); return -ENOMEM; } - if (test_and_set_bit(cl_off, bitmap)) { + if (test_and_set_bit(cl_idx, bitmap)) { CERROR("MDS client %d: found bit is set in bitmap\n", - cl_off); - cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS, - cl_off); + cl_idx); + cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS, + cl_idx); goto repeat; } } else { - if (test_and_set_bit(cl_off, bitmap)) { + if (test_and_set_bit(cl_idx, bitmap)) { CERROR("MDS client %d: bit already set in bitmap!!\n", - cl_off); + cl_idx); LBUG(); } } - CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n", - cl_off, med->med_mcd->mcd_uuid); + CDEBUG(D_INFO, "client at index %d with UUID '%s' added\n", + cl_idx, med->med_mcd->mcd_uuid); - med->med_off = cl_off; + med->med_idx = cl_idx; + med->med_off = MDS_LR_CLIENT_START + (cl_idx * MDS_LR_CLIENT_SIZE); if (new_client) { struct obd_run_ctxt saved; - loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE); + loff_t off = med->med_off; ssize_t written; void *handle; @@ -114,14 +118,16 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, * could use any of them, or maybe 
an FSFILT_OP_NONE is best? */ handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode, - FSFILT_OP_SETATTR); + FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) { written = PTR_ERR(handle); CERROR("unable to start transaction: rc %d\n", (int)written); } else { - written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd, - sizeof(*med->med_mcd), &off); + written = fsfilt_write_record(obd, mds->mds_rcvd_filp, + (char *)med->med_mcd, + sizeof(*med->med_mcd), + &off); fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode, handle, 0); } @@ -132,8 +138,8 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, RETURN(written); RETURN(-EIO); } - CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n", - MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE), + CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n", + med->med_idx, med->med_off, (unsigned int)sizeof(*med->med_mcd)); } return 0; @@ -143,11 +149,11 @@ int mds_client_free(struct obd_export *exp) { struct mds_export_data *med = &exp->exp_mds_data; struct mds_obd *mds = &exp->exp_obd->u.mds; + struct obd_device *obd = exp->exp_obd; struct mds_client_data zero_mcd; struct obd_run_ctxt saved; int written; unsigned long *bitmap = mds->mds_client_bitmap; - loff_t off; LASSERT(bitmap); if (!med->med_mcd) @@ -157,30 +163,29 @@ int mds_client_free(struct obd_export *exp) if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID")) GOTO(free_and_out, 0); - off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE); - - CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n", - med->med_off, off, med->med_mcd->mcd_uuid); + CDEBUG(D_INFO, "freeing client at index %u (%lld)with UUID '%s'\n", + med->med_idx, med->med_off, med->med_mcd->mcd_uuid); - if (!test_and_clear_bit(med->med_off, bitmap)) { + if (!test_and_clear_bit(med->med_idx, bitmap)) { CERROR("MDS client %u: bit already clear in bitmap!!\n", - med->med_off); + med->med_idx); LBUG(); } memset(&zero_mcd, 0, sizeof zero_mcd); push_ctxt(&saved, &mds->mds_ctxt, 
NULL); - written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd, - sizeof(zero_mcd), &off); + written = fsfilt_write_record(obd, mds->mds_rcvd_filp, + (char *)&zero_mcd, sizeof(zero_mcd), + &med->med_off); pop_ctxt(&saved, &mds->mds_ctxt, NULL); if (written != sizeof(zero_mcd)) { - CERROR("error zeroing out client %s off %d in %s: %d\n", - med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD, + CERROR("error zeroing out client %s index %d in %s: %d\n", + med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD, written); } else { CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n", - med->med_mcd->mcd_uuid, med->med_off); + med->med_mcd->mcd_uuid, med->med_idx); } free_and_out: @@ -199,20 +204,20 @@ static int mds_server_free_data(struct mds_obd *mds) return 0; } -static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) +static int mds_read_last_rcvd(struct obd_device *obd, struct file *file) { - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; struct mds_server_data *msd; struct mds_client_data *mcd = NULL; loff_t off = 0; - int cl_off; - unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size; + int cl_idx; + unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size; __u64 last_transno = 0; - __u64 last_mount; + __u64 mount_count; int rc = 0; - LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE); - LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT); + LASSERT(sizeof(struct mds_client_data) == MDS_LR_CLIENT_SIZE); + LASSERT(sizeof(struct mds_server_data) <= MDS_LR_SERVER_SIZE); OBD_ALLOC(msd, sizeof(*msd)); if (!msd) @@ -225,40 +230,71 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) RETURN(-ENOMEM); } - rc = lustre_fread(f, (char *)msd, sizeof(*msd), &off); - mds->mds_server_data = msd; - if (rc == 0) { - CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name, - LAST_RCVD); + + if (last_rcvd_size == 0) { + CWARN("%s: initializing new %s\n", obd->obd_name, 
LAST_RCVD); + memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid)); + msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE); + msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START); + msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE); + RETURN(0); } + rc = fsfilt_read_record(obd, file, (char *)msd, sizeof(*msd), &off); + if (rc != sizeof(*msd)) { - CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc); + CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD,rc); if (rc > 0) rc = -EIO; GOTO(err_msd, rc); } + if (!msd->msd_server_size) + msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE); + if (!msd->msd_client_start) + msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START); + if (!msd->msd_client_size) + msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE); + + if (msd->msd_feature_incompat) { + CERROR("unsupported incompat feature %x\n", + le32_to_cpu(msd->msd_feature_incompat)); + GOTO(err_msd, rc = -EINVAL); + } + if (msd->msd_feature_rocompat) { + CERROR("unsupported read-only feature %x\n", + le32_to_cpu(msd->msd_feature_rocompat)); + /* Do something like remount filesystem read-only */ + GOTO(err_msd, rc = -EINVAL); + } - CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n", - last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE); - - /* - * When we do a clean MDS shutdown, we save the last_transno into - * the header. 
- */ last_transno = le64_to_cpu(msd->msd_last_transno); mds->mds_last_transno = last_transno; - CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n", - last_transno); - - last_mount = le64_to_cpu(msd->msd_mount_count); - mds->mds_mount_count = last_mount; - CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount); - /* off is adjusted by lustre_fread, so we don't adjust it in the loop */ - for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) { + mount_count = le64_to_cpu(msd->msd_mount_count); + mds->mds_mount_count = mount_count; + + CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n", + obd->obd_name, last_transno); + CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n", + obd->obd_name, mount_count); + CDEBUG(D_INODE, "%s: server data size: %u\n", + obd->obd_name, le32_to_cpu(msd->msd_server_size)); + CDEBUG(D_INODE, "%s: per-client data start: %u\n", + obd->obd_name, le32_to_cpu(msd->msd_client_start)); + CDEBUG(D_INODE, "%s: per-client data size: %u\n", + obd->obd_name, le32_to_cpu(msd->msd_client_size)); + CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n", + obd->obd_name, last_rcvd_size); + CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name, + (last_rcvd_size - MDS_LR_CLIENT_START) / MDS_LR_CLIENT_SIZE); + + /* When we do a clean FILTER shutdown, we save the last_transno into + * the header. If we find clients with higher last_transno values + * then those clients may need recovery done. */ + for (cl_idx = 0; off < last_rcvd_size; cl_idx++) { + __u64 last_transno; int mount_age; if (!mcd) { @@ -267,10 +303,16 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) GOTO(err_msd, rc = -ENOMEM); } - rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off); + /* Don't assume off is incremented properly, in case + * sizeof(fsd) isn't the same as fsd->fsd_client_size. 
+ */ + off = le32_to_cpu(msd->msd_client_start) + + cl_idx * le16_to_cpu(msd->msd_client_size); + rc = fsfilt_read_record(obd, file, (char *)mcd, + sizeof(*mcd), &off); if (rc != sizeof(*mcd)) { CERROR("error reading MDS %s offset %d: rc = %d\n", - LAST_RCVD, cl_off, rc); + LAST_RCVD, cl_idx, rc); if (rc > 0) /* XXX fatal error or just abort reading? */ rc = -EIO; break; @@ -278,7 +320,7 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) if (mcd->mcd_uuid[0] == '\0') { CDEBUG(D_INFO, "skipping zeroed client at offset %d\n", - cl_off); + cl_idx); continue; } @@ -287,10 +329,15 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) /* These exports are cleaned up by mds_disconnect(), so they * need to be set up like real exports as mds_connect() does. */ - mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count); + mount_age = mount_count - le64_to_cpu(mcd->mcd_mount_count); if (mount_age < MDS_MOUNT_RECOV) { - struct obd_export *exp = class_new_export(obddev); + struct obd_export *exp = class_new_export(obd); struct mds_export_data *med; + CERROR("RCVRNG CLIENT uuid: %s off: %d lr: "LPU64 + "srv lr: "LPU64" mnt: "LPU64" last mount: "LPU64 + "\n", mcd->mcd_uuid, cl_idx, + last_transno, le64_to_cpu(msd->msd_last_transno), + le64_to_cpu(mcd->mcd_mount_count), mount_count); if (!exp) { rc = -ENOMEM; @@ -301,35 +348,35 @@ static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) sizeof exp->exp_client_uuid.uuid); med = &exp->exp_mds_data; med->med_mcd = mcd; - mds_client_add(obddev, mds, med, cl_off); + mds_client_add(obd, mds, med, cl_idx); /* create helper if export init gets more complex */ INIT_LIST_HEAD(&med->med_open_head); spin_lock_init(&med->med_open_lock); mcd = NULL; - obddev->obd_recoverable_clients++; + obd->obd_recoverable_clients++; class_export_put(exp); } else { CDEBUG(D_INFO, "discarded client %d, UUID '%s', count " - LPU64"\n", cl_off, mcd->mcd_uuid, + LPU64"\n", cl_idx, mcd->mcd_uuid, 
le64_to_cpu(mcd->mcd_mount_count)); } - CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n", - cl_off, (unsigned long long)last_transno); + CDEBUG(D_OTHER, "client at offset %d has last_transno = " + LPU64"\n", cl_idx, last_transno); if (last_transno > mds->mds_last_transno) mds->mds_last_transno = last_transno; } - obddev->obd_last_committed = mds->mds_last_transno; - if (obddev->obd_recoverable_clients) { + obd->obd_last_committed = mds->mds_last_transno; + if (obd->obd_recoverable_clients) { CERROR("RECOVERY: %d recoverable clients, last_transno " LPU64"\n", - obddev->obd_recoverable_clients, mds->mds_last_transno); - obddev->obd_next_recovery_transno = obddev->obd_last_committed + obd->obd_recoverable_clients, mds->mds_last_transno); + obd->obd_next_recovery_transno = obd->obd_last_committed + 1; - obddev->obd_recovering = 1; + obd->obd_recovering = 1; } if (mcd) @@ -342,12 +389,12 @@ err_msd: return rc; } -static int mds_fs_prep(struct obd_device *obddev) +static int mds_fs_prep(struct obd_device *obd) { - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; struct obd_run_ctxt saved; struct dentry *dentry; - struct file *f; + struct file *file; int rc; push_ctxt(&saved, &mds->mds_ctxt, NULL); @@ -373,46 +420,76 @@ static int mds_fs_prep(struct obd_device *obddev) } mds->mds_fid_de = dentry; - f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644); - if (IS_ERR(f)) { - rc = PTR_ERR(f); + dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create PENDING directory: rc = %d\n", rc); + GOTO(err_fid, rc); + } + mds->mds_pending_dir = dentry; + + dentry = simple_mkdir(current->fs->pwd, "LOGS", 0700); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create LOGS directory: rc = %d\n", rc); + GOTO(err_pending, rc); + } + mds->mds_logs_dir = dentry; + + file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644); + if (IS_ERR(file)) { + rc = PTR_ERR(file); 
CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc); - GOTO(err_pop, rc = PTR_ERR(f)); + + GOTO(err_logs, rc = PTR_ERR(file)); } - if (!S_ISREG(f->f_dentry->d_inode->i_mode)) { + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD, - f->f_dentry->d_inode->i_mode); + file->f_dentry->d_inode->i_mode); GOTO(err_filp, rc = -ENOENT); } - rc = fsfilt_journal_data(obddev, f); + rc = fsfilt_journal_data(obd, file); if (rc) { CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_filp, rc); } - rc = mds_read_last_rcvd(obddev, f); + rc = mds_read_last_rcvd(obd, file); if (rc) { CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_client, rc); } - mds->mds_rcvd_filp = f; + mds->mds_rcvd_filp = file; +#ifdef I_SKIP_PDFLUSH + /* + * we need this to protect from deadlock + * pdflush vs. lustre_fwrite() + */ + file->f_dentry->d_inode->i_flags |= I_SKIP_PDFLUSH; +#endif err_pop: pop_ctxt(&saved, &mds->mds_ctxt, NULL); return rc; err_client: - class_disconnect_exports(obddev, 0); + class_disconnect_exports(obd, 0); err_filp: - if (filp_close(f, 0)) + if (filp_close(file, 0)) CERROR("can't close %s after error\n", LAST_RCVD); +err_logs: + dput(mds->mds_logs_dir); +err_pending: + dput(mds->mds_pending_dir); +err_fid: + dput(mds->mds_fid_de); goto err_pop; } -int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt) +int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) { - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; ENTRY; mds->mds_vfsmnt = mnt; @@ -421,21 +498,20 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt) mds->mds_ctxt.pwdmnt = mnt; mds->mds_ctxt.pwd = mnt->mnt_root; mds->mds_ctxt.fs = get_ds(); - RETURN(mds_fs_prep(obddev)); + RETURN(mds_fs_prep(obd)); } -int mds_fs_cleanup(struct obd_device *obddev, int failover) +int mds_fs_cleanup(struct obd_device *obd, int flags) { - struct mds_obd *mds = &obddev->u.mds; + struct 
mds_obd *mds = &obd->u.mds; struct obd_run_ctxt saved; int rc = 0; - if (failover) + if (flags & OBD_OPT_FAILOVER) CERROR("%s: shutting down for failover; client state will" - " be preserved.\n", obddev->obd_name); + " be preserved.\n", obd->obd_name); - class_disconnect_exports(obddev, failover); /* this cleans up client - info too */ + class_disconnect_exports(obd, flags); /* cleans up client info too */ mds_server_free_data(mds); push_ctxt(&saved, &mds->mds_ctxt, NULL); @@ -443,7 +519,15 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover) rc = filp_close(mds->mds_rcvd_filp, 0); mds->mds_rcvd_filp = NULL; if (rc) - CERROR("last_rcvd file won't close, rc=%d\n", rc); + CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc); + } + if (mds->mds_logs_dir) { + l_dput(mds->mds_logs_dir); + mds->mds_logs_dir = NULL; + } + if (mds->mds_pending_dir) { + l_dput(mds->mds_pending_dir); + mds->mds_pending_dir = NULL; } pop_ctxt(&saved, &mds->mds_ctxt, NULL); shrink_dcache_parent(mds->mds_fid_de); @@ -451,3 +535,233 @@ int mds_fs_cleanup(struct obd_device *obddev, int failover) return rc; } + +/* This is a callback from the llog_* functions. + * Assumes caller has already pushed us into the kernel context. */ +int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle) +{ + struct llog_object_hdr *llh = loghandle->lgh_hdr; + struct mds_obd *mds = &cathandle->lgh_obd->u.mds; + struct dentry *dchild = NULL; + int rc; + ENTRY; + + /* If we are going to delete this log, grab a ref before we close + * it so we don't have to immediately do another lookup. 
+ */ + if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){ + CDEBUG(D_INODE, "deleting log file "LPX64":%x\n", + loghandle->lgh_cookie.lgc_lgl.lgl_oid, + loghandle->lgh_cookie.lgc_lgl.lgl_ogen); + down(&mds->mds_logs_dir->d_inode->i_sem); + dchild = dget(loghandle->lgh_file->f_dentry); + llog_delete_log(cathandle, loghandle); + } else { + CDEBUG(D_INODE, "closing log file "LPX64":%x\n", + loghandle->lgh_cookie.lgc_lgl.lgl_oid, + loghandle->lgh_cookie.lgc_lgl.lgl_ogen); + } + + rc = filp_close(loghandle->lgh_file, 0); + + llog_free_handle(loghandle); /* also removes loghandle from list */ + + if (dchild) { + int err = vfs_unlink(mds->mds_logs_dir->d_inode, dchild); + if (err) { + CERROR("error unlinking empty log %*s: rc %d\n", + dchild->d_name.len, dchild->d_name.name, err); + if (!rc) + rc = err; + } + l_dput(dchild); + up(&mds->mds_logs_dir->d_inode->i_sem); + } + RETURN(rc); +} + +/* This is a callback from the llog_* functions. + * Assumes caller has already pushed us into the kernel context. 
*/ +struct llog_handle *mds_log_open(struct obd_device *obd, + struct llog_cookie *logcookie) +{ + struct ll_fid fid = { .id = logcookie->lgc_lgl.lgl_oid, + .generation = logcookie->lgc_lgl.lgl_ogen, + .f_type = S_IFREG }; + struct llog_handle *loghandle; + struct dentry *dchild; + int rc; + ENTRY; + + loghandle = llog_alloc_handle(); + if (loghandle == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + down(&obd->u.mds.mds_logs_dir->d_inode->i_sem); + dchild = mds_fid2dentry(&obd->u.mds, &fid, NULL); + up(&obd->u.mds.mds_logs_dir->d_inode->i_sem); + if (IS_ERR(dchild)) { + rc = PTR_ERR(dchild); + CERROR("error looking up log file "LPX64":%x: rc %d\n", + fid.id, fid.generation, rc); + GOTO(out, rc); + } + + if (dchild->d_inode == NULL) { + rc = -ENOENT; + CERROR("nonexistent log file "LPX64":%x: rc %d\n", + fid.id, fid.generation, rc); + GOTO(out_put, rc); + } + + /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */ + mntget(obd->u.mds.mds_vfsmnt); + loghandle->lgh_file = dentry_open(dchild, obd->u.mds.mds_vfsmnt, + O_RDWR | O_LARGEFILE); + if (IS_ERR(loghandle->lgh_file)) { + rc = PTR_ERR(loghandle->lgh_file); + CERROR("error opening logfile "LPX64":%x: rc %d\n", + fid.id, fid.generation, rc); + GOTO(out, rc); + } + memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie)); + loghandle->lgh_log_create = mds_log_create; + loghandle->lgh_log_open = mds_log_open; + loghandle->lgh_log_close = mds_log_close; + loghandle->lgh_obd = obd; + + RETURN(loghandle); + +out_put: + l_dput(dchild); +out: + llog_free_handle(loghandle); + return ERR_PTR(rc); +} + +/* This is a callback from the llog_* functions. + * Assumes caller has already pushed us into the kernel context. 
*/
+struct llog_handle *mds_log_create(struct obd_device *obd)
+{
+        char logbuf[40], *logname; /* "LOGS/log" + widest %lu + '.' + widest %u + NUL */
+        struct llog_handle *loghandle;
+        int rc, open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+retry:
+        if (!obd->u.mds.mds_catalog) { /* no catalog handle yet: open or create the catalog file itself */
+                logname = "LOGS/catalog";
+        } else {
+                snprintf(logbuf, sizeof(logbuf), "LOGS/log%lu.%u", /* bounded write; no '\n' in the file name */
+                         CURRENT_SECONDS, obd->u.mds.mds_catalog->lgh_index++);
+                open_flags |= O_EXCL; /* a name collision must fail with -EEXIST so it can be retried */
+                logname = logbuf;
+        }
+        loghandle->lgh_file = filp_open(logname, open_flags, 0644);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                if (rc == -EEXIST) {
+                        CDEBUG(D_HA, "collision in logfile %s creation\n",
+                               logname);
+                        obd->u.mds.mds_catalog->lgh_index++; /* move on to the next index before retrying */
+                        goto retry;
+                }
+                CERROR("error opening/creating %s: rc %d\n", logname, rc);
+                GOTO(out_handle, rc);
+        }
+
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid = /* the cookie records the new file's inode number ... */
+                loghandle->lgh_file->f_dentry->d_inode->i_ino;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen = /* ... and its inode generation */
+                loghandle->lgh_file->f_dentry->d_inode->i_generation;
+        loghandle->lgh_log_create = mds_log_create; /* install the MDS log callbacks on the handle */
+        loghandle->lgh_log_open = mds_log_open;
+        loghandle->lgh_log_close = mds_log_close;
+        loghandle->lgh_obd = obd;
+
+        RETURN(loghandle);
+
+out_handle:
+        llog_free_handle(loghandle);
+        return ERR_PTR(rc); /* failures are returned as an ERR_PTR-encoded errno */
+}
+
+struct llog_handle *mds_get_catalog(struct obd_device *obd) /* open the catalog recorded in the server data, else create and record one */
+{
+        struct mds_server_data *msd = obd->u.mds.mds_server_data;
+        struct obd_run_ctxt saved;
+        struct llog_handle *cathandle = NULL;
+        int rc = 0;
+        ENTRY;
+
+        push_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+
+        if (msd->msd_catalog_oid) { /* a catalog id is already recorded: try to open it */
+                struct llog_cookie catcookie;
+
+                catcookie.lgc_lgl.lgl_oid = le64_to_cpu(msd->msd_catalog_oid);
+                catcookie.lgc_lgl.lgl_ogen = le32_to_cpu(msd->msd_catalog_ogen);
+                cathandle = mds_log_open(obd, &catcookie);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error opening catalog "LPX64":%x: rc %d\n",
+                               catcookie.lgc_lgl.lgl_oid,
+                               catcookie.lgc_lgl.lgl_ogen,
+                               (int)PTR_ERR(cathandle));
+                        msd->msd_catalog_oid = 0; /* forget the recorded id so a new catalog is created below */
+                        msd->msd_catalog_ogen = 0;
+                }
+                /* ORPHANS FIXME: compare catalog UUID to msd_peeruuid */
+        }
+
+        if (!msd->msd_catalog_oid) {
+                struct llog_logid *lgl;
+
+                cathandle = mds_log_create(obd);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error creating new catalog: rc %d\n",
+                               (int)PTR_ERR(cathandle));
+                        GOTO(out, cathandle); /* the ERR_PTR itself is returned to the caller */
+                }
+                lgl = &cathandle->lgh_cookie.lgc_lgl;
+                msd->msd_catalog_oid = cpu_to_le64(lgl->lgl_oid); /* stored little-endian in the server data */
+                msd->msd_catalog_ogen = cpu_to_le32(lgl->lgl_ogen);
+                rc = mds_update_server_data(obd);
+                if (rc) {
+                        CERROR("error writing new catalog to disk: rc %d\n",rc);
+                        GOTO(out_handle, rc);
+                }
+        }
+
+        rc = llog_init_catalog(cathandle, &obd->u.mds.mds_osc_uuid); /* NOTE(review): rc is neither checked nor returned - confirm intended */
+
+out:
+        pop_ctxt(&saved, &obd->u.mds.mds_ctxt, NULL);
+        RETURN(cathandle);
+
+out_handle:
+        mds_log_close(cathandle, cathandle); /* the catalog is passed as both cathandle and loghandle */
+        cathandle = ERR_PTR(rc);
+        goto out;
+
+}
+
+void mds_put_catalog(struct llog_handle *cathandle) /* close every log on the catalog's list, then the catalog itself */
+{
+        struct llog_handle *loghandle, *n;
+        int rc;
+        ENTRY;
+
+        list_for_each_entry_safe(loghandle, n, &cathandle->lgh_list, lgh_list) /* _safe variant: entries may be unlinked while iterating */
+                mds_log_close(cathandle, loghandle);
+
+        rc = filp_close(cathandle->lgh_file, 0);
+        if (rc)
+                CERROR("error closing catalog: rc %d\n", rc); /* logged only; the handle is freed regardless */
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h
index 0b62a92..c2d3d77 100644
--- a/lustre/mds/mds_internal.h
+++ b/lustre/mds/mds_internal.h
@@ -1,9 +1,41 @@
+#ifndef _MDS_INTERNAL_H
+#define _MDS_INTERNAL_H
+static inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req)
+{
+        return &req->rq_export->exp_obd->u.mds;
+}
+
+/* mds/mds_fs.c */
+struct llog_handle *mds_log_create(struct obd_device *obd);
+int mds_log_close(struct llog_handle *cathandle, struct llog_handle *loghandle);
+struct llog_handle *mds_log_open(struct obd_device *obd,
+                                 struct llog_cookie *logcookie);
+struct llog_handle *mds_get_catalog(struct obd_device *obd);
+void mds_put_catalog(struct llog_handle *cathandle);
+
+/* mds/handler.c */ struct mds_file_data *mds_mfd_new(void); void mds_mfd_put(struct mds_file_data *mfd); void mds_mfd_destroy(struct mds_file_data *mfd); + +/* mds/mds_reint.c */ +void mds_commit_cb(struct obd_device *, __u64 last_rcvd, void *data, int error); +int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, + struct ptlrpc_request *req, int rc, __u32 op_data); + +/* mds/mds_lib.c */ int mds_update_unpack(struct ptlrpc_request *, int offset, struct mds_update_record *); +/* mds/mds_lov.c */ +int mds_get_lovtgts(struct mds_obd *mds, int tgt_count, + struct obd_uuid *uuidarray); + +/* mds/mds_open.c */ +int mds_open(struct mds_update_record *rec, int offset, + struct ptlrpc_request *req, struct lustre_handle *); +int mds_pin(struct ptlrpc_request *req); + /* mds/mds_fs.c */ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, struct mds_export_data *med, int cl_off); @@ -13,3 +45,5 @@ int mds_client_free(struct obd_export *exp); void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode); void mds_pack_inode2body(struct mds_body *body, struct inode *inode); #endif + +#endif /* _MDS_INTERNAL_H */ diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 8f16795..93ac300 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -57,17 +57,15 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode) fid->f_type = (S_IFMT & inode->i_mode); } +/* Note that we can copy all of the fields, just some will not be "valid" */ void mds_pack_inode2body(struct mds_body *b, struct inode *inode) { - b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLNLINK | OBD_MD_FLGENER; + b->valid = OBD_MD_FLID | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID | + OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLNLINK | OBD_MD_FLGENER; - /* The MDS file size isn't authoritative for regular files, so don't 
-         * even pretend. */
-        if (S_ISREG(inode->i_mode))
-                b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+        if (!S_ISREG(inode->i_mode))
+                b->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME |
+                            OBD_MD_FLMTIME;
 
         b->ino = inode->i_ino;
         b->atime = LTIME_S(inode->i_atime);
@@ -80,10 +78,12 @@ void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
         b->gid = inode->i_gid;
         b->flags = inode->i_flags;
         b->rdev = b->rdev;
-        b->nlink = inode->i_nlink;
+        /* Return the correct link count for orphan inodes */
+        b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
         b->generation = inode->i_generation;
         b->suppgid = -1;
 }
+
 /* unpacking */
 static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                               struct mds_update_record *r)
@@ -92,8 +92,8 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
         struct mds_rec_setattr *rec;
         ENTRY;
 
-        rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
-                                  lustre_swab_mds_rec_setattr);
+        rec = lustre_swab_reqbuf(req, offset, sizeof(*rec),
+                                 lustre_swab_mds_rec_setattr);
         if (rec == NULL)
                 RETURN (-EFAULT);
 
@@ -120,9 +120,14 @@ static int mds_setattr_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_eadata == NULL)
                         RETURN (-EFAULT);
                 r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1];
-        } else {
-                r->ur_eadata = NULL;
-                r->ur_eadatalen = 0;
+        }
+
+        if (req->rq_reqmsg->bufcount > offset + 2) { /* an optional third buffer carries the log cookies */
+                r->ur_logcookies = lustre_msg_buf(req->rq_reqmsg, offset + 2,0);
+                if (r->ur_logcookies == NULL) /* check the pointer just fetched, not ur_eadata */
+                        RETURN (-EFAULT);
+
+                r->ur_cookielen = req->rq_reqmsg->buflens[offset + 2];
         }
 
         RETURN(0);
@@ -172,9 +177,6 @@ static int mds_create_unpack(struct ptlrpc_request *req, int offset,
                 if (r->ur_tgt == NULL)
                         RETURN (-EFAULT);
                 r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2];
-        } else {
-                r->ur_tgt = NULL;
-                r->ur_tgtlen = 0;
         }
         RETURN(0);
 }
diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c
index 02c53cc..ecca88c 100644
--- a/lustre/mds/mds_lov.c
+++ b/lustre/mds/mds_lov.c
@@ -32,6 +32,9 @@
 #include
 #include
#include +#include + +#include "mds_internal.h" void le_lov_desc_to_cpu (struct lov_desc *ld) { @@ -141,6 +144,7 @@ int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc, mds->mds_has_lov_desc = 1; /* XXX the MDS should not really know about this */ mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count); + mds->mds_max_cookiesize = desc->ld_tgt_count*sizeof(struct llog_cookie); out: pop_ctxt(&saved, &mds->mds_ctxt, NULL); @@ -182,7 +186,8 @@ out: return rc; } -int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray) +int mds_get_lovtgts(struct mds_obd *mds, int tgt_count, + struct obd_uuid *uuidarray) { struct obd_run_ctxt saved; struct file *f; @@ -266,13 +271,13 @@ int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn, RETURN(rc); - case OBD_IOC_SET_READONLY: + case OBD_IOC_SET_READONLY: { + BDEVNAME_DECLARE_STORAGE(tmp); CERROR("setting device %s read-only\n", - ll_bdevname(obd->u.mds.mds_sb->s_dev)); -#ifdef CONFIG_DEV_RDONLY + ll_bdevname(obd->u.mds.mds_sb->s_dev, tmp)); dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2); -#endif RETURN(0); + } case OBD_IOC_ABORT_RECOVERY: CERROR("aborting recovery for device %s\n", obd->obd_name); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 04d6ee9..2bd2f8c 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -45,19 +45,6 @@ #include "mds_internal.h" -extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req); -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data); -extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd, - struct ldlm_res_id *p1_res_id, - struct ldlm_res_id *p2_res_id, - struct ldlm_res_id *c1_res_id, - struct ldlm_res_id *c2_res_id, - struct lustre_handle *p1_lockh, - struct lustre_handle *p2_lockh, - struct lustre_handle *c1_lockh, - struct lustre_handle *c2_lockh); - struct mds_file_data *mds_dentry_open(struct dentry *dentry, 
struct vfsmount *mnt, int flags, @@ -65,17 +52,16 @@ struct mds_file_data *mds_dentry_open(struct dentry *dentry, { struct mds_export_data *med = &req->rq_export->exp_mds_data; struct inode *inode; - int mode; struct mds_file_data *mfd; - int error; + int mode, error; mfd = mds_mfd_new(); - if (!mfd) { + if (mfd == NULL) { CERROR("mds: out of memory\n"); GOTO(cleanup_dentry, error = -ENOMEM); } - mode = (flags+1) & O_ACCMODE; + mode = (flags + 1) & O_ACCMODE; inode = dentry->d_inode; if (mode & FMODE_WRITE) { @@ -107,6 +93,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *child_lockh) { + struct ptlrpc_request *oldreq = req->rq_export->exp_outstanding_reply; struct mds_export_data *med = &req->rq_export->exp_mds_data; struct mds_client_data *mcd = med->med_mcd; struct mds_obd *mds = mds_req2mds(req); @@ -115,7 +102,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset, struct dentry *parent, *child; struct ldlm_reply *rep; struct mds_body *body; - int disp, rc; + int rc; struct list_head *t; int put_child = 1; ENTRY; @@ -127,14 +114,13 @@ void reconstruct_open(struct mds_update_record *rec, int offset, /* copy rc, transno and disp; steal locks */ req->rq_transno = mcd->mcd_last_transno; req->rq_status = mcd->mcd_last_result; - disp = rep->lock_policy_res1 = mcd->mcd_last_data; + intent_set_disposition(rep, mcd->mcd_last_data); - if (req->rq_export->exp_outstanding_reply) + if (oldreq) mds_steal_ack_locks(req->rq_export, req); - /* We never care about these. */ - disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG); - if (!disp) { + /* Only replay if create or open actually happened. 
*/ + if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) { EXIT; return; /* error looking up parent or child */ } @@ -149,11 +135,11 @@ void reconstruct_open(struct mds_update_record *rec, int offset, GOTO(out_dput, 0); /* child not present to open */ } - /* At this point, we know we have a child, which means that we'll send - * it back _unless_ it was open failed, _and_ we didn't create the file. - * I love you guys. No, really. + /* At this point, we know we have a child. We'll send + * it back _unless_ it not created and open failed. */ - if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) && + if (intent_disposition(rep, DISP_OPEN_OPEN) && + !intent_disposition(rep, DISP_OPEN_CREATE) && req->rq_status) { GOTO(out_dput, 0); } @@ -165,8 +151,14 @@ void reconstruct_open(struct mds_update_record *rec, int offset, if (S_ISREG(child->d_inode->i_mode)) { rc = mds_pack_md(obd, req->rq_repmsg, 2, body, child->d_inode); + if (rc) LASSERT(rc == req->rq_status); + + /* If we have LOV EA data, the OST holds size, mtime */ + if (!(body->valid & OBD_MD_FLEASIZE)) + body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME); } else { /* XXX need to check this case */ } @@ -185,7 +177,7 @@ void reconstruct_open(struct mds_update_record *rec, int offset, /* If we didn't get as far as trying to open, then some locking thing * probably went wrong, and we'll just bail here. 
*/
-        if ((disp & IT_OPEN_OPEN) == 0)
+        if (!intent_disposition(rep, DISP_OPEN_OPEN))
                 GOTO(out_dput, 0);
 
         /* If we failed, then we must have failed opening, so don't look for
@@ -197,12 +189,12 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         mfd = NULL;
         list_for_each(t, &med->med_open_head) {
                 mfd = list_entry(t, struct mds_file_data, mfd_list);
-                if (mfd->mfd_xid == req->rq_xid) 
+                if (mfd->mfd_xid == req->rq_xid)
                         break;
                 mfd = NULL;
         }
 
-        if (req->rq_export->exp_outstanding_reply) {
+        if (oldreq) {
                 /* if we're not recovering, it had better be found */
                 LASSERT(mfd);
         } else if (mfd == NULL) {
@@ -226,35 +218,180 @@ void reconstruct_open(struct mds_update_record *rec, int offset,
         EXIT;
 }
 
+int mds_pin(struct ptlrpc_request *req) /* open the inode named by the request body's fid1; reply with the open handle cookie */
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct inode *pending_dir = mds->mds_pending_dir->d_inode;
+        struct mds_file_data *mfd = NULL;
+        struct mds_body *body;
+        struct dentry *dchild;
+        struct obd_run_ctxt saved;
+        char fidname[LL_FID_NAMELEN];
+        int fidlen = 0, rc, cleanup_phase = 0, size = sizeof(*body);
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); /* NOTE(review): used below without a NULL check - confirm */
+
+        down(&pending_dir->i_sem); /* the lookup in mds_pending_dir runs under its i_sem */
+        fidlen = ll_fid2str(fidname, body->fid1.id, body->fid1.generation);
+        dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen);
+        if (IS_ERR(dchild)) {
+                up(&pending_dir->i_sem);
+                rc = PTR_ERR(dchild);
+                CERROR("error looking up %s in PENDING: rc = %d\n",
+                       fidname, rc);
+                RETURN(rc);
+        }
+
+        cleanup_phase = 2; /* from here on a child dentry reference is held */
+
+        if (dchild->d_inode) { /* found under mds_pending_dir: mark it orphan and open it */
+                up(&pending_dir->i_sem);
+                mds_inode_set_orphan(dchild->d_inode);
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+        dput(dchild);
+        up(&pending_dir->i_sem);
+
+        /* We didn't find it in PENDING so it isn't an orphan.  See
+         * if it's a regular inode. */
+        dchild = mds_fid2dentry(mds, &body->fid1, NULL);
+        if (!IS_ERR(dchild)) { /* NOTE(review): d_inode is used without a NULL check; mds_log_open checks it after the same call - confirm */
+                mds_pack_inode2fid(&body->fid1, dchild->d_inode);
+                mds_pack_inode2body(body, dchild->d_inode);
+                GOTO(openit, rc = 0);
+        }
+
+        /* We didn't find this inode on disk, but we're trying to pin it.
+         * This should never happen. */
+        CERROR("ENOENT during mds_pin for fid "LPU64"/%u\n", body->fid1.id,
+               body->fid1.generation);
+        RETURN(-ENOENT);
+
+ openit:
+        /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, body->flags, req);
+        if (IS_ERR(mfd)) {
+                dchild = NULL; /* prevent a double dput in cleanup phase 2 */
+                GOTO(cleanup, rc = PTR_ERR(mfd));
+        }
+        cleanup_phase = 4; /* mfd allocated: any later failure must destroy it */
+
+        rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memory\n");
+                GOTO(cleanup, rc);
+        }
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body)); /* body now points into the reply message */
+
+        body->handle.cookie = mfd->mfd_handle.h_cookie; /* hand the open handle cookie back in the reply */
+        CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
+               mfd->mfd_handle.h_cookie);
+        GOTO(cleanup, rc = 0);
+
+ cleanup:
+        push_ctxt(&saved, &mds->mds_ctxt, NULL);
+        rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, NULL,
+                                req, rc, 0);
+        pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+        /* XXX what do we do here if mds_finish_transno itself failed? */
+        switch (cleanup_phase) {
+        case 4:
+                if (rc)
+                        mds_mfd_destroy(mfd); /* deliberate fall through into phase 2 */
+        case 2:
+                if (rc || S_ISLNK(dchild->d_inode->i_mode))
+                        l_dput(dchild);
+        }
+        return rc;
+}
+
 int mds_open(struct mds_update_record *rec, int offset,
              struct ptlrpc_request *req, struct lustre_handle *child_lockh)
 {
+        /* XXX ALLOCATE _something_ - 464 bytes on stack here */
         static const char acc_table [] = {[O_RDONLY] MAY_READ,
                                           [O_WRONLY] MAY_WRITE,
                                           [O_RDWR] MAY_READ | MAY_WRITE};
         struct mds_obd *mds = mds_req2mds(req);
         struct obd_device *obd = req->rq_export->exp_obd;
-        struct ldlm_reply *rep;
-        struct mds_body *body;
-        struct dentry *dchild = NULL, *parent;
+        struct ldlm_reply *rep = NULL;
+        struct mds_body *body = NULL;
+        struct dentry *dchild = NULL, *parent = NULL;
         struct mds_export_data *med;
         struct mds_file_data *mfd = NULL;
         struct ldlm_res_id child_res_id = { .name = {0} };
         struct lustre_handle parent_lockh;
         int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0;
-        int cleanup_phase = 0;
+        int cleanup_phase = 0, acc_mode;
         void *handle = NULL;
-        int acc_mode;
         ENTRY;
 
-        LASSERT(offset == 2); /* only called via intent */
-        rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
-        body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        if (offset == 2) { /* intent */
+                rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
+                body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
+        } else if (offset == 0) { /* non-intent reint */
+                body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+        } else {
+                body = NULL;
+                LBUG();
+        }
 
         MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh));
 
+        /* Step 0: If we are passed a fid, then we assume the client already
+         * opened this file and is only replaying the RPC, so we open the
+         * inode by fid (at some large expense in security).
+ */ + if (rec->ur_fid2->id) { + struct inode *pending_dir = mds->mds_pending_dir->d_inode; + char fidname[LL_FID_NAMELEN]; + int fidlen = 0; + + down(&pending_dir->i_sem); + fidlen = ll_fid2str(fidname, rec->ur_fid2->id, + rec->ur_fid2->generation); + dchild = lookup_one_len(fidname, mds->mds_pending_dir, fidlen); + if (IS_ERR(dchild)) { + up(&pending_dir->i_sem); + rc = PTR_ERR(dchild); + CERROR("error looking up %s in PENDING: rc = %d\n", + fidname, rc); + RETURN(rc); + } + + if (dchild->d_inode) { + up(&pending_dir->i_sem); + mds_inode_set_orphan(dchild->d_inode); + mds_pack_inode2fid(&body->fid1, dchild->d_inode); + mds_pack_inode2body(body, dchild->d_inode); + cleanup_phase = 2; + GOTO(openit, rc = 0); + } + dput(dchild); + up(&pending_dir->i_sem); + + /* We didn't find it in PENDING so it isn't an orphan. See + * if it was a regular inode that was previously created. + */ + dchild = mds_fid2dentry(mds, rec->ur_fid2, NULL); + if (!IS_ERR(dchild)) { + mds_pack_inode2fid(&body->fid1, dchild->d_inode); + mds_pack_inode2body(body, dchild->d_inode); + cleanup_phase = 2; + GOTO(openit, rc = 0); + } + + /* We didn't find the correct inode on disk either, so we + * need to re-create it via a regular replay. Do that below. 
+ */ + LASSERT(rec->ur_flags & O_CREAT); + } + LASSERT(offset == 2); /* If we got here, we must be called via intent */ + med = &req->rq_export->exp_mds_data; - rep->lock_policy_res1 |= IT_OPEN_LOOKUP; if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) { CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n"); req->rq_status = -ENOMEM; @@ -263,11 +400,12 @@ int mds_open(struct mds_update_record *rec, int offset, if ((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table)) RETURN(-EINVAL); - acc_mode = acc_table [rec->ur_flags & O_ACCMODE]; + acc_mode = acc_table[rec->ur_flags & O_ACCMODE]; if ((rec->ur_flags & O_TRUNC) != 0) acc_mode |= MAY_WRITE; /* Step 1: Find and lock the parent */ + intent_set_disposition(rep, DISP_LOOKUP_EXECD); parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR; parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode, &parent_lockh); @@ -288,38 +426,88 @@ int mds_open(struct mds_update_record *rec, int offset, cleanup_phase = 2; /* child dentry */ if (dchild->d_inode) - rep->lock_policy_res1 |= IT_OPEN_POS; + intent_set_disposition(rep, DISP_LOOKUP_POS); else - rep->lock_policy_res1 |= IT_OPEN_NEG; + intent_set_disposition(rep, DISP_LOOKUP_NEG); /* Step 3: If the child was negative, and we're supposed to, * create it. 
*/ if (!dchild->d_inode) { + unsigned long ino = rec->ur_fid2->id; + if (!(rec->ur_flags & O_CREAT)) { /* It's negative and we weren't supposed to create it */ GOTO(cleanup, rc = -ENOENT); } - rep->lock_policy_res1 |= IT_OPEN_CREATE; - handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE); + intent_set_disposition(rep, DISP_OPEN_CREATE); + handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE, + NULL); if (IS_ERR(handle)) { rc = PTR_ERR(handle); handle = NULL; GOTO(cleanup, rc); } + if (ino) + dchild->d_fsdata = (void *)(unsigned long)ino; + rc = vfs_create(parent->d_inode, dchild, rec->ur_mode); - if (rc) + if (dchild->d_fsdata == (void *)(unsigned long)ino) + dchild->d_fsdata = NULL; + + if (rc) { + CDEBUG(D_INODE, "error during create: %d\n", rc); GOTO(cleanup, rc); - created = 1; + } else { + struct iattr iattr; + struct inode *inode = dchild->d_inode; + + if (ino) { + LASSERT(ino == inode->i_ino); + /* Written as part of setattr */ + inode->i_generation = rec->ur_fid2->generation; + CDEBUG(D_HA, "recreated ino %lu with gen %x\n", + inode->i_ino, inode->i_generation); + } + + created = 1; + LTIME_S(iattr.ia_atime) = rec->ur_time; + LTIME_S(iattr.ia_ctime) = rec->ur_time; + LTIME_S(iattr.ia_mtime) = rec->ur_time; + + iattr.ia_uid = rec->ur_uid; + if (parent->d_inode->i_mode & S_ISGID) { + iattr.ia_gid = parent->d_inode->i_gid; + } else + iattr.ia_gid = rec->ur_gid; + + iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | + ATTR_MTIME | ATTR_CTIME; + + rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0); + if (rc) { + CERROR("error on setattr: rc = %d\n", rc); + /* XXX should we abort here in case of error? 
*/ + } + } + child_mode = LCK_PW; acc_mode = 0; /* Don't check for permissions */ } + LASSERT(!mds_inode_is_orphan(dchild->d_inode)); + /* Step 4: It's positive, so lock the child */ child_res_id.name[0] = dchild->d_inode->i_ino; child_res_id.name[1] = dchild->d_inode->i_generation; reacquire: lock_flags = 0; + /* For the open(O_CREAT) case, this would technically be a lock + * inversion (getting a VFS lock after starting a transaction), + * but in that case we cannot possibly block on this lock because + * we just created the child and also hold a write lock on the + * parent, so nobody could be holding the lock yet. + */ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, child_res_id, LDLM_PLAIN, NULL, 0, child_mode, &lock_flags, ldlm_completion_ast, @@ -346,15 +534,19 @@ int mds_open(struct mds_update_record *rec, int offset, /* An append-only file must be opened in append mode for * writing */ - if (IS_APPEND(dchild->d_inode) && - (acc_mode & MAY_WRITE) != 0 && + if (IS_APPEND(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0 && ((rec->ur_flags & O_APPEND) == 0 || (rec->ur_flags & O_TRUNC) != 0)) - GOTO (cleanup, rc = -EPERM); + GOTO(cleanup, rc = -EPERM); rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode); if (rc) GOTO(cleanup, rc); + + /* If we have LOV EA data, the OST holds size, mtime */ + if (!(body->valid & OBD_MD_FLEASIZE)) + body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME); } if (!created && (rec->ur_flags & O_CREAT) && @@ -364,9 +556,9 @@ int mds_open(struct mds_update_record *rec, int offset, GOTO(cleanup, rc = -EEXIST); // returns a lock to the client } - /* If we're opening a file without an EA, the client needs a write - * lock. */ - if (S_ISREG(dchild->d_inode->i_mode) && + /* If we're opening a file without an EA for write, the client needs + * a write lock. 
*/ + if (S_ISREG(dchild->d_inode->i_mode) && (rec->ur_flags & O_ACCMODE) && child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) { ldlm_lock_decref(child_lockh, child_mode); child_mode = LCK_PW; @@ -381,15 +573,14 @@ int mds_open(struct mds_update_record *rec, int offset, GOTO(cleanup, rc = -ENOTDIR); /* Step 5: mds_open it */ - rep->lock_policy_res1 |= IT_OPEN_OPEN; - + intent_set_disposition(rep, DISP_OPEN_OPEN); + openit: /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */ mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, rec->ur_flags & ~(O_DIRECT | O_TRUNC), req); - if (!mfd) { - CERROR("mds: out of memory\n"); - dchild = NULL; /* prevent a double dput in step 2 */ - GOTO(cleanup, rc = -ENOMEM); + if (IS_ERR(mfd)) { + dchild = NULL; /* prevent a double dput in cleanup phase 2 */ + GOTO(cleanup, rc = PTR_ERR(mfd)); } cleanup_phase = 4; /* mfd allocated */ @@ -401,6 +592,7 @@ int mds_open(struct mds_update_record *rec, int offset, cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep->lock_policy_res1); + /* XXX what do we do here if mds_finish_transno itself failed? */ switch (cleanup_phase) { case 4: if (rc && !S_ISLNK(dchild->d_inode->i_mode)) @@ -410,19 +602,22 @@ int mds_open(struct mds_update_record *rec, int offset, * ldlm_intent_policy: if we found the dentry, or we tried to * open it (meaning that we created, if it wasn't found), then * we return the lock to the caller and client. 
*/ - if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS))) + if (intent_disposition(rep, DISP_LOOKUP_NEG) && + !intent_disposition(rep, DISP_OPEN_OPEN)) ldlm_lock_decref(child_lockh, child_mode); case 2: if (rc || S_ISLNK(dchild->d_inode->i_mode)) l_dput(dchild); case 1: - l_dput(parent); - if (rc) { - ldlm_lock_decref(&parent_lockh, parent_mode); - } else { - memcpy(&req->rq_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - req->rq_ack_locks[0].mode = parent_mode; + if (parent) { + l_dput(parent); + if (rc) { + ldlm_lock_decref(&parent_lockh, parent_mode); + } else { + memcpy(&req->rq_ack_locks[0].lock,&parent_lockh, + sizeof(parent_lockh)); + req->rq_ack_locks[0].mode = parent_mode; + } } } RETURN(rc); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 50949dd..61871d7 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -37,19 +37,93 @@ #include #include #include + #include "mds_internal.h" -extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req); +void mds_commit_cb(struct obd_device *obd, __u64 transno, void *data, + int error) +{ + obd_transno_commit_cb(obd, transno, error); +} + +struct mds_logcancel_data { + struct lov_mds_md *mlcd_lmm; + int mlcd_size; + int mlcd_cookielen; + int mlcd_eadatalen; + struct llog_cookie mlcd_cookies[0]; +}; + +/* Establish a connection to the OSC when we first need it. We don't do + * this during MDS setup because that would introduce setup ordering issues. 
*/ +static int mds_osc_connect(struct obd_device *obd, struct mds_obd *mds) +{ + int rc; + ENTRY; + + if (IS_ERR(mds->mds_osc_obd)) + RETURN(PTR_ERR(mds->mds_osc_obd)); + + if (mds->mds_osc_obd) + RETURN(0); + + mds->mds_osc_obd = class_uuid2obd(&mds->mds_osc_uuid); + if (!mds->mds_osc_obd) { + CERROR("MDS cannot locate OSC/LOV %s - no logging!\n", + mds->mds_osc_uuid.uuid); + mds->mds_osc_obd = ERR_PTR(-ENOTCONN); + RETURN(-ENOTCONN); + } + + rc = obd_connect(&mds->mds_osc_conn, mds->mds_osc_obd, &obd->obd_uuid); + if (rc) { + CERROR("MDS cannot locate OSC/LOV %s - no logging!\n", + mds->mds_osc_uuid.uuid); + mds->mds_osc_obd = ERR_PTR(rc); + RETURN(rc); + } + + rc = obd_set_info(&mds->mds_osc_conn, strlen("mds_conn"), "mds_conn", + 0, NULL); + RETURN(rc); +} -static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error) +static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, + void *cb_data, int error) { + struct mds_logcancel_data *mlcd = cb_data; + struct lov_stripe_md *lsm = NULL; + int rc; + obd_transno_commit_cb(obd, transno, error); + + CDEBUG(D_HA, "cancelling %d cookies\n", + (int)(mlcd->mlcd_cookielen / sizeof(*mlcd->mlcd_cookies))); + + rc = obd_unpackmd(&obd->u.mds.mds_osc_conn, &lsm, mlcd->mlcd_lmm, + mlcd->mlcd_eadatalen); + if (rc < 0) { + CERROR("bad LSM cancelling %d log cookies: rc %d\n", + (int)(mlcd->mlcd_cookielen/sizeof(*mlcd->mlcd_cookies)), + rc); + } else { + rc = obd_log_cancel(&obd->u.mds.mds_osc_conn, lsm, + mlcd->mlcd_cookielen / + sizeof(*mlcd->mlcd_cookies), + mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW); + ///* XXX 0 normally, SENDNOW for debug */); + if (rc) + CERROR("error cancelling %d log cookies: rc %d\n", + (int)(mlcd->mlcd_cookielen / + sizeof(*mlcd->mlcd_cookies)), rc); + } + + OBD_FREE(mlcd, mlcd->mlcd_size); } /* Assumes caller has already pushed us into the kernel context. 
*/ -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, - __u32 op_data) +int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, + struct ptlrpc_request *req, int rc, __u32 op_data) { struct mds_export_data *med = &req->rq_export->exp_mds_data; struct mds_client_data *mcd = med->med_mcd; @@ -70,15 +144,15 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, if (!handle) { /* if we're starting our own xaction, use our own inode */ - i = mds->mds_rcvd_filp->f_dentry->d_inode; - handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR); + inode = mds->mds_rcvd_filp->f_dentry->d_inode; + handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) { CERROR("fsfilt_start: %ld\n", PTR_ERR(handle)); GOTO(out, rc = PTR_ERR(handle)); } } - off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE; + off = med->med_off; transno = req->rq_reqmsg->transno; if (transno == 0) { @@ -94,10 +168,11 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, mcd->mcd_last_data = cpu_to_le32(op_data); fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle, - mds_commit_cb); - written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off); - CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = " - LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written); + mds_commit_cb, NULL); + written = fsfilt_write_record(obd, mds->mds_rcvd_filp, + (char *)mcd, sizeof(*mcd), &off); + CDEBUG(D_INODE, "wrote trans "LPU64" client %s at idx %u: written = " + LPSZ"\n", transno, mcd->mcd_uuid, med->med_idx, written); if (written != sizeof(*mcd)) { CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written); @@ -110,7 +185,7 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, } commit: - err = fsfilt_commit(obd, i, handle, 0); + err = fsfilt_commit(obd, inode, handle, 0); if (err) { CERROR("error committing transaction: %d\n", 
err); if (!rc) @@ -139,22 +214,29 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec) if (!(ia_valid & ATTR_RAW)) RETURN(0); - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - RETURN(-EPERM); - - LTIME_S(attr->ia_ctime) = now; + if (!(ia_valid & ATTR_CTIME_SET)) + LTIME_S(attr->ia_ctime) = now; if (!(ia_valid & ATTR_ATIME_SET)) LTIME_S(attr->ia_atime) = now; if (!(ia_valid & ATTR_MTIME_SET)) LTIME_S(attr->ia_mtime) = now; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + RETURN(-EPERM); + /* times */ - if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) && - !(ia_valid & ATTR_ATIME_SET)) { + if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME)) { if (rec->ur_fsuid != inode->i_uid && (error = permission(inode,MAY_WRITE)) != 0) RETURN(error); - } else if (ia_valid & ATTR_UID) { + } + + if (ia_valid & ATTR_SIZE) { + if ((error = permission(inode,MAY_WRITE)) != 0) + RETURN(error); + } + + if (ia_valid & ATTR_UID) { /* chown */ error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) @@ -164,7 +246,6 @@ int mds_fix_attr(struct inode *inode, struct mds_update_record *rec) if (attr->ia_gid == (gid_t) -1) attr->ia_gid = inode->i_gid; attr->ia_mode = inode->i_mode; - attr->ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; /* * If the user or group of a non-directory has been * changed by a non-root user, remove the setuid bit. 
@@ -232,6 +313,14 @@ static void reconstruct_reint_setattr(struct mds_update_record *rec, mds_pack_inode2fid(&body->fid1, de->d_inode); mds_pack_inode2body(body, de->d_inode); + /* Don't return OST-specific attributes if we didn't just set them */ + if (rec->ur_iattr.ia_valid & ATTR_SIZE) + body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) + body->valid |= OBD_MD_FLMTIME; + if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) + body->valid |= OBD_MD_FLATIME; + l_dput(de); } @@ -251,6 +340,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, struct inode *inode = NULL; struct lustre_handle lockh; void *handle = NULL; + struct mds_logcancel_data *mlcd = NULL; int rc = 0, cleanup_phase = 0, err, locked = 0; ENTRY; @@ -279,21 +369,28 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, to_kdev_t(inode->i_sb->s_dev)); - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR); +#ifdef ENABLE_ORPHANS + if (unlikely(mds->mds_osc_obd == NULL)) + mds_osc_connect(obd, mds); +#endif + + handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) { rc = PTR_ERR(handle); handle = NULL; GOTO(cleanup, rc); } + if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_CTIME)) + CDEBUG(D_INODE, "setting mtime %lu, ctime %lu\n", + LTIME_S(rec->ur_iattr.ia_mtime), + LTIME_S(rec->ur_iattr.ia_ctime)); rc = mds_fix_attr(inode, rec); if (rc) GOTO(cleanup, rc); rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0); - if (rc == 0 && - S_ISREG(inode->i_mode) && - rec->ur_eadata != NULL) { + if (rc == 0 && S_ISREG(inode->i_mode) && rec->ur_eadata != NULL) { rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata, rec->ur_eadatalen); } @@ -302,10 +399,39 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, mds_pack_inode2fid(&body->fid1, inode); mds_pack_inode2body(body, inode); + /* Don't return 
OST-specific attributes if we didn't just set them */ + if (rec->ur_iattr.ia_valid & ATTR_SIZE) + body->valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + if (rec->ur_iattr.ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) + body->valid |= OBD_MD_FLMTIME; + if (rec->ur_iattr.ia_valid & (ATTR_ATIME | ATTR_ATIME_SET)) + body->valid |= OBD_MD_FLATIME; + + if (rc == 0 && rec->ur_cookielen && !IS_ERR(mds->mds_osc_obd)) { + OBD_ALLOC(mlcd, sizeof(*mlcd) + rec->ur_cookielen + + rec->ur_eadatalen); + if (mlcd) { + mlcd->mlcd_size = sizeof(*mlcd) + rec->ur_cookielen + + rec->ur_eadatalen; + mlcd->mlcd_eadatalen = rec->ur_eadatalen; + mlcd->mlcd_cookielen = rec->ur_cookielen; + mlcd->mlcd_lmm = (void *)&mlcd->mlcd_cookies + + mlcd->mlcd_cookielen; + memcpy(&mlcd->mlcd_cookies, rec->ur_logcookies, + mlcd->mlcd_cookielen); + memcpy(mlcd->mlcd_lmm, rec->ur_eadata, + mlcd->mlcd_eadatalen); + } else { + CERROR("unable to allocate log cancel data\n"); + } + } EXIT; cleanup: + if (mlcd != NULL) + fsfilt_set_last_rcvd(req->rq_export->exp_obd, 0, handle, + mds_cancel_cookies_cb, mlcd); err = mds_finish_transno(mds, inode, handle, req, rc, 0); - switch(cleanup_phase) { + switch (cleanup_phase) { case 1: l_dput(de); if (locked) { @@ -418,7 +544,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, switch (type) { case S_IFREG:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE); + handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_create(dir, dchild, rec->ur_mode); @@ -426,7 +552,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, break; } case S_IFDIR:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR); + handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_mkdir(dir, dchild, rec->ur_mode); @@ -434,7 +560,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, break; } case S_IFLNK:{ - handle = 
fsfilt_start(obd, dir, FSFILT_OP_SYMLINK); + handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); if (rec->ur_tgt == NULL) /* no target supplied */ @@ -449,7 +575,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, case S_IFIFO: case S_IFSOCK:{ int rdev = rec->ur_rdev; - handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD); + handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD, NULL); if (IS_ERR(handle)) GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle))); rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); @@ -458,13 +584,13 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, } default: CERROR("bad file type %o creating %s\n", type, rec->ur_name); + dchild->d_fsdata = NULL; GOTO(cleanup, rc = -EINVAL); } - /* In case we stored the desired inum in here, we want to clean up. - * We also do this in the cleanup block, for the error cases. - */ - dchild->d_fsdata = NULL; + /* In case we stored the desired inum in here, we want to clean up. */ + if (dchild->d_fsdata == (void *)(unsigned long)rec->ur_fid2->id) + dchild->d_fsdata = NULL; if (rc) { CDEBUG(D_INODE, "error during create: %d\n", rc); @@ -532,7 +658,6 @@ cleanup: } switch (cleanup_phase) { case 2: /* child dentry */ - dchild->d_fsdata = NULL; l_dput(dchild); case 1: /* locked parent dentry */ if (rc) { @@ -634,43 +759,134 @@ static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset, "can't get EA for reconstructed unlink, leaking OST inodes"); } +/* If we are unlinking an open file/dir (i.e. creating an orphan) then + * we instead link the inode into the PENDING directory until it is + * finally released. We can't simply call mds_reint_rename() or some + * part thereof, because we don't have the inode to check for link + * count/open status until after it is locked. + * + * For lock ordering, we always get the PENDING, then pending_child lock + * last to avoid deadlocks. 
+ */ +static int mds_unlink_orphan(struct mds_update_record *rec, + struct obd_device *obd, struct dentry *dparent, + struct dentry *dchild, void **handle) +{ + struct mds_obd *mds = &obd->u.mds; + struct inode *pending_dir = mds->mds_pending_dir->d_inode; + struct dentry *pending_child; + char fidname[LL_FID_NAMELEN]; + int fidlen = 0, rc; + ENTRY; + + LASSERT(!mds_inode_is_orphan(dchild->d_inode)); + + down(&pending_dir->i_sem); + fidlen = ll_fid2str(fidname, dchild->d_inode->i_ino, + dchild->d_inode->i_generation); + + CDEBUG(D_ERROR, "pending destroy of %dx open file %s = %s\n", + mds_open_orphan_count(dchild->d_inode), + rec->ur_name, fidname); + + pending_child = lookup_one_len(fidname, mds->mds_pending_dir, fidlen); + if (IS_ERR(pending_child)) + GOTO(out_lock, rc = PTR_ERR(pending_child)); + + if (pending_child->d_inode != NULL) { + CERROR("re-destroying orphan file %s?\n", rec->ur_name); + LASSERT(pending_child->d_inode == dchild->d_inode); + GOTO(out_dput, rc = 0); + } + + *handle = fsfilt_start(obd, pending_dir, FSFILT_OP_RENAME, NULL); + if (IS_ERR(*handle)) + GOTO(out_dput, rc = PTR_ERR(*handle)); + + rc = vfs_rename(dparent->d_inode, dchild, pending_dir, pending_child); + if (rc) + CERROR("error renaming orphan %lu/%s to PENDING: rc = %d\n", + dparent->d_inode->i_ino, rec->ur_name, rc); + else + mds_inode_set_orphan(dchild->d_inode); +out_dput: + dput(pending_child); +out_lock: + up(&pending_dir->i_sem); + RETURN(rc); +} + +static int mds_log_op_unlink(struct obd_device *obd, struct mds_obd *mds, + struct inode *inode, struct lustre_msg *repmsg, + int offset) +{ + struct lov_stripe_md *lsm = NULL; + struct llog_unlink_rec *lur; + int rc; + ENTRY; + + if (IS_ERR(mds->mds_osc_obd)) + RETURN(PTR_ERR(mds->mds_osc_obd)); + + rc = obd_unpackmd(&mds->mds_osc_conn, &lsm, + lustre_msg_buf(repmsg, offset, 0), + repmsg->buflens[offset]); + if (rc < 0) + RETURN(rc); + + OBD_ALLOC(lur, sizeof(*lur)); + if (!lur) + RETURN(-ENOMEM); + lur->lur_hdr.lth_len = 
lur->lur_end_len = sizeof(*lur); + lur->lur_hdr.lth_type = MDS_UNLINK_REC; + lur->lur_oid = inode->i_ino; + lur->lur_ogen = inode->i_generation; + + rc = obd_log_add(&mds->mds_osc_conn, mds->mds_catalog, &lur->lur_hdr, + lsm, lustre_msg_buf(repmsg, offset + 1, 0), + repmsg->buflens[offset+1]/sizeof(struct llog_cookie)); + + obd_free_memmd(&mds->mds_osc_conn, &lsm); + OBD_FREE(lur, sizeof(*lur)); + + RETURN(rc); +} + static int mds_reint_unlink(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, - struct lustre_handle *child_lockh) + struct lustre_handle *lh) { - struct dentry *dir_de = NULL; + struct dentry *dparent = NULL; struct dentry *dchild = NULL; struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; struct mds_body *body = NULL; - struct inode *dir_inode = NULL, *child_inode; - struct lustre_handle parent_lockh; + struct inode *child_inode; + struct lustre_handle parent_lockh, child_lockh; void *handle = NULL; struct ldlm_res_id child_res_id = { .name = {0} }; - int rc = 0, flags = 0, return_lock = 0; - int cleanup_phase = 0; + int rc = 0, flags = 0, log_unlink = 0, cleanup_phase = 0; ENTRY; LASSERT(offset == 0 || offset == 2); MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req, - child_lockh)); + &child_lockh)); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) GOTO(cleanup, rc = -ENOENT); /* Step 1: Lookup the parent by FID */ - dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, - &parent_lockh); - if (IS_ERR(dir_de)) - GOTO(cleanup, rc = PTR_ERR(dir_de)); - dir_inode = dir_de->d_inode; - LASSERT(dir_inode); + dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, + &parent_lockh); + if (IS_ERR(dparent)) + GOTO(cleanup, rc = PTR_ERR(dparent)); + LASSERT(dparent->d_inode); cleanup_phase = 1; /* Have parent dentry lock */ /* Step 2: Lookup the child */ - dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1); + dchild = ll_lookup_one_len(rec->ur_name, 
dparent, rec->ur_namelen - 1); if (IS_ERR(dchild)) GOTO(cleanup, rc = PTR_ERR(dchild)); @@ -678,15 +894,13 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, child_inode = dchild->d_inode; if (child_inode == NULL) { - CDEBUG(D_INODE, - "child doesn't exist (dir %lu, name %s)\n", - dir_inode->i_ino, rec->ur_name); - rc = -ENOENT; - GOTO(cleanup, rc); + CDEBUG(D_INODE, "child doesn't exist (dir %lu, name %s)\n", + dparent->d_inode->i_ino, rec->ur_name); + GOTO(cleanup, rc = -ENOENT); } DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu", - dir_inode->i_ino, child_inode->i_ino); + dparent->d_inode->i_ino, child_inode->i_ino); /* Step 3: Get a lock on the child */ child_res_id.name[0] = child_inode->i_ino; @@ -695,14 +909,14 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX, &flags, ldlm_completion_ast, mds_blocking_ast, - NULL, child_lockh); + NULL, &child_lockh); if (rc != ELDLM_OK) GOTO(cleanup, rc); cleanup_phase = 3; /* child lock */ OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, - to_kdev_t(dir_inode->i_sb->s_dev)); + to_kdev_t(dparent->d_inode->i_sb->s_dev)); /* ldlm_reply in buf[0] if called via intent */ if (offset) @@ -711,43 +925,89 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body)); LASSERT(body != NULL); - /* Step 4: Do the unlink: client decides between rmdir/unlink! 
- * (bug 72) */ +#ifdef ENABLE_ORPHANS + if (unlikely(mds->mds_osc_obd == NULL)) + mds_osc_connect(obd, mds); +#endif + + /* If this is the last reference to this inode, get the OBD EA + * data first so the client can destroy OST objects */ + if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) { + mds_pack_inode2fid(&body->fid1, child_inode); + mds_pack_inode2body(body, child_inode); + mds_pack_md(obd, req->rq_repmsg, offset + 1, body, child_inode); + if (!(body->valid & OBD_MD_FLEASIZE)) { + body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME); + log_unlink = 1; + } + } + + /* We have to do these checks ourselves, in case we are making an + * orphan. The client tells us whether rmdir() or unlink() was called, + * so we need to return appropriate errors (bug 72). + * + * We don't have to check permissions, because vfs_rename (called from + * mds_unlink_orphan) also calls may_delete. */ + if ((rec->ur_mode & S_IFMT) == S_IFDIR) { + if (!S_ISDIR(child_inode->i_mode)) + GOTO(cleanup, rc = -ENOTDIR); + } else { + if (S_ISDIR(child_inode->i_mode)) + GOTO(cleanup, rc = -EISDIR); + } + + if (mds_open_orphan_count(child_inode) > 0) { + rc = mds_unlink_orphan(rec, obd, dparent, dchild, &handle); +#ifdef ENABLE_ORPHANS + if (!rc && mds_log_op_unlink(obd, mds, child_inode, + req->rq_repmsg, offset + 1) > 0) + body->valid |= OBD_MD_FLCOOKIE; +#endif + GOTO(cleanup, rc); + } + + // Step 4: Do the unlink: client decides between rmdir/unlink! (bug 72) switch (rec->ur_mode & S_IFMT) { case S_IFDIR: /* Drop any lingering child directories before we start our * transaction, to avoid doing multiple inode dirty/delete - * in our compound transaction (bug 1321). - */ + * in our compound transaction (bug 1321). 
*/ shrink_dcache_parent(dchild); - handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR); + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_RMDIR, + NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); cleanup_phase = 4; - rc = vfs_rmdir(dir_inode, dchild); + rc = vfs_rmdir(dparent->d_inode, dchild); break; - case S_IFREG: - /* If this is the last reference to this inode, get the OBD EA - * data first so the client can destroy OST objects */ - if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) { - mds_pack_inode2fid(&body->fid1, child_inode); - mds_pack_inode2body(body, child_inode); - mds_pack_md(obd, req->rq_repmsg, offset + 1, - body, child_inode); - if (body->valid & OBD_MD_FLEASIZE) - return_lock = 1; - } - /* no break */ + case S_IFREG: { + handle = fsfilt_start(obd, dparent->d_inode, + FSFILT_OP_UNLINK_LOG, NULL); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + cleanup_phase = 4; + rc = vfs_unlink(dparent->d_inode, dchild); +#ifdef ENABLE_ORPHANS + if (!rc && log_unlink) + if (mds_log_op_unlink(obd, mds, child_inode, + req->rq_repmsg, offset + 1) > 0) + body->valid |= OBD_MD_FLCOOKIE; +#endif + break; + } case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK); + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK, + NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); cleanup_phase = 4; - rc = vfs_unlink(dir_inode, dchild); + rc = vfs_unlink(dparent->d_inode, dchild); break; default: CERROR("bad file type %o unlinking %s\n", rec->ur_mode, @@ -758,29 +1018,29 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, cleanup: switch(cleanup_phase) { - case 4: - rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0); - if (rc && body) { - /* Don't unlink the OST objects if the MDS unlink failed */ + case 4: + rc = mds_finish_transno(mds, dparent->d_inode, handle, req, + rc, 0); + if (rc && body != 
NULL) { + // Don't unlink the OST objects if the MDS unlink failed body->valid = 0; } - case 3: /* child lock */ - if (rc != 0 || return_lock == 0) - ldlm_lock_decref(child_lockh, LCK_EX); - case 2: /* child dentry */ + case 3: /* child lock */ + ldlm_lock_decref(&child_lockh, LCK_EX); + case 2: /* child dentry */ l_dput(dchild); - case 1: /* parent dentry and lock */ + case 1: /* parent dentry and lock */ if (rc) { - ldlm_lock_decref(&parent_lockh, LCK_EX); + ldlm_lock_decref(&parent_lockh, LCK_PW); } else { memcpy(&req->rq_ack_locks[0].lock, &parent_lockh, sizeof(parent_lockh)); - req->rq_ack_locks[0].mode = LCK_EX; + req->rq_ack_locks[0].mode = LCK_PW; } - l_dput(dir_de); - case 0: + l_dput(dparent); + case 0: break; - default: + default: CERROR("invalid cleanup_phase %d\n", cleanup_phase); LBUG(); } @@ -857,8 +1117,10 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, /* Step 3: Lookup the child */ dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1); if (IS_ERR(dchild)) { - CERROR("child lookup error %ld\n", PTR_ERR(dchild)); - GOTO(cleanup, rc = PTR_ERR(dchild)); + rc = PTR_ERR(dchild); + if (rc != -EPERM && rc != -EACCES) + CERROR("child lookup error %d\n", rc); + GOTO(cleanup, rc); } cleanup_phase = 4; /* child dentry */ @@ -874,15 +1136,15 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, to_kdev_t(de_src->d_inode->i_sb->s_dev)); - handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK); + handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK, NULL); if (IS_ERR(handle)) { rc = PTR_ERR(handle); GOTO(cleanup, rc); } rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild); - if (rc) - CERROR("link error %d\n", rc); + if (rc && rc != -EPERM && rc != -EACCES) + CERROR("vfs_link error %d\n", rc); cleanup: rc = mds_finish_transno(mds, de_tgt_dir ? 
de_tgt_dir->d_inode : NULL, handle, req, rc, 0); @@ -1057,13 +1319,12 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, to_kdev_t(de_srcdir->d_inode->i_sb->s_dev)); - handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME); + handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); lock_kernel(); - rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new, - NULL); + rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new); unlock_kernel(); EXIT; diff --git a/lustre/obdclass/.cvsignore b/lustre/obdclass/.cvsignore index e530020..49c6100 100644 --- a/lustre/obdclass/.cvsignore +++ b/lustre/obdclass/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/obdclass/Makefile.am b/lustre/obdclass/Makefile.am index 61f4bc2..06d60d4 100644 --- a/lustre/obdclass/Makefile.am +++ b/lustre/obdclass/Makefile.am @@ -1,5 +1,3 @@ -# FIXME: we need to make it clear that obdclass.o depends on -# lustre_build_version, or 'make -j2' breaks! 
DEFS= MODULE = obdclass @@ -9,15 +7,13 @@ else FSMOD = fsfilt_ext3 endif +class_obd.o: lustre_build_version + if LIBLUSTRE lib_LIBRARIES = liblustreclass.a -liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c - -class_obd.o: lustre_version - -lustre_version: - echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h - echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h +liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c +liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c +liblustreclass_a_SOURCES += simple.c recov_log.c obdo.c else modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o @@ -25,15 +21,16 @@ EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c -obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c +obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c recov_log.c obdo.c endif include $(top_srcdir)/Rules -# XXX I'm sure there's some automake mv-if-different helper for this. 
lustre_build_version: perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver + echo #define LUSTRE_RELEASE @RELEASE@ >> tmpver cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \ - 2> /dev/null && \ - $(RM) tmpver || \ - mv tmpver $(top_builddir)/include/linux/lustre_build_version.h + 2> /dev/null && \ + $(RM) tmpver || \ + mv tmpver $(top_builddir)/include/linux/lustre_build_version.h + diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index b497aa3..8275ed8 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -53,9 +53,7 @@ #include #include #else - # include - #endif #include @@ -64,6 +62,7 @@ #include #include /* for PTL_MD_MAX_IOV */ #include +#include struct semaphore obd_conf_sem; /* serialize configuration commands */ struct obd_device obd_dev[MAX_OBD_DEVICES]; @@ -181,12 +180,10 @@ static inline void obd_conn2data(struct obd_ioctl_data *data, static void dump_exports(struct obd_device *obd) { - struct list_head *tmp, *n; + struct obd_export *exp, *n; - list_for_each_safe(tmp, n, &obd->obd_exports) { - struct obd_export *exp = list_entry(tmp, struct obd_export, - exp_obd_chain); - CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n", + list_for_each_entry_safe(exp, n, &obd->obd_exports, exp_obd_chain) { + CERROR("%s: %p %s %d %d %p\n", obd->obd_name, exp, exp->exp_client_uuid.uuid, atomic_read(&exp->exp_refcount), exp->exp_failed, exp->exp_outstanding_reply ); @@ -543,6 +540,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, obd->obd_type->typ_refcnt--; class_put_type(obd->obd_type); obd->obd_type = NULL; + memset(obd, 0, sizeof(*obd)); GOTO(out, err = 0); } @@ -562,7 +560,7 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, atomic_set(&obd->obd_refcount, 0); - if ( OBT(obd) && OBP(obd, setup) ) + if (OBT(obd) && OBP(obd, setup)) err = obd_setup(obd, sizeof(*data), data); if (!err) { @@ -574,8 +572,8 @@ int 
class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, GOTO(out, err); } case OBD_IOC_CLEANUP: { - int force = 0, failover = 0; - char * flag; + int flags = 0; + char *flag; if (!obd->obd_set_up) { CERROR("Device %d not setup\n", obd->obd_minor); @@ -586,18 +584,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, for (flag = data->ioc_inlbuf1; *flag != 0; flag++) switch (*flag) { case 'F': - force = 1; + flags |= OBD_OPT_FORCE; break; case 'A': - failover = 1; + flags |= OBD_OPT_FAILOVER; break; default: - CERROR("unrecognised flag '%c'\n", + CERROR("unrecognised flag '%c'\n", *flag); } } - - if (atomic_read(&obd->obd_refcount) == 1 || force) { + + if (atomic_read(&obd->obd_refcount) == 1 || + flags & OBD_OPT_FORCE) { /* this will stop new connections, and need to do it before class_disconnect_exports() */ obd->obd_stopping = 1; @@ -607,19 +606,19 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL, NULL, NULL); int rc; - - if (!force) { + + if (!(flags & OBD_OPT_FORCE)) { CERROR("OBD device %d (%p) has refcount %d\n", - obd->obd_minor, obd, + obd->obd_minor, obd, atomic_read(&obd->obd_refcount)); dump_exports(obd); GOTO(out, err = -EBUSY); } - class_disconnect_exports(obd, failover); - CDEBUG(D_IOCTL, - "%s: waiting for obd refs to go away: %d\n", + class_disconnect_exports(obd, flags); + CDEBUG(D_IOCTL, + "%s: waiting for obd refs to go away: %d\n", obd->obd_name, atomic_read(&obd->obd_refcount)); - + rc = l_wait_event(obd->obd_refcount_waitq, atomic_read(&obd->obd_refcount) < 2, &lwi); if (rc == 0) { @@ -630,12 +629,12 @@ int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, atomic_read(&obd->obd_refcount)); dump_exports(obd); } - CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", + CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", obd->obd_name); } if (OBT(obd) && OBP(obd, cleanup)) - err = 
obd_cleanup(obd, force, failover); + err = obd_cleanup(obd, flags); if (!err) { obd->obd_set_up = obd->obd_stopping = 0; @@ -807,10 +806,10 @@ EXPORT_SYMBOL(class_conn2cliimp); EXPORT_SYMBOL(class_conn2ldlmimp); EXPORT_SYMBOL(class_disconnect); EXPORT_SYMBOL(class_disconnect_exports); -EXPORT_SYMBOL(lustre_uuid_to_peer); /* uuid.c */ EXPORT_SYMBOL(class_uuid_unparse); +EXPORT_SYMBOL(lustre_uuid_to_peer); EXPORT_SYMBOL(client_tgtuuid2obd); EXPORT_SYMBOL(class_handle_hash); @@ -831,12 +830,15 @@ int init_obdclass(void) ", info@clusterfs.com\n"); class_init_uuidlist(); - class_handle_init(); + err = class_handle_init(); + if (err) + return err; sema_init(&obd_conf_sem, 1); INIT_LIST_HEAD(&obd_types); - if ((err = misc_register(&obd_psdev))) { + err = misc_register(&obd_psdev); + if (err) { CERROR("cannot register %d err %d\n", OBD_MINOR, err); return err; } @@ -875,7 +877,7 @@ int obd_proc_read_version(char *page, char **start, off_t off, int count, int *e #endif #ifdef __KERNEL__ -static void __exit cleanup_obdclass(void) +static void /*__exit*/ cleanup_obdclass(void) #else static void cleanup_obdclass(void) #endif @@ -914,8 +916,8 @@ static void cleanup_obdclass(void) * kernel patch */ #ifdef __KERNEL__ #include -#define LUSTRE_MIN_VERSION 18 -#define LUSTRE_MAX_VERSION 19 +#define LUSTRE_MIN_VERSION 21 +#define LUSTRE_MAX_VERSION 21 #if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) # error Cannot continue: Your Lustre kernel patch is older than the sources #elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c index 4357b79..d0abdfe 100644 --- a/lustre/obdclass/fsfilt.c +++ b/lustre/obdclass/fsfilt.c @@ -64,7 +64,7 @@ void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops) /* unlock fsfilt_types list */ } -struct fsfilt_operations *fsfilt_get_ops(char *type) +struct fsfilt_operations *fsfilt_get_ops(const char *type) { struct fsfilt_operations *fs_ops; @@ -89,7 +89,7 @@ struct fsfilt_operations 
*fsfilt_get_ops(char *type) /* unlock fsfilt_types list */ } } - __MOD_INC_USE_COUNT(fs_ops->fs_owner); + try_module_get(fs_ops->fs_owner); /* unlock fsfilt_types list */ return fs_ops; @@ -97,7 +97,7 @@ struct fsfilt_operations *fsfilt_get_ops(char *type) void fsfilt_put_ops(struct fsfilt_operations *fs_ops) { - __MOD_DEC_USE_COUNT(fs_ops->fs_owner); + module_put(fs_ops->fs_owner); } diff --git a/lustre/obdclass/fsfilt_ext3.c b/lustre/obdclass/fsfilt_ext3.c index 5f6322f..5dd196d 100644 --- a/lustre/obdclass/fsfilt_ext3.c +++ b/lustre/obdclass/fsfilt_ext3.c @@ -32,7 +32,13 @@ #include #include #include -#include +#include +/* XXX ugh */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + #include +#else + #include +#endif #include #include #include @@ -43,10 +49,11 @@ static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); struct fsfilt_cb_data { - struct journal_callback cb_jcb; /* data private to jbd */ + struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ fsfilt_cb_t cb_func; /* MDS/OBD completion function */ struct obd_device *cb_obd; /* MDS/OBD completion device */ __u64 cb_last_rcvd; /* MDS/OST last committed operation */ + void *cb_data; /* MDS/OST completion function data */ }; #define EXT3_XATTR_INDEX_LUSTRE 5 @@ -58,13 +65,24 @@ struct fsfilt_cb_data { * the inode (which we will be changing anyways as part of this * transaction). 
*/ -static void *fsfilt_ext3_start(struct inode *inode, int op) +static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private) { /* For updates to the last recieved file */ int nblocks = EXT3_DATA_TRANS_BLOCKS; void *handle; switch(op) { + case FSFILT_OP_CREATE_LOG: + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; + op = FSFILT_OP_CREATE; + break; + case FSFILT_OP_UNLINK_LOG: + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; + op = FSFILT_OP_UNLINK; + break; + } + + switch(op) { case FSFILT_OP_RMDIR: case FSFILT_OP_UNLINK: nblocks += EXT3_DELETE_TRANS_BLOCKS; @@ -95,7 +113,7 @@ static void *fsfilt_ext3_start(struct inode *inode, int op) LBUG(); } - LASSERT(!current->journal_info); + LASSERT(current->journal_info == desc_private); lock_kernel(); handle = journal_start(EXT3_JOURNAL(inode), nblocks); unlock_kernel(); @@ -185,14 +203,14 @@ static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) * the pages have been written. */ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) + int niocount, void *desc_private) { journal_t *journal; handle_t *handle; int needed; ENTRY; - LASSERT(!current->journal_info); + LASSERT(current->journal_info == desc_private); journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; needed = fsfilt_ext3_credits_needed(objcount, fso); @@ -218,6 +236,8 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, if (IS_ERR(handle)) CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); + else + LASSERT(handle->h_buffer_credits >= needed); RETURN(handle); } @@ -249,24 +269,26 @@ static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, * in the block pointers; this is really the "small" stripe MD data. 
* We can avoid further hackery by virtue of the MDS file size being * zero all the time (which doesn't invoke block truncate at unlink - * time), so we assert we never change the MDS file size from zero. - */ + * time), so we assert we never change the MDS file size from zero. */ if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; + EXT3_I(inode)->i_disksize = inode->i_size = iattr->ia_size; /* make sure _something_ gets set - so new inode - * goes to disk (probably won't work over XFS - */ - if (!iattr->ia_valid & ATTR_MODE) { + * goes to disk (probably won't work over XFS */ + if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){ iattr->ia_valid |= ATTR_MODE; iattr->ia_mode = inode->i_mode; } } - if (inode->i_op->setattr) + + /* Don't allow setattr to change file type */ + iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT); + + if (inode->i_op->setattr) { rc = inode->i_op->setattr(dentry, iattr); - else{ + } else { rc = inode_change_ok(inode, iattr); if (!rc) rc = inode_setattr(inode, iattr); @@ -286,8 +308,8 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle, * it will fit, because putting it in an EA currently kills the MDS * performance. We'll fix this with "fast EAs" in the future. 
*/ - if (lmm_size <= sizeof(EXT3_I(inode)->i_data) - - sizeof(EXT3_I(inode)->i_data[0])) { + if (inode->i_blocks == 0 && lmm_size <= sizeof(EXT3_I(inode)->i_data) - + sizeof(EXT3_I(inode)->i_data[0])) { /* XXX old_size is debugging only */ int old_size = EXT3_I(inode)->i_data[0]; if (old_size != 0) { @@ -303,8 +325,15 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle, } else { down(&inode->i_sem); lock_kernel(); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE, XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); +#else + rc = ext3_xattr_set_handle(handle, inode, + EXT3_XATTR_INDEX_LUSTRE, + XATTR_LUSTRE_MDS_OBJID, lmm, + lmm_size, 0); +#endif unlock_kernel(); up(&inode->i_sem); } @@ -319,7 +348,7 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) { int rc; - if (EXT3_I(inode)->i_data[0]) { + if (inode->i_blocks == 0 && EXT3_I(inode)->i_data[0]) { int size = le32_to_cpu(EXT3_I(inode)->i_data[0]); LASSERT(size < sizeof(EXT3_I(inode)->i_data)); if (lmm) { @@ -411,14 +440,15 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) { struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); + fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error); OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb); atomic_dec(&fcb_cache_count); } static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) + void *handle, fsfilt_cb_t cb_func, + void *cb_data) { struct fsfilt_cb_data *fcb; @@ -430,10 +460,10 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, fcb->cb_func = cb_func; fcb->cb_obd = obd; fcb->cb_last_rcvd = last_rcvd; + fcb->cb_data = cb_data; CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); lock_kernel(); - /* Note that an "incompatible pointer" warning here is OK for now */ 
journal_callback_set(handle, fsfilt_ext3_cb_func, (struct journal_callback *)fcb); unlock_kernel(); @@ -443,10 +473,11 @@ static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, static int fsfilt_ext3_journal_data(struct file *filp) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + /* bug 1576: enable data journaling on 2.5 when appropriate */ struct inode *inode = filp->f_dentry->d_inode; - EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - +#endif return 0; } @@ -459,7 +490,7 @@ static int fsfilt_ext3_journal_data(struct file *filp) */ static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) { - struct statfs sfs; + struct kstatfs sfs; int rc = vfs_statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { @@ -484,6 +515,110 @@ static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, return ext3_prep_san_write(inode, blocks, nblocks, newsize); } +static int fsfilt_ext3_read_record(struct file * file, char *buf, + int size, loff_t *offs) +{ + struct buffer_head *bh; + unsigned long block, boffs; + struct inode *inode = file->f_dentry->d_inode; + int err; + + if (inode->i_size < *offs + size) { + CERROR("file size %llu is too short for read %u@%llu\n", + inode->i_size, size, *offs); + return -EIO; + } + + block = *offs >> inode->i_blkbits; + bh = ext3_bread(NULL, inode, block, 0, &err); + if (!bh) { + CERROR("can't read block: %d\n", err); + return err; + } + + boffs = (unsigned)*offs % bh->b_size; + if (boffs + size > bh->b_size) { + CERROR("request crosses block's border. 
offset %llu, size %u\n", + *offs, size); + brelse(bh); + return -EIO; + } + + memcpy(buf, bh->b_data + boffs, size); + brelse(bh); + *offs += size; + return size; +} + +static int fsfilt_ext3_write_record(struct file * file, char *buf, + int size, loff_t *offs) +{ + struct buffer_head *bh; + unsigned long block, boffs; + struct inode *inode = file->f_dentry->d_inode; + loff_t old_size = inode->i_size; + journal_t *journal; + handle_t *handle; + int err; + + journal = EXT3_SB(inode->i_sb)->s_journal; + handle = journal_start(journal, EXT3_DATA_TRANS_BLOCKS + 2); + if (handle == NULL) { + CERROR("can't start transaction\n"); + return -EIO; + } + + block = *offs >> inode->i_blkbits; + if (*offs + size > inode->i_size) { + down(&inode->i_sem); + if (*offs + size > inode->i_size) + inode->i_size = ((loff_t)block + 1) << inode->i_blkbits; + up(&inode->i_sem); + } + + bh = ext3_bread(handle, inode, block, 1, &err); + if (!bh) { + CERROR("can't read/create block: %d\n", err); + goto out; + } + + /* This is a hack only needed because ext3_get_block_handle() updates + * i_disksize after marking the inode dirty in ext3_splice_branch(). + * We will fix that when we get a chance, as ext3_mark_inode_dirty() + * is not without cost, nor is it even exported. + */ + if (inode->i_size > old_size) + mark_inode_dirty(inode); + + boffs = (unsigned)*offs % bh->b_size; + if (boffs + size > bh->b_size) { + CERROR("request crosses block's border. 
offset %llu, size %u\n", + *offs, size); + err = -EIO; + goto out; + } + + err = ext3_journal_get_write_access(handle, bh); + if (err) { + CERROR("journal_get_write_access() returned error %d\n", err); + goto out; + } + memcpy(bh->b_data + boffs, buf, size); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) { + CERROR("journal_dirty_metadata() returned error %d\n", err); + goto out; + } + err = size; +out: + if (bh) + brelse(bh); + journal_stop(handle); + if (err > 0) + *offs += size; + return err; +} + static struct fsfilt_operations fsfilt_ext3_ops = { fs_type: "ext3", fs_owner: THIS_MODULE, @@ -499,6 +634,8 @@ static struct fsfilt_operations fsfilt_ext3_ops = { fs_statfs: fsfilt_ext3_statfs, fs_sync: fsfilt_ext3_sync, fs_prep_san_write: fsfilt_ext3_prep_san_write, + fs_write_record: fsfilt_ext3_write_record, + fs_read_record: fsfilt_ext3_read_record, }; static int __init fsfilt_ext3_init(void) diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c index 1fba0f4..80f7e50 100644 --- a/lustre/obdclass/fsfilt_extN.c +++ b/lustre/obdclass/fsfilt_extN.c @@ -43,10 +43,11 @@ static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); struct fsfilt_cb_data { - struct journal_callback cb_jcb; /* data private to jbd */ + struct journal_callback cb_jcb; /* jbd private data - MUST BE FIRST */ fsfilt_cb_t cb_func; /* MDS/OBD completion function */ struct obd_device *cb_obd; /* MDS/OBD completion device */ __u64 cb_last_rcvd; /* MDS/OST last committed operation */ + void *cb_data; /* MDS/OST completion function data */ }; #define EXTN_XATTR_INDEX_LUSTRE 5 @@ -58,13 +59,24 @@ struct fsfilt_cb_data { * the inode (which we will be changing anyways as part of this * transaction). 
*/ -static void *fsfilt_extN_start(struct inode *inode, int op) +static void *fsfilt_extN_start(struct inode *inode, int op, void *desc_private) { /* For updates to the last recieved file */ int nblocks = EXTN_DATA_TRANS_BLOCKS; void *handle; switch(op) { + case FSFILT_OP_CREATE_LOG: + nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; + op = FSFILT_OP_CREATE; + break; + case FSFILT_OP_UNLINK_LOG: + nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; + op = FSFILT_OP_UNLINK; + break; + } + + switch(op) { case FSFILT_OP_RMDIR: case FSFILT_OP_UNLINK: nblocks += EXTN_DELETE_TRANS_BLOCKS; @@ -95,7 +107,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op) LBUG(); } - LASSERT(!current->journal_info); + LASSERT(current->journal_info == desc_private); lock_kernel(); handle = journal_start(EXTN_JOURNAL(inode), nblocks); unlock_kernel(); @@ -124,7 +136,7 @@ static void *fsfilt_extN_start(struct inode *inode, int op) * objcount inode blocks * 1 superblock * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files - * + * * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update. */ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) @@ -155,7 +167,7 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) ngdblocks = EXTN_SB(sb)->s_gdb_count; needed += nbitmaps + ngdblocks; - + /* last_rcvd update */ needed += EXTN_DATA_TRANS_BLOCKS; @@ -185,14 +197,14 @@ static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) * the pages have been written. 
*/ static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) + int niocount, void *desc_private) { journal_t *journal; handle_t *handle; int needed; ENTRY; - LASSERT(!current->journal_info); + LASSERT(current->journal_info == desc_private); journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; needed = fsfilt_extN_credits_needed(objcount, fso); @@ -218,6 +230,8 @@ static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso, if (IS_ERR(handle)) CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); + else + LASSERT(handle->h_buffer_credits >= needed); RETURN(handle); } @@ -249,24 +263,26 @@ static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, * in the block pointers; this is really the "small" stripe MD data. * We can avoid further hackery by virtue of the MDS file size being * zero all the time (which doesn't invoke block truncate at unlink - * time), so we assert we never change the MDS file size from zero. - */ + * time), so we assert we never change the MDS file size from zero. 
*/ if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { /* ATTR_SIZE would invoke truncate: clear it */ iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; + EXTN_I(inode)->i_disksize = inode->i_size = iattr->ia_size; /* make sure _something_ gets set - so new inode - * goes to disk (probably won't work over XFS - */ - if (!iattr->ia_valid & ATTR_MODE) { + * goes to disk (probably won't work over XFS */ + if (!(iattr->ia_valid & (ATTR_MODE | ATTR_MTIME | ATTR_CTIME))){ iattr->ia_valid |= ATTR_MODE; iattr->ia_mode = inode->i_mode; } } - if (inode->i_op->setattr) + + /* Don't allow setattr to change file type */ + iattr->ia_mode = (inode->i_mode & S_IFMT)|(iattr->ia_mode & ~S_IFMT); + + if (inode->i_op->setattr) { rc = inode->i_op->setattr(dentry, iattr); - else{ + } else { rc = inode_change_ok(inode, iattr); if (!rc) rc = inode_setattr(inode, iattr); @@ -286,8 +302,8 @@ static int fsfilt_extN_set_md(struct inode *inode, void *handle, * it will fit, because putting it in an EA currently kills the MDS * performance. We'll fix this with "fast EAs" in the future. 
*/ - if (lmm_size <= sizeof(EXTN_I(inode)->i_data) - - sizeof(EXTN_I(inode)->i_data[0])) { + if (inode->i_blocks == 0 && lmm_size <= sizeof(EXTN_I(inode)->i_data) - + sizeof(EXTN_I(inode)->i_data[0])) { /* XXX old_size is debugging only */ int old_size = EXTN_I(inode)->i_data[0]; if (old_size != 0) { @@ -319,7 +335,7 @@ static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size) { int rc; - if (EXTN_I(inode)->i_data[0]) { + if (inode->i_blocks == 0 && EXTN_I(inode)->i_data[0]) { int size = le32_to_cpu(EXTN_I(inode)->i_data[0]); LASSERT(size < sizeof(EXTN_I(inode)->i_data)); if (lmm) { @@ -411,14 +427,15 @@ static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error) { struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); + fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, fcb->cb_data, error); OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb); atomic_dec(&fcb_cache_count); } static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) + void *handle, fsfilt_cb_t cb_func, + void *cb_data) { struct fsfilt_cb_data *fcb; @@ -430,10 +447,10 @@ static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, fcb->cb_func = cb_func; fcb->cb_obd = obd; fcb->cb_last_rcvd = last_rcvd; + fcb->cb_data = cb_data; CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); lock_kernel(); - /* Note that an "incompatible pointer" warning here is OK for now */ journal_callback_set(handle, fsfilt_extN_cb_func, (struct journal_callback *)fcb); unlock_kernel(); @@ -459,7 +476,7 @@ static int fsfilt_extN_journal_data(struct file *filp) */ static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs) { - struct statfs sfs; + struct kstatfs sfs; int rc = vfs_statfs(sb, &sfs); if (!rc && sfs.f_bfree < sfs.f_ffree) { @@ -484,6 +501,110 @@ static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks, return 
extN_prep_san_write(inode, blocks, nblocks, newsize); } +static int fsfilt_extN_read_record(struct file * file, char *buf, + int size, loff_t *offs) +{ + struct buffer_head *bh; + unsigned long block, boffs; + struct inode *inode = file->f_dentry->d_inode; + int err; + + if (inode->i_size < *offs + size) { + CERROR("file size %llu is too short for read %u@%llu\n", + inode->i_size, size, *offs); + return -EIO; + } + + block = *offs >> inode->i_blkbits; + bh = extN_bread(NULL, inode, block, 0, &err); + if (!bh) { + CERROR("can't read block: %d\n", err); + return err; + } + + boffs = (unsigned)*offs % bh->b_size; + if (boffs + size > bh->b_size) { + CERROR("request crosses block's border. offset %llu, size %u\n", + *offs, size); + brelse(bh); + return -EIO; + } + + memcpy(buf, bh->b_data + boffs, size); + brelse(bh); + *offs += size; + return size; +} + +static int fsfilt_extN_write_record(struct file * file, char *buf, + int size, loff_t *offs) +{ + struct buffer_head *bh; + unsigned long block, boffs; + struct inode *inode = file->f_dentry->d_inode; + loff_t old_size = inode->i_size; + journal_t *journal; + handle_t *handle; + int err; + + journal = EXTN_SB(inode->i_sb)->s_journal; + handle = journal_start(journal, EXTN_DATA_TRANS_BLOCKS + 2); + if (handle == NULL) { + CERROR("can't start transaction\n"); + return -EIO; + } + + block = *offs >> inode->i_blkbits; + if (*offs + size > inode->i_size) { + down(&inode->i_sem); + if (*offs + size > inode->i_size) + inode->i_size = ((loff_t)block + 1) << inode->i_blkbits; + up(&inode->i_sem); + } + + bh = extN_bread(handle, inode, block, 1, &err); + if (!bh) { + CERROR("can't read/create block: %d\n", err); + goto out; + } + + /* This is a hack only needed because extN_get_block_handle() updates + * i_disksize after marking the inode dirty in extN_splice_branch(). + * We will fix that when we get a chance, as extN_mark_inode_dirty() + * is not without cost, nor is it even exported. 
+ */ + if (inode->i_size > old_size) + mark_inode_dirty(inode); + + boffs = (unsigned)*offs % bh->b_size; + if (boffs + size > bh->b_size) { + CERROR("request crosses block's border. offset %llu, size %u\n", + *offs, size); + err = -EIO; + goto out; + } + + err = extN_journal_get_write_access(handle, bh); + if (err) { + CERROR("journal_get_write_access() returned error %d\n", err); + goto out; + } + memcpy(bh->b_data + boffs, buf, size); + err = extN_journal_dirty_metadata(handle, bh); + if (err) { + CERROR("journal_dirty_metadata() returned error %d\n", err); + goto out; + } + err = size; +out: + if (bh) + brelse(bh); + journal_stop(handle); + if (err > 0) + *offs += size; + return err; +} + static struct fsfilt_operations fsfilt_extN_ops = { fs_type: "extN", fs_owner: THIS_MODULE, @@ -499,6 +620,8 @@ static struct fsfilt_operations fsfilt_extN_ops = { fs_statfs: fsfilt_extN_statfs, fs_sync: fsfilt_extN_sync, fs_prep_san_write: fsfilt_extN_prep_san_write, + fs_write_record: fsfilt_extN_write_record, + fs_read_record: fsfilt_extN_read_record, }; static int __init fsfilt_extN_init(void) diff --git a/lustre/obdclass/fsfilt_reiserfs.c b/lustre/obdclass/fsfilt_reiserfs.c index ccefb92..3d118fc 100644 --- a/lustre/obdclass/fsfilt_reiserfs.c +++ b/lustre/obdclass/fsfilt_reiserfs.c @@ -48,18 +48,19 @@ #include #include -static void *fsfilt_reiserfs_start(struct inode *inode, int op) +static void *fsfilt_reiserfs_start(struct inode *inode, int op, + void *desc_private) { return (void *)0xf00f00be; } static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) + int niocount, void *desc_private) { return (void *)0xf00f00be; } -static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, +static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, int force_sync) { if (handle != (void *)0xf00f00be) { @@ -131,8 +132,9 @@ static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t 
cou return file->f_op->read(file, buf, count, offset); } -static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) +static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, + __u64 last_rcvd, void *handle, + fsfilt_cb_t cb_func, void *cb_data) { static long next = 0; @@ -141,7 +143,7 @@ static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd next = jiffies + 300 * HZ; } - cb_func(obd, last_rcvd, 0); + cb_func(obd, last_rcvd, cb_data, 0); return 0; } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 4862cf3..bb48e5d 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -22,37 +22,38 @@ #define EXPORT_SYMTAB #define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif -#include -#else -#include +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# endif +# include +#else /* __KERNEL__ */ +# include #endif #include #include +#include #ifdef LPROCFS struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, const char *name) { - struct proc_dir_entry* temp; + struct proc_dir_entry *temp; - if (!head) + if (head == NULL) return NULL; temp = head->subdir; while (temp != NULL) { - if (!strcmp(temp->name, name)) + if (strcmp(temp->name, name) == 0) return temp; temp = temp->next; @@ -65,26 +66,30 @@ struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, void *data) { - if ((root == NULL) || (list == NULL)) + if (root == NULL || list == NULL) return -EINVAL; - while (list->name) { + while (list->name != NULL) { struct proc_dir_entry *cur_root, *proc; - char *pathcopy, *cur, *next; - int pathsize = strlen(list->name)+1; + char 
*pathcopy, *cur, *next, pathbuf[64]; + int pathsize = strlen(list->name) + 1; proc = NULL; cur_root = root; /* need copy of path for strsep */ - OBD_ALLOC(pathcopy, pathsize); - if (!pathcopy) - return -ENOMEM; + if (strlen(list->name) > sizeof(pathbuf) - 1) { + OBD_ALLOC(pathcopy, pathsize); + if (pathcopy == NULL) + return -ENOMEM; + } else { + pathcopy = pathbuf; + } next = pathcopy; strcpy(pathcopy, list->name); - while (cur_root && (cur = strsep(&next, "/"))) { + while (cur_root != NULL && (cur = strsep(&next, "/"))) { if (*cur =='\0') /* skip double/trailing "/" */ continue; @@ -92,10 +97,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n", cur_root->name, cur, next, (proc ? "exists" : "new")); - if (next) + if (next != NULL) { cur_root = (proc ? proc : - proc_mkdir(cur, cur_root)); - else if (!proc) { + proc_mkdir(cur, cur_root)); + } else if (proc == NULL) { mode_t mode = 0444; if (list->write_fptr) mode = 0644; @@ -103,9 +108,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, } } + if (pathcopy != pathbuf) OBD_FREE(pathcopy, pathsize); - if ((cur_root == NULL) || (proc == NULL)) { + if (cur_root == NULL || proc == NULL) { CERROR("LprocFS: No memory to create /proc entry %s", list->name); return -ENOMEM; @@ -119,7 +125,7 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, return 0; } -void lprocfs_remove(struct proc_dir_entry* root) +void lprocfs_remove(struct proc_dir_entry *root) { struct proc_dir_entry *temp = root; struct proc_dir_entry *rm_entry; @@ -130,7 +136,7 @@ void lprocfs_remove(struct proc_dir_entry* root) LASSERT(parent != NULL); while (1) { - while (temp->subdir) + while (temp->subdir != NULL) temp = temp->subdir; rm_entry = temp; @@ -148,14 +154,14 @@ struct proc_dir_entry *lprocfs_register(const char *name, struct proc_dir_entry *newchild; newchild = lprocfs_srch(parent, name); - if (newchild) { + 
if (newchild != NULL) { CERROR(" Lproc: Attempting to register %s more than once \n", name); return ERR_PTR(-EALREADY); } newchild = proc_mkdir(name, parent); - if (newchild && list) { + if (newchild != NULL && list != NULL) { int rc = lprocfs_add_vars(newchild, list, data); if (rc) { lprocfs_remove(newchild); @@ -175,10 +181,10 @@ int lprocfs_rd_u64(char *page, char **start, off_t off, return snprintf(page, count, LPU64"\n", *(__u64 *)data); } -int lprocfs_rd_uuid(char* page, char **start, off_t off, int count, +int lprocfs_rd_uuid(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct obd_device* dev = (struct obd_device*)data; + struct obd_device *dev = (struct obd_device*)data; LASSERT(dev != NULL); *eof = 1; @@ -186,9 +192,9 @@ int lprocfs_rd_uuid(char* page, char **start, off_t off, int count, } int lprocfs_rd_name(char *page, char **start, off_t off, int count, - int *eof, void *data) + int *eof, void* data) { - struct obd_device* dev = (struct obd_device *)data; + struct obd_device *dev = (struct obd_device *)data; LASSERT(dev != NULL); LASSERT(dev->obd_name != NULL); @@ -196,72 +202,98 @@ int lprocfs_rd_name(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n", dev->obd_name); } -int lprocfs_rd_blksize(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_fstype(char *page, char **start, off_t off, int count, int *eof, + void *data) { - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%lu\n", sfs->f_bsize); + struct obd_device *obd = (struct obd_device *)data; + + LASSERT(obd != NULL); + LASSERT(obd->obd_fsops != NULL); + LASSERT(obd->obd_fsops->fs_type != NULL); + return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type); } -int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_blksize(char *page, char **start, off_t off, int count, + int *eof, void *data) { - __u32 
blk_size; - __u64 result; + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, "%u\n", osfs.os_bsize); + } + return rc; +} - LASSERT(sfs != NULL); - blk_size = sfs->f_bsize >> 10; - result = sfs->f_blocks; +int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_blocks; - while (blk_size >>= 1) - result <<= 1; + while (blk_size >>= 1) + result <<= 1; - *eof = 1; - return snprintf(page, count, LPU64"\n", result); + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; } -int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, int count, + int *eof, void *data) { - __u32 blk_size; - __u64 result; + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + __u32 blk_size = osfs.os_bsize >> 10; + __u64 result = osfs.os_bfree; - LASSERT(sfs != NULL); - blk_size = sfs->f_bsize >> 10; - result = sfs->f_bfree; + while (blk_size >>= 1) + result <<= 1; - while (blk_size >>= 1) - result <<= 1; - - *eof = 1; - return snprintf(page, count, LPU64"\n", result); + *eof = 1; + rc = snprintf(page, count, LPU64"\n", result); + } + return rc; } -int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_filestotal(char *page, char **start, off_t off, int count, + int *eof, void *data) { - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%ld\n", sfs->f_files); + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, LPU64"\n", osfs.os_files); + } + + return rc; } -int lprocfs_rd_filesfree(char* page, 
char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_filesfree(char *page, char **start, off_t off, int count, + int *eof, void *data) { - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%ld\n", sfs->f_ffree); + struct obd_statfs osfs; + int rc = obd_statfs(data, &osfs, jiffies - HZ); + if (!rc) { + *eof = 1; + rc = snprintf(page, count, LPU64"\n", osfs.os_ffree); + } + return rc; } -int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) +int lprocfs_rd_filegroups(char *page, char **start, off_t off, int count, + int *eof, void *data) { *eof = 1; return snprintf(page, count, "unimplemented\n"); } -int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count, +int lprocfs_rd_server_uuid(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = (struct obd_device *)data; @@ -290,7 +322,7 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct obd_type* class = (struct obd_type*) data; + struct obd_type *class = (struct obd_type*) data; LASSERT(class != NULL); *eof = 1; @@ -334,21 +366,21 @@ struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num) if (num == 0) return NULL; - OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); + OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()])); if (stats == NULL) return NULL; percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num])); - stats->ls_percpu_size = smp_num_cpus * percpusize; + stats->ls_percpu_size = num_online_cpus() * percpusize; OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size); if (stats->ls_percpu[0] == NULL) { OBD_FREE(stats, offsetof(typeof(*stats), - ls_percpu[smp_num_cpus])); + ls_percpu[num_online_cpus()])); return NULL; } stats->ls_num = num; - for (i = 1; i < smp_num_cpus; i++) + for 
(i = 1; i < num_online_cpus(); i++) stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) + percpusize; @@ -361,7 +393,7 @@ void lprocfs_free_stats(struct lprocfs_stats *stats) return; OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size); - OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); + OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[num_online_cpus()])); } /* Reset counter under lock */ @@ -410,17 +442,18 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) } idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0]; - for (i = 0; i < smp_num_cpus; i++) { + for (i = 0; i < num_online_cpus(); i++) { struct lprocfs_counter *percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[idx]; int centry; + do { - centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); - t.lc_count = percpu_cntr->lc_count; - t.lc_sum = percpu_cntr->lc_sum; - t.lc_min = percpu_cntr->lc_min; - t.lc_max = percpu_cntr->lc_max; - t.lc_sumsquare = percpu_cntr->lc_sumsquare; + centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); + t.lc_count = percpu_cntr->lc_count; + t.lc_sum = percpu_cntr->lc_sum; + t.lc_min = percpu_cntr->lc_min; + t.lc_max = percpu_cntr->lc_max; + t.lc_sumsquare = percpu_cntr->lc_sumsquare; } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) && centry != atomic_read(&percpu_cntr->lc_cntl.la_exit)); ret.lc_count += t.lc_count; @@ -453,10 +486,10 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) } struct seq_operations lprocfs_stats_seq_sops = { - .start = lprocfs_stats_seq_start, - .stop = lprocfs_stats_seq_stop, - .next = lprocfs_stats_seq_next, - .show = lprocfs_stats_seq_show, + start: lprocfs_stats_seq_start, + stop: lprocfs_stats_seq_stop, + next: lprocfs_stats_seq_next, + show: lprocfs_stats_seq_show, }; static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) @@ -474,13 +507,13 @@ static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) } struct file_operations lprocfs_stats_seq_fops = { - 
.open = lprocfs_stats_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, + open: lprocfs_stats_seq_open, + read: seq_read, + llseek: seq_lseek, + release: seq_release, }; -int lprocfs_register_stats(struct proc_dir_entry *root, const char* name, +int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, struct lprocfs_stats *stats) { struct proc_dir_entry *entry; @@ -502,7 +535,7 @@ void lprocfs_counter_init(struct lprocfs_stats *stats, int index, int i; LASSERT(stats != NULL); - for (i = 0; i < smp_num_cpus; i++) { + for (i = 0; i < num_online_cpus(); i++) { c = &(stats->ls_percpu[i]->lp_cntr[index]); c->lc_config = conf; c->lc_min = ~(__u64)0; @@ -515,7 +548,7 @@ EXPORT_SYMBOL(lprocfs_counter_init); #define LPROCFS_OBD_OP_INIT(base, stats, op) \ do { \ unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < stats->ls_num); \ + LASSERT(coffset < stats->ls_num); \ lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \ } while (0) @@ -529,10 +562,10 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LASSERT(obd->obd_proc_entry != NULL); LASSERT(obd->obd_cntr_base == 0); - num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) + + num_stats = 1 + OBD_COUNTER_OFFSET(unpin) + num_private_stats; stats = lprocfs_alloc_stats(num_stats); - if (!stats) + if (stats == NULL) return -ENOMEM; LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol); @@ -569,16 +602,28 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, match); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_add); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, log_cancel); LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, mark_page_dirty); + 
LPROCFS_OBD_OP_INIT(num_private_stats, stats, clear_dirty_pages); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, last_dirty_offset); LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, pin); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpin); for (i = num_private_stats; i < num_stats; i++) { - /* If this assertion failed, it is likely that an obd + /* If this LBUGs, it is likely that an obd * operation was added to struct obd_ops in * , and that the corresponding line item * LPROCFS_OBD_OP_INIT(.., .., opname) * is missing from the list above. */ - LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL); + if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) { + CERROR("Missing obd_stat initializer obd_op " + "operation at offset %d. Aborting.\n", + i - num_private_stats); + LBUG(); + } } rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats); if (rc < 0) { @@ -617,6 +662,7 @@ EXPORT_SYMBOL(lprocfs_free_obd_stats); EXPORT_SYMBOL(lprocfs_rd_u64); EXPORT_SYMBOL(lprocfs_rd_uuid); EXPORT_SYMBOL(lprocfs_rd_name); +EXPORT_SYMBOL(lprocfs_rd_fstype); EXPORT_SYMBOL(lprocfs_rd_server_uuid); EXPORT_SYMBOL(lprocfs_rd_conn_uuid); EXPORT_SYMBOL(lprocfs_rd_numrefs); diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c index 06f86ad..bc07df9 100644 --- a/lustre/obdclass/lustre_handles.c +++ b/lustre/obdclass/lustre_handles.c @@ -4,32 +4,31 @@ * Copyright (C) 2002 Cluster File Systems, Inc. * Author: Phil Schwan * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ + * This file is part of Lustre, http://www.lustre.org/ * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. 
+ * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. * - * Portals is distributed in the hope that it will be useful, + * Lustre is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. + * GNU General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define DEBUG_SUBSYSTEM S_CLASS #ifdef __KERNEL__ -#include -#include +# include +# include #else -#include +# include #endif - -#include +#include #include static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED; @@ -118,7 +117,7 @@ int class_handle_init(void) LASSERT(handle_hash == NULL); - PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); + OBD_VMALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); if (handle_hash == NULL) return -ENOMEM; @@ -158,7 +157,7 @@ void class_handle_cleanup(void) cleanup_all_handles(); } - PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); + OBD_VFREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); handle_hash = NULL; if (handle_count) diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index 016354c..5987d2e 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -64,8 +64,8 @@ void class_exit_uuidlist(void) struct uuid_nid_data *data = list_entry(tmp, struct uuid_nid_data, head); - PORTAL_FREE(data->uuid, strlen(data->uuid) + 1); - PORTAL_FREE(data, sizeof(*data)); + 
OBD_FREE(data->uuid, strlen(data->uuid) + 1); + OBD_FREE(data, sizeof(*data)); } } @@ -109,11 +109,11 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) } rc = -ENOMEM; - PORTAL_ALLOC(data, sizeof(*data)); + OBD_ALLOC(data, sizeof(*data)); if (data == NULL) goto fail_0; - PORTAL_ALLOC(data->uuid, nob); + OBD_ALLOC(data->uuid, nob); if (data == NULL) goto fail_1; @@ -131,7 +131,7 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) return 0; fail_1: - PORTAL_FREE (data, sizeof (*data)); + OBD_FREE (data, sizeof (*data)); fail_0: kportal_put_ni (nal); return (rc); @@ -171,8 +171,8 @@ int class_del_uuid (char *uuid) list_del (&data->head); kportal_put_ni (data->nal); - PORTAL_FREE(data->uuid, strlen(data->uuid) + 1); - PORTAL_FREE(data, sizeof(*data)); + OBD_FREE(data->uuid, strlen(data->uuid) + 1); + OBD_FREE(data, sizeof(*data)); } while (!list_empty (&deathrow)); return 0; diff --git a/lustre/obdclass/simple.c b/lustre/obdclass/simple.c index 0ce54a3..bd1363a 100644 --- a/lustre/obdclass/simple.c +++ b/lustre/obdclass/simple.c @@ -139,7 +139,6 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, current->fsgid = saved->ouc.ouc_fsgid; current->cap_effective = saved->ouc.ouc_cap; current->ngroups = saved->ngroups; - current->groups[0] = saved->ouc.ouc_suppgid1; current->groups[1] = saved->ouc.ouc_suppgid2; } @@ -167,7 +166,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name); - dchild = lookup_one_len(name, dir, strlen(name)); + dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) GOTO(out_up, dchild); @@ -201,7 +200,7 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name); - dchild = lookup_one_len(name, dir, 
strlen(name)); + dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) GOTO(out_up, dchild); diff --git a/lustre/obdclass/statfs_pack.c b/lustre/obdclass/statfs_pack.c index 786a768..8bb78cc 100644 --- a/lustre/obdclass/statfs_pack.c +++ b/lustre/obdclass/statfs_pack.c @@ -31,7 +31,7 @@ #else #include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include +#include #endif #endif @@ -40,8 +40,9 @@ #include #include -void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs) +void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs) { + memset(osfs, 0, sizeof(*osfs)); osfs->os_type = sfs->f_type; osfs->os_blocks = sfs->f_blocks; osfs->os_bfree = sfs->f_bfree; @@ -52,8 +53,9 @@ void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs) osfs->os_namelen = sfs->f_namelen; } -void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs) +void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs) { + memset(sfs, 0, sizeof(*sfs)); sfs->f_type = osfs->os_type; sfs->f_blocks = osfs->os_blocks; sfs->f_bfree = osfs->os_bfree; @@ -64,39 +66,5 @@ void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs) sfs->f_namelen = osfs->os_namelen; } -int obd_self_statfs(struct obd_device *obd, struct statfs *sfs) -{ - struct obd_export *export, *my_export = NULL; - struct obd_statfs osfs = { 0 }; - int rc; - ENTRY; - - LASSERT( obd != NULL ); - - spin_lock(&obd->obd_dev_lock); - if (list_empty(&obd->obd_exports)) { - spin_unlock(&obd->obd_dev_lock); - export = my_export = class_new_export(obd); - if (export == NULL) - RETURN(-ENOMEM); - } else { - export = list_entry(obd->obd_exports.next, typeof(*export), - exp_obd_chain); - export = class_export_get(export); - spin_unlock(&obd->obd_dev_lock); - } - - rc = obd_statfs(export, &osfs); - if (!rc) - statfs_unpack(sfs, &osfs); - - if (my_export) - class_unlink_export(my_export); - - class_export_put(export); - RETURN(rc); -} - EXPORT_SYMBOL(statfs_pack); EXPORT_SYMBOL(statfs_unpack); 
-EXPORT_SYMBOL(obd_self_statfs); diff --git a/lustre/obdecho/.cvsignore b/lustre/obdecho/.cvsignore index e530020..49c6100 100644 --- a/lustre/obdecho/.cvsignore +++ b/lustre/obdecho/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index f89df07..887889a 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -64,7 +64,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *obd, return class_connect(conn, obd, cluuid); } -static int echo_disconnect(struct lustre_handle *conn, int failover) +static int echo_disconnect(struct lustre_handle *conn, int flags) { struct obd_export *exp = class_conn2export(conn); @@ -72,7 +72,7 @@ static int echo_disconnect(struct lustre_handle *conn, int failover) ldlm_cancel_locks_for_export(exp); class_export_put(exp); - return (class_disconnect(conn, failover)); + return class_disconnect(conn, flags); } static __u64 echo_next_id(struct obd_device *obddev) @@ -235,7 +235,7 @@ static int echo_setattr(struct lustre_handle *conn, struct obdo *oa, int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, struct niobuf_local *res, - void **desc_private, struct obd_trans_info *oti) + struct obd_trans_info *oti) { struct obd_device *obd; struct niobuf_local *r = res; @@ -253,7 +253,8 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n", cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount); - *desc_private = (void *)DESC_PRIV; + if (oti) + oti->oti_handle = (void *)DESC_PRIV; for (i = 0; i < objcount; i++, obj++) { int gfp_mask = (obj->ioo_id & 1) ? 
GFP_HIGHUSER : GFP_KERNEL; @@ -285,7 +286,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, r->offset = nb->offset; r->len = nb->len; - LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE); + LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE); CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n", r->page, r->offset, r->len); @@ -339,9 +340,9 @@ preprw_cleanup: return rc; } -int echo_commitrw(int cmd, struct obd_export *export, int objcount, - struct obd_ioobj *obj, int niocount, struct niobuf_local *res, - void *desc_private, struct obd_trans_info *oti) +int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, + int objcount, struct obd_ioobj *obj, int niocount, + struct niobuf_local *res, struct obd_trans_info *oti) { struct obd_device *obd; struct niobuf_local *r = res; @@ -365,7 +366,7 @@ int echo_commitrw(int cmd, struct obd_export *export, int objcount, RETURN(-EINVAL); } - LASSERT(desc_private == (void *)DESC_PRIV); + LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV); for (i = 0; i < objcount; i++, obj++) { int verify = obj->ioo_id != 0; @@ -437,7 +438,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf) RETURN(0); } -static int echo_cleanup(struct obd_device *obddev, int force, int failover) +static int echo_cleanup(struct obd_device *obddev, int flags) { ENTRY; @@ -453,7 +454,7 @@ int echo_attach(struct obd_device *obd, obd_count len, void *data) struct lprocfs_static_vars lvars; int rc; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(echo, &lvars); rc = lprocfs_obd_attach(obd, lvars.obd_vars); if (rc != 0) return rc; @@ -539,7 +540,7 @@ static int __init obdecho_init(void) printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n"); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(echo, &lvars); rc = echo_object0_pages_init (); if (rc != 0) @@ -561,7 +562,7 @@ static int __init obdecho_init(void) RETURN(rc); } -static void __exit obdecho_exit(void) +static void 
/*__exit*/ obdecho_exit(void) { echo_client_cleanup(); class_unregister_type(OBD_ECHO_DEVICENAME); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 79da7ea..c010798 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -484,7 +484,7 @@ echo_client_kbrw (struct obd_device *obd, int rw, } } - rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL); + rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL); out: if (rc != 0) @@ -568,7 +568,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw, pgp->flag = 0; } - rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL); + rc = obd_brw(rw, &ec->ec_conn, oa, lsm, npages, pga, NULL); // if (rw == OBD_BRW_READ) // mark_dirty_kiobuf (kiobuf, count); @@ -1009,7 +1009,7 @@ static int echo_setup(struct obd_device *obddev, obd_count len, void *buf) RETURN(rc); } -static int echo_cleanup(struct obd_device * obddev, int force, int failover) +static int echo_cleanup(struct obd_device *obddev, int flags) { struct list_head *el; struct ec_object *eco; @@ -1023,21 +1023,21 @@ static int echo_cleanup(struct obd_device * obddev, int force, int failover) } /* XXX assuming sole access */ - while (!list_empty (&ec->ec_objects)) { + while (!list_empty(&ec->ec_objects)) { el = ec->ec_objects.next; - eco = list_entry (el, struct ec_object, eco_obj_chain); + eco = list_entry(el, struct ec_object, eco_obj_chain); - LASSERT (eco->eco_refcount == 0); + LASSERT(eco->eco_refcount == 0); eco->eco_refcount = 1; eco->eco_deleted = 1; - echo_put_object (eco); + echo_put_object(eco); } - rc = obd_disconnect (&ec->ec_conn, 0); + rc = obd_disconnect(&ec->ec_conn, 0); if (rc != 0) CERROR("fail to disconnect device: %d\n", rc); - RETURN (rc); + RETURN(rc); } static int echo_connect(struct lustre_handle *conn, struct obd_device *src, @@ -1057,7 +1057,7 @@ static int echo_connect(struct lustre_handle *conn, struct obd_device *src, RETURN (rc); } -static int echo_disconnect(struct lustre_handle 
*conn, int failover) +static int echo_disconnect(struct lustre_handle *conn, int flags) { struct obd_export *exp = class_conn2export (conn); struct obd_device *obd; @@ -1128,7 +1128,7 @@ int echo_client_init(void) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(echo, &lvars); return class_register_type(&echo_obd_ops, lvars.module_vars, OBD_ECHO_CLIENT_DEVICENAME); } diff --git a/lustre/obdecho/lproc_echo.c b/lustre/obdecho/lproc_echo.c index 6a16001..c25d156 100644 --- a/lustre/obdecho/lproc_echo.c +++ b/lustre/obdecho/lproc_echo.c @@ -25,30 +25,19 @@ #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else -int rd_fstype(char* page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device* dev = (struct obd_device*)data; - - LASSERT(dev != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype); -} - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, +static struct lprocfs_vars lprocfs_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(echo, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/obdfilter/.cvsignore b/lustre/obdfilter/.cvsignore index e530020..49c6100 100644 --- a/lustre/obdfilter/.cvsignore +++ b/lustre/obdfilter/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/obdfilter/Makefile.am b/lustre/obdfilter/Makefile.am index b9addf1..ed4ca1e 100644 --- 
a/lustre/obdfilter/Makefile.am +++ b/lustre/obdfilter/Makefile.am @@ -6,6 +6,7 @@ MODULE = obdfilter modulefs_DATA = obdfilter.o EXTRA_PROGRAMS = obdfilter -obdfilter_SOURCES = filter.c lproc_obdfilter.c +obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \ +lproc_obdfilter.c filter_internal.h include $(top_srcdir)/Rules diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 6f2d96c..b6c1bd9 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -33,31 +33,27 @@ * threaded operation on the OST. */ -#define EXPORT_SYMTAB #define DEBUG_SUBSYSTEM S_FILTER #include #include -#include // XXX kill me soon #include #include -#include -#include -#include #include -#include -#include -#include #include #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include +# include +# include #endif -enum { - LPROC_FILTER_READ_BYTES = 0, - LPROC_FILTER_WRITE_BYTES = 1, - LPROC_FILTER_LAST, -}; +#include +#include +#include +#include +#include +#include + +#include "filter_internal.h" #define S_SHIFT 12 static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = { @@ -132,19 +128,79 @@ static void filter_ffd_destroy(struct filter_file_data *ffd) filter_ffd_put(ffd); } -static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error) +static void filter_commit_cb(struct obd_device *obd, __u64 transno, + void *cb_data, int error) { obd_transno_commit_cb(obd, transno, error); } -/* Assumes caller has already pushed us into the kernel context. 
*/ -int filter_finish_transno(struct obd_export *export, void *handle, - struct obd_trans_info *oti, int rc) + +static int filter_client_log_cancel(struct lustre_handle *conn, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags) { - __u64 last_rcvd; - struct obd_device *obd = export->exp_obd; + struct obd_device *obd = class_conn2obd(conn); + struct llog_commit_data *llcd; struct filter_obd *filter = &obd->u.filter; - struct filter_export_data *fed = &export->exp_filter_data; + int rc = 0; + ENTRY; + + if (count == 0 || cookies == NULL) { + down(&filter->fo_sem); + if (filter->fo_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW)) + GOTO(out, rc); + + llcd = filter->fo_llcd; + GOTO(send_now, rc); + } + + down(&filter->fo_sem); + llcd = filter->fo_llcd; + if (llcd == NULL) { + llcd = llcd_grab(); + if (llcd == NULL) { + CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n", + cookies->lgc_lgl.lgl_oid, + cookies->lgc_lgl.lgl_ogen, cookies->lgc_index); + GOTO(out, rc = -ENOMEM); + } + llcd->llcd_import = filter->fo_mdc_imp; + filter->fo_llcd = llcd; + } + + memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, + sizeof(*cookies)); + llcd->llcd_cookiebytes += sizeof(*cookies); + + GOTO(send_now, rc); +send_now: + if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) || + flags & OBD_LLOG_FL_SENDNOW)) { + filter->fo_llcd = NULL; + llcd_send(llcd); + } +out: + up(&filter->fo_sem); + + return rc; +} + +/* When this (destroy) operation is committed, return the cancel cookie */ +static void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, + void *cb_data, int error) +{ + filter_client_log_cancel(&obd->u.filter.fo_mdc_conn, NULL, 1, + cb_data, OBD_LLOG_FL_SENDNOW); + OBD_FREE(cb_data, sizeof(struct llog_cookie)); +} + +/* Assumes caller has already pushed us into the kernel context. 
*/ +int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, + int rc) +{ + struct filter_obd *filter = &exp->exp_obd->u.filter; + struct filter_export_data *fed = &exp->exp_filter_data; struct filter_client_data *fcd = fed->fed_fcd; + __u64 last_rcvd; loff_t off; ssize_t written; @@ -152,14 +208,14 @@ int filter_finish_transno(struct obd_export *export, void *handle, if (rc) RETURN(rc); - if (!obd->obd_replayable) + if (!exp->exp_obd->obd_replayable) RETURN(rc); /* we don't allocate new transnos for replayed requests */ - if (oti && oti->oti_transno == 0) { + if (oti != NULL && oti->oti_transno == 0) { spin_lock(&filter->fo_translock); - last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1; - filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd); + last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_transno) + 1; + filter->fo_fsd->fsd_last_transno = cpu_to_le64(last_rcvd); spin_unlock(&filter->fo_translock); oti->oti_transno = last_rcvd; fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd); @@ -169,27 +225,28 @@ int filter_finish_transno(struct obd_export *export, void *handle, fcd->fcd_last_xid = 0; off = fed->fed_lr_off; - fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb); - written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, - sizeof(*fcd), &off); + fsfilt_set_last_rcvd(exp->exp_obd, last_rcvd, oti->oti_handle, + filter_commit_cb, NULL); + written = fsfilt_write_record(exp->exp_obd, + filter->fo_rcvd_filp, (char *)fcd, + sizeof(*fcd), &off); CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: " - "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, + "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_idx, written); if (written == sizeof(*fcd)) RETURN(0); - CERROR("error writing to last_rcvd file: rc = %d\n", + CERROR("error writing to %s: rc = %d\n", LAST_RCVD, (int)written); if (written >= 0) - RETURN(-EIO); - + RETURN(-ENOSPC); RETURN(written); - } + } RETURN(0); } -static inline void f_dput(struct dentry *dentry) 
+void f_dput(struct dentry *dentry) { /* Can't go inside filter_ddelete because it can block */ CDEBUG(D_INODE, "putting %s: %p, count = %d\n", @@ -207,26 +264,19 @@ static void filter_drelease(struct dentry *dentry) } struct dentry_operations filter_dops = { - .d_release = filter_drelease, + d_release: filter_drelease, }; -#define LAST_RCVD "last_rcvd" -#define INIT_OBJID 2 - -/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ -#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8) -#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long)) - /* Add client data to the FILTER. We use a bitmap to locate a free space * in the last_rcvd file if cl_idx is -1 (i.e. a new client). * Otherwise, we have just read the data from the last_rcvd file and - * we know its offset. - */ -int filter_client_add(struct obd_device *obd, struct filter_obd *filter, - struct filter_export_data *fed, int cl_idx) + * we know its offset. */ +static int filter_client_add(struct obd_device *obd, struct filter_obd *filter, + struct filter_export_data *fed, int cl_idx) { unsigned long *bitmap = filter->fo_last_rcvd_slots; int new_client = (cl_idx == -1); + ENTRY; LASSERT(bitmap != NULL); @@ -242,7 +292,7 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter, repeat: if (cl_idx >= FILTER_LR_MAX_CLIENTS) { CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n"); - return -ENOMEM; + RETURN(-ENOMEM); } if (test_and_set_bit(cl_idx, bitmap)) { CERROR("FILTER client %d: found bit is set in bitmap\n", @@ -270,23 +320,23 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter, if (new_client) { struct obd_run_ctxt saved; loff_t off = fed->fed_lr_off; - ssize_t written; + int written; void *handle; CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n", fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd)); push_ctxt(&saved, &filter->fo_ctxt, NULL); - /* Transaction eeded to fix for bug 1403 */ + /* Transaction 
needed to fix bug 1403 */ handle = fsfilt_start(obd, filter->fo_rcvd_filp->f_dentry->d_inode, - FSFILT_OP_SETATTR); + FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) { written = PTR_ERR(handle); CERROR("unable to start transaction: rc %d\n", (int)written); } else { - written = lustre_fwrite(filter->fo_rcvd_filp, + written = fsfilt_write_record(obd, filter->fo_rcvd_filp, (char *)fed->fed_fcd, sizeof(*fed->fed_fcd), &off); fsfilt_commit(obd, @@ -296,32 +346,35 @@ int filter_client_add(struct obd_device *obd, struct filter_obd *filter, pop_ctxt(&saved, &filter->fo_ctxt, NULL); if (written != sizeof(*fed->fed_fcd)) { + CERROR("error writing %s client idx %u: rc %d\n", + LAST_RCVD, fed->fed_lr_idx, written); if (written < 0) RETURN(written); - RETURN(-EIO); + RETURN(-ENOSPC); } } - return 0; + RETURN(0); } -int filter_client_free(struct obd_export *exp, int failover) +static int filter_client_free(struct obd_export *exp, int flags) { struct filter_export_data *fed = &exp->exp_filter_data; struct filter_obd *filter = &exp->exp_obd->u.filter; + struct obd_device *obd = exp->exp_obd; struct filter_client_data zero_fcd; struct obd_run_ctxt saved; int written; loff_t off; ENTRY; - if (!fed->fed_fcd) + if (fed->fed_fcd == NULL) RETURN(0); - if (failover != 0) + if (flags & OBD_OPT_FAILOVER) GOTO(free, 0); /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ - if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID")) + if (strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID") == 0) GOTO(free, 0); LASSERT(filter->fo_last_rcvd_slots != NULL); @@ -339,8 +392,9 @@ int filter_client_free(struct obd_export *exp, int failover) memset(&zero_fcd, 0, sizeof zero_fcd); push_ctxt(&saved, &filter->fo_ctxt, NULL); - written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd, - sizeof(zero_fcd), &off); + written = fsfilt_write_record(obd, filter->fo_rcvd_filp, + (char *)&zero_fcd, sizeof(zero_fcd), + &off); /* XXX: this write gets lost sometimes, unless this sync is here. 
*/ if (written > 0) @@ -374,29 +428,30 @@ static int filter_free_server_data(struct filter_obd *filter) return 0; } - /* assumes caller is already in kernel ctxt */ -static int filter_update_server_data(struct file *filp, - struct filter_server_data *fsd) +int filter_update_server_data(struct obd_device *obd, + struct file *filp, struct filter_server_data *fsd) { loff_t off = 0; int rc; + ENTRY; CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid); CDEBUG(D_INODE, "server last_objid: "LPU64"\n", le64_to_cpu(fsd->fsd_last_objid)); CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n", - le64_to_cpu(fsd->fsd_last_rcvd)); + le64_to_cpu(fsd->fsd_last_transno)); CDEBUG(D_INODE, "server last_mount: "LPU64"\n", le64_to_cpu(fsd->fsd_mount_count)); - rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off); - if (rc != sizeof(*fsd)) { - CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", - rc); - RETURN(-EIO); - } - RETURN(0); + rc = fsfilt_write_record(obd, filp, (char *)fsd, sizeof(*fsd), &off); + if (rc == sizeof(*fsd)) + RETURN(0); + + CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", rc); + if (rc >= 0) + RETURN(-ENOSPC); + RETURN(rc); } /* assumes caller has already in kernel ctxt */ @@ -432,11 +487,11 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, } if (last_rcvd_size == 0) { - CERROR("%s: initializing new last_rcvd\n", obd->obd_name); + CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD); memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid)); fsd->fsd_last_objid = cpu_to_le64(init_lastobjid); - fsd->fsd_last_rcvd = 0; + fsd->fsd_last_transno = 0; mount_count = fsd->fsd_mount_count = 0; fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE); fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START); @@ -444,15 +499,18 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT); filter->fo_subdir_count = 
FILTER_SUBDIR_COUNT; } else { - ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd), - &off); + int retval = fsfilt_read_record(obd, filp, (char *)fsd, + sizeof(*fsd), &off); if (retval != sizeof(*fsd)) { - CDEBUG(D_INODE,"OBD filter: error reading %s\n", - LAST_RCVD); + CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", + LAST_RCVD, retval); GOTO(err_fsd, rc = -EIO); } mount_count = le64_to_cpu(fsd->fsd_mount_count); filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count); + fsd->fsd_last_objid = + cpu_to_le64(le64_to_cpu(fsd->fsd_last_objid) + + FILTER_SKIP_OBJID); } if (fsd->fsd_feature_incompat) { @@ -470,7 +528,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n", obd->obd_name, le64_to_cpu(fsd->fsd_last_objid)); CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n", - obd->obd_name, le64_to_cpu(fsd->fsd_last_rcvd)); + obd->obd_name, le64_to_cpu(fsd->fsd_last_transno)); CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n", obd->obd_name, mount_count); CDEBUG(D_INODE, "%s: server data size: %u\n", @@ -482,13 +540,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, CDEBUG(D_INODE, "%s: server subdir_count: %u\n", obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count)); - /* - * When we do a clean FILTER shutdown, we save the last_rcvd into - * the header. If we find clients with higher last_rcvd values - * then those clients may need recovery done. 
- */ if (!obd->obd_replayable) { - CERROR("%s: recovery support OFF\n", obd->obd_name); + CWARN("%s: recovery support OFF\n", obd->obd_name); GOTO(out, rc = 0); } @@ -507,7 +560,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, */ off = le32_to_cpu(fsd->fsd_client_start) + cl_idx * le16_to_cpu(fsd->fsd_client_size); - rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off); + rc = fsfilt_read_record(obd, filp, (char *)fcd, sizeof(*fcd), + &off); if (rc != sizeof(*fcd)) { CERROR("error reading FILTER %s offset %d: rc = %d\n", LAST_RCVD, cl_idx, rc); @@ -534,7 +588,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 " srv lr: "LPU64" mnt: "LPU64" last mount: " LPU64"\n", fcd->fcd_uuid, cl_idx, - last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd), + last_rcvd, le64_to_cpu(fsd->fsd_last_transno), le64_to_cpu(fcd->fcd_mount_count), mount_count); if (exp == NULL) { /* XXX this rc is ignored */ @@ -563,15 +617,16 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n", cl_idx, last_rcvd); - if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd)) - filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd); + if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_transno)) + filter->fo_fsd->fsd_last_transno=cpu_to_le64(last_rcvd); obd->obd_last_committed = - le64_to_cpu(filter->fo_fsd->fsd_last_rcvd); + le64_to_cpu(filter->fo_fsd->fsd_last_transno); + if (obd->obd_recoverable_clients) { CERROR("RECOVERY: %d recoverable clients, last_rcvd " LPU64"\n", obd->obd_recoverable_clients, - le64_to_cpu(filter->fo_fsd->fsd_last_rcvd)); + le64_to_cpu(filter->fo_fsd->fsd_last_transno)); obd->obd_next_recovery_transno = obd->obd_last_committed + 1; obd->obd_recovering = 1; @@ -585,8 +640,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp, out: fsd->fsd_mount_count 
= cpu_to_le64(mount_count + 1); - /* save it,so mount count and last_recvd is current */ - rc = filter_update_server_data(filp, filter->fo_fsd); + /* save it, so mount count and last_transno is current */ + rc = filter_update_server_data(obd, filp, filter->fo_fsd); RETURN(rc); @@ -639,7 +694,7 @@ static int filter_prep(struct obd_device *obd) filter->fo_dentry_O_mode[mode] = dentry; } - file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700); + file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700); if (!file || IS_ERR(file)) { rc = PTR_ERR(file); CERROR("OBD filter: cannot open/create %s: rc = %d\n", @@ -663,8 +718,15 @@ static int filter_prep(struct obd_device *obd) filter->fo_fop = file->f_op; filter->fo_iop = inode->i_op; filter->fo_aops = inode->i_mapping->a_ops; +#ifdef I_SKIP_PDFLUSH + /* + * we need this to protect from deadlock + * pdflush vs. lustre_fwrite() + */ + inode->i_flags |= I_SKIP_PDFLUSH; +#endif - rc = filter_init_server_data(obd, file, INIT_OBJID); + rc = filter_init_server_data(obd, file, FILTER_INIT_OBJID); if (rc) { CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_client, rc); @@ -740,9 +802,10 @@ static void filter_post(struct obd_device *obd) * from lastobjid */ push_ctxt(&saved, &filter->fo_ctxt, NULL); - rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd); + rc = filter_update_server_data(obd, filter->fo_rcvd_filp, + filter->fo_fsd); if (rc) - CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc); + CERROR("error writing lastobjid: rc = %ld\n", rc); if (filter->fo_rcvd_filp) { @@ -751,7 +814,7 @@ static void filter_post(struct obd_device *obd) filp_close(filter->fo_rcvd_filp, 0); filter->fo_rcvd_filp = NULL; if (rc) - CERROR("last_rcvd file won't closed rc = %ld\n", rc); + CERROR("error closing %s: rc = %ld\n", LAST_RCVD, rc); } if (filter->fo_subdir_count) { @@ -777,8 +840,7 @@ static void filter_post(struct obd_device *obd) pop_ctxt(&saved, &filter->fo_ctxt, NULL); } - -static __u64 
filter_next_id(struct filter_obd *filter) +__u64 filter_next_id(struct filter_obd *filter) { obd_id id; LASSERT(filter->fo_fsd != NULL); @@ -792,8 +854,9 @@ static __u64 filter_next_id(struct filter_obd *filter) } /* direct cut-n-paste of mds_blocking_ast() */ -int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) +static int filter_blocking_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, + void *data, int flag) { int do_ast; ENTRY; @@ -852,6 +915,7 @@ static int filter_lock_dentry(struct obd_device *obd, struct dentry *de, RETURN(rc == ELDLM_OK ? 0 : -ENOLCK); /* XXX translate ldlm code */ } +/* We never dget the object parent, so DON'T dput it either */ static void filter_parent_unlock(struct dentry *dparent, struct lustre_handle *lockh, ldlm_mode_t lock_mode) @@ -860,8 +924,8 @@ static void filter_parent_unlock(struct dentry *dparent, } /* We never dget the object parent, so DON'T dput it either */ -static inline struct dentry *filter_parent(struct obd_device *obd, - obd_mode mode, obd_id objid) +struct dentry *filter_parent(struct obd_device *obd, obd_mode mode, + obd_id objid) { struct filter_obd *filter = &obd->u.filter; @@ -873,10 +937,9 @@ static inline struct dentry *filter_parent(struct obd_device *obd, } /* We never dget the object parent, so DON'T dput it either */ -static inline struct dentry *filter_parent_lock(struct obd_device *obd, - obd_mode mode, obd_id objid, - ldlm_mode_t lock_mode, - struct lustre_handle *lockh) +struct dentry *filter_parent_lock(struct obd_device *obd, obd_mode mode, + obd_id objid, ldlm_mode_t lock_mode, + struct lustre_handle *lockh) { unsigned long now = jiffies; struct dentry *de = filter_parent(obd, mode, objid); @@ -886,7 +949,7 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd, return de; rc = filter_lock_dentry(obd, de, lock_mode, lockh); - if (time_after(jiffies, now + 15*HZ)) + if (time_after(jiffies, now + 15 * HZ)) CERROR("slow 
parent lock %lus\n", (jiffies - now) / HZ); return rc ? ERR_PTR(rc) : de; } @@ -897,13 +960,11 @@ static inline struct dentry *filter_parent_lock(struct obd_device *obd, * appropriately for this operation (normally a write lock). If * dir_dentry is NULL, we do a read lock while we do the lookup to * avoid races with create/destroy and such changing the directory - * internal to the filesystem code. - */ -static struct dentry *filter_fid2dentry(struct obd_device *obd, - struct dentry *dir_dentry, - obd_mode mode, obd_id id) + * internal to the filesystem code. */ +struct dentry *filter_fid2dentry(struct obd_device *obd, + struct dentry *dir_dentry, + obd_mode mode, obd_id id) { - struct super_block *sb = obd->u.filter.fo_sb; struct lustre_handle lockh; struct dentry *dparent = dir_dentry; struct dentry *dchild; @@ -911,11 +972,6 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, int len; ENTRY; - if (!sb || !sb->s_dev) { - CERROR("device not initialized.\n"); - RETURN(ERR_PTR(-ENXIO)); - } - if (id == 0) { CERROR("fatal: invalid object id 0\n"); LBUG(); @@ -923,7 +979,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, } len = sprintf(name, LPU64, id); - if (!dir_dentry) { + if (dir_dentry == NULL) { dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh); if (IS_ERR(dparent)) RETURN(dparent); @@ -931,7 +987,7 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, CDEBUG(D_INODE, "looking up object O/%*s/%s\n", dparent->d_name.len, dparent->d_name.name, name); dchild = ll_lookup_one_len(name, dparent, len); - if (!dir_dentry) + if (dir_dentry == NULL) filter_parent_unlock(dparent, &lockh, LCK_PR); if (IS_ERR(dchild)) { CERROR("child lookup error %ld\n", PTR_ERR(dchild)); @@ -947,13 +1003,12 @@ static struct dentry *filter_fid2dentry(struct obd_device *obd, } static struct file *filter_obj_open(struct obd_export *export, - __u64 id, __u32 type, - ldlm_mode_t parent_mode, + struct obd_trans_info *oti, + __u64 id, __u32 
type, int parent_mode, struct lustre_handle *parent_lockh) { struct obd_device *obd = export->exp_obd; struct filter_obd *filter = &obd->u.filter; - struct super_block *sb = filter->fo_sb; struct dentry *dchild = NULL, *dparent = NULL; struct filter_export_data *fed = &export->exp_filter_data; struct filter_dentry_data *fdd = NULL; @@ -966,11 +1021,6 @@ static struct file *filter_obj_open(struct obd_export *export, push_ctxt(&saved, &filter->fo_ctxt, NULL); - if (!sb || !sb->s_dev) { - CERROR("fatal: device not initialized.\n"); - GOTO(cleanup, file = ERR_PTR(-ENXIO)); - } - if (!id) { CERROR("fatal: invalid obdo "LPU64"\n", id); GOTO(cleanup, file = ERR_PTR(-ESTALE)); @@ -1014,6 +1064,7 @@ static struct file *filter_obj_open(struct obd_export *export, if (dchild->d_inode == NULL) { CERROR("opening non-existent object %s - O_CREAT?\n", name); + /* dput(dchild); call filter_create_internal here */ file = ERR_PTR(-ENOENT); GOTO(cleanup, file); } @@ -1083,9 +1134,8 @@ cleanup: } /* Caller must hold LCK_PW on parent and push us into kernel context. - * Caller is also required to ensure that dchild->d_inode exists. - */ -static int filter_destroy_internal(struct obd_device *obd, + * Caller is also required to ensure that dchild->d_inode exists. */ +static int filter_destroy_internal(struct obd_device *obd, obd_id objid, struct dentry *dparent, struct dentry *dchild) { @@ -1099,6 +1149,39 @@ static int filter_destroy_internal(struct obd_device *obd, inode->i_nlink, atomic_read(&inode->i_count)); } + +#if 0 + /* Tell the clients that the object is gone now and that they should + * throw away any cached pages. We don't need to wait until they're + * done, so just decref the lock right away and let ldlm_completion_ast + * clean up when it's all over. 
*/ + ldlm_cli_enqueue(..., LCK_PW, AST_INTENT_DESTROY, &lockh); + ldlm_lock_decref(&lockh, LCK_PW); +#endif + + if (0) { + struct lustre_handle lockh; + int flags = 0, rc; + struct ldlm_res_id res_id = { .name = { objid } }; + + /* This part is a wee bit iffy: we really only want to bust the + * locks on our stripe, so that we don't end up bouncing + * [0->EOF] locks around on each of the OSTs as the rest of the + * destroys get processed. Because we're only talking to + * the local LDLM, though, we should only end up locking the + * whole of our stripe. When bug 1425 (take all locks on OST + * for stripe 0) is fixed, this code should be revisited. */ + struct ldlm_extent extent = { 0, OBD_OBJECT_EOF }; + + rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, + res_id, LDLM_EXTENT, &extent, + sizeof(extent), LCK_PW, &flags, + ldlm_completion_ast, filter_blocking_ast, + NULL, &lockh); + /* We only care about the side-effects, just drop the lock. */ + ldlm_lock_decref(&lockh, LCK_PW); + } + rc = vfs_unlink(dparent->d_inode, dchild); if (rc) @@ -1113,8 +1196,7 @@ static int filter_destroy_internal(struct obd_device *obd, */ static int filter_close_internal(struct obd_export *exp, struct filter_file_data *ffd, - struct obd_trans_info *oti, - int failover) + struct obd_trans_info *oti, int flags) { struct obd_device *obd = exp->exp_obd; struct filter_obd *filter = &obd->u.filter; @@ -1128,13 +1210,14 @@ static int filter_close_internal(struct obd_export *exp, ENTRY; LASSERT(filp->private_data == ffd); - LASSERT(fdd); + LASSERT(fdd != NULL); LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); rc = filp_close(filp, 0); if (atomic_dec_and_test(&fdd->fdd_open_count) && - fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) { + (fdd->fdd_flags & FILTER_FLAG_DESTROY) && + !(flags & OBD_OPT_FAILOVER)) { void *handle; push_ctxt(&saved, &filter->fo_ctxt, NULL); @@ -1148,15 +1231,27 @@ static int filter_close_internal(struct obd_export *exp, cleanup_phase = 2; handle = 
fsfilt_start(obd, dparent->d_inode, - FSFILT_OP_UNLINK); + FSFILT_OP_UNLINK_LOG, oti); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); + if (oti != NULL) { + if (oti->oti_handle == NULL) + oti->oti_handle = handle; + else + LASSERT(oti->oti_handle == handle); + } + +#ifdef ENABLE_ORPHANS + /* Remove orphan unlink record from log */ + llog_cancel_records(filter->fo_catalog, 1, &fdd->fdd_cookie); +#endif /* XXX unlink from PENDING directory now too */ - rc2 = filter_destroy_internal(obd, dparent, dchild); + rc2 = filter_destroy_internal(obd, fdd->fdd_objid, dparent, + dchild); if (rc2 && !rc) rc = rc2; - rc = filter_finish_transno(exp, handle, oti, rc); + rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); @@ -1189,14 +1284,12 @@ cleanup: RETURN(rc); } -/* obd methods */ /* mount the file system (secretly) */ -static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, - char *option) +int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, + char *option) { struct obd_ioctl_data* data = buf; struct filter_obd *filter = &obd->u.filter; - struct vfsmount *mnt; int rc = 0; ENTRY; @@ -1208,7 +1301,8 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, if (IS_ERR(obd->obd_fsops)) RETURN(PTR_ERR(obd->obd_fsops)); - mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option); + mnt = do_kern_mount(data->ioc_inlbuf2, MS_NOATIME | MS_NODIRATIME, + data->ioc_inlbuf1, option); rc = PTR_ERR(mnt); if (IS_ERR(mnt)) GOTO(err_ops, rc); @@ -1257,14 +1351,27 @@ static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, spin_lock_init(&filter->fo_objidlock); INIT_LIST_HEAD(&filter->fo_export_list); + ptlrpc_init_client(MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + "filter_mdc", &filter->fo_mdc_client); + sema_init(&filter->fo_sem, 1); + obd->obd_namespace = 
ldlm_namespace_new("filter-tgt", LDLM_NAMESPACE_SERVER); - if (!obd->obd_namespace) + if (obd->obd_namespace == NULL) GOTO(err_post, rc = -ENOMEM); ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); + /* Create a non-replaying connection for recovery logging, so that + * we don't create a client entry for this local connection, and do + * not log or assign transaction numbers for logging operations. */ +#ifdef ENABLE_ORPHANS + filter->fo_catalog = filter_get_catalog(obd); + if (IS_ERR(filter->fo_catalog)) + GOTO(err_post, rc = PTR_ERR(filter->fo_catalog)); +#endif + RETURN(0); err_post: @@ -1284,82 +1391,67 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) struct obd_ioctl_data* data = buf; char *option = NULL; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + /* bug 1577: implement async-delete for 2.5 */ if (!strcmp(data->ioc_inlbuf2, "ext3")) option = "asyncdel"; +#endif return filter_common_setup(obd, len, buf, option); } -/* sanobd setup methods - use a specific mount option */ -static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - char *option = NULL; - - if (!data->ioc_inlbuf2) - RETURN(-EINVAL); - - /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */ - if (!strcmp(data->ioc_inlbuf2, "extN")) - option = "data=writeback"; - else if (!strcmp(data->ioc_inlbuf2, "ext3")) - option = "data=writeback,asyncdel"; - else - LBUG(); /* just a reminder */ - - return filter_common_setup(obd, len, buf, option); -} - -static int filter_cleanup(struct obd_device *obd, int force, int failover) +static int filter_cleanup(struct obd_device *obd, int flags) { - struct super_block *sb; + struct filter_obd *filter = &obd->u.filter; ENTRY; - if (failover) + if (flags & OBD_OPT_FAILOVER) CERROR("%s: shutting down for failover; client state will" " be preserved.\n", obd->obd_name); if 
(!list_empty(&obd->obd_exports)) { CERROR("%s: still has clients!\n", obd->obd_name); - class_disconnect_exports(obd, failover); + class_disconnect_exports(obd, flags); if (!list_empty(&obd->obd_exports)) { CERROR("still has exports after forced cleanup?\n"); RETURN(-EBUSY); } } +#ifdef ENABLE_ORPHANS + filter_put_catalog(filter->fo_catalog); +#endif + ldlm_namespace_free(obd->obd_namespace); - sb = obd->u.filter.fo_sb; - if (!sb) + if (filter->fo_sb == NULL) RETURN(0); filter_post(obd); - shrink_dcache_parent(sb->s_root); - unlock_kernel(); + shrink_dcache_parent(filter->fo_sb->s_root); + filter->fo_sb = 0; - if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){ + if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1) CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name, - atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count)); - } - - mntput(obd->u.filter.fo_vfsmnt); - obd->u.filter.fo_sb = 0; -/* destroy_buffers(obd->u.filter.fo_sb->s_dev);*/ + atomic_read(&filter->fo_vfsmnt->mnt_count)); + unlock_kernel(); + mntput(filter->fo_vfsmnt); + //destroy_buffers(filter->fo_sb->s_dev); + filter->fo_sb = NULL; fsfilt_put_ops(obd->obd_fsops); lock_kernel(); RETURN(0); } -int filter_attach(struct obd_device *obd, obd_count len, void *data) +static int filter_attach(struct obd_device *obd, obd_count len, void *data) { struct lprocfs_static_vars lvars; int rc; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(filter, &lvars); rc = lprocfs_obd_attach(obd, lvars.obd_vars); if (rc != 0) return rc; @@ -1376,7 +1468,7 @@ int filter_attach(struct obd_device *obd, obd_count len, void *data) return rc; } -int filter_detach(struct obd_device *dev) +static int filter_detach(struct obd_device *dev) { lprocfs_free_obd_stats(dev); return lprocfs_obd_detach(dev); @@ -1391,17 +1483,16 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, struct filter_client_data *fcd; struct filter_obd *filter = &obd->u.filter; int rc; - ENTRY; - if (!conn || !obd || 
!cluuid) + if (conn == NULL || obd == NULL || cluuid == NULL) RETURN(-EINVAL); rc = class_connect(conn, obd, cluuid); if (rc) RETURN(rc); exp = class_conn2export(conn); - LASSERT(exp); + LASSERT(exp != NULL); fed = &exp->exp_filter_data; class_export_put(exp); @@ -1450,37 +1541,37 @@ static void filter_destroy_export(struct obd_export *exp) list_del(&ffd->ffd_export_list); spin_unlock(&fed->fed_lock); - CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n", - ffd->ffd_file->f_dentry->d_name.len, + CDEBUG(D_INFO, "force close file %*s (hdl %p:"LPX64") on " + "disconnect\n", ffd->ffd_file->f_dentry->d_name.len, ffd->ffd_file->f_dentry->d_name.name, ffd, ffd->ffd_handle.h_cookie); - filter_close_internal(exp, ffd, NULL, exp->exp_failover); + filter_close_internal(exp, ffd, NULL, exp->exp_flags); spin_lock(&fed->fed_lock); } spin_unlock(&fed->fed_lock); if (exp->exp_obd->obd_replayable) - filter_client_free(exp, exp->exp_failover); + filter_client_free(exp, exp->exp_flags); EXIT; } /* also incredibly similar to mds_disconnect */ -static int filter_disconnect(struct lustre_handle *conn, int failover) +static int filter_disconnect(struct lustre_handle *conn, int flags) { struct obd_export *exp = class_conn2export(conn); + unsigned long irqflags; int rc; - unsigned long flags; ENTRY; LASSERT(exp); ldlm_cancel_locks_for_export(exp); - spin_lock_irqsave(&exp->exp_lock, flags); - exp->exp_failover = failover; - spin_unlock_irqrestore(&exp->exp_lock, flags); + spin_lock_irqsave(&exp->exp_lock, irqflags); + exp->exp_flags = flags; + spin_unlock_irqrestore(&exp->exp_lock, irqflags); - rc = class_disconnect(conn, failover); + rc = class_disconnect(conn, flags); fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb); class_export_put(exp); @@ -1488,29 +1579,8 @@ static int filter_disconnect(struct lustre_handle *conn, int failover) RETURN(rc); } -static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid) -{ - int type = oa->o_mode & S_IFMT; - ENTRY; 
- - CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n", - inode->i_ino, inode, oa->o_id, valid); - /* Don't copy the inode number in place of the object ID */ - obdo_from_inode(oa, inode, valid); - oa->o_mode &= ~S_IFMT; - oa->o_mode |= type; - - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - obd_rdev rdev = kdev_t_to_nr(inode->i_rdev); - oa->o_rdev = rdev; - oa->o_valid |= OBD_MD_FLRDEV; - } - - EXIT; -} - -static struct dentry *__filter_oa2dentry(struct lustre_handle *conn, - struct obdo *oa, char *what) +struct dentry *__filter_oa2dentry(struct obd_device *obd, + struct obdo *oa, const char *what) { struct dentry *dchild = NULL; @@ -1525,22 +1595,14 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn, LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); filter_ffd_put(ffd); - CDEBUG(D_INODE, - "got child objid %*s: %p, count = %d\n", - dchild->d_name.len, dchild->d_name.name, + CDEBUG(D_INODE,"%s got child objid %*s: %p, count %d\n", + what, dchild->d_name.len, dchild->d_name.name, dchild, atomic_read(&dchild->d_count)); } } - if (!dchild) { - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(ERR_PTR(-EINVAL)); - } + if (!dchild) dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id); - } if (IS_ERR(dchild)) { CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id); @@ -1556,20 +1618,27 @@ static struct dentry *__filter_oa2dentry(struct lustre_handle *conn, return dchild; } -#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__) - static int filter_getattr(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md) { struct dentry *dentry = NULL; + struct obd_device *obd; int rc = 0; ENTRY; - dentry = filter_oa2dentry(conn, oa); + obd = class_conn2obd(conn); + if (obd == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie); + RETURN(-EINVAL); + } + + dentry = 
filter_oa2dentry(obd, oa); if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); - filter_from_inode(oa, dentry->d_inode, oa->o_valid); + /* Limit the valid bits in the return data to what we actually use */ + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS); f_dput(dentry); RETURN(rc); @@ -1580,48 +1649,55 @@ static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *oti) { struct obd_run_ctxt saved; - struct obd_export *export = class_conn2export(conn); - struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; + struct obd_export *exp; + struct filter_obd *filter; struct dentry *dentry; struct iattr iattr; - struct inode *inode; - void * handle; + void *handle; int rc, rc2; ENTRY; - dentry = filter_oa2dentry(conn, oa); + LASSERT(oti != NULL); + exp = class_conn2export(conn); + if (!exp) { + CERROR("invalid client cookie "LPX64"\n", conn->cookie); + RETURN(-EINVAL); + } + dentry = filter_oa2dentry(exp->exp_obd, oa); if (IS_ERR(dentry)) GOTO(out_exp, rc = PTR_ERR(dentry)); + filter = &exp->exp_obd->u.filter; + iattr_from_obdo(&iattr, oa, oa->o_valid); - iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG; - inode = dentry->d_inode; push_ctxt(&saved, &filter->fo_ctxt, NULL); lock_kernel(); + + /* XXX this could be a rwsem instead, if filter_preprw played along */ if (iattr.ia_valid & ATTR_SIZE) - down(&inode->i_sem); + down(&dentry->d_inode->i_sem); - handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR); + handle = fsfilt_start(exp->exp_obd, dentry->d_inode, FSFILT_OP_SETATTR, + oti); if (IS_ERR(handle)) GOTO(out_unlock, rc = PTR_ERR(handle)); - rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1); - rc = filter_finish_transno(export, handle, oti, rc); - rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0); + rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); + rc = filter_finish_transno(exp, oti, rc); + rc2 = 
fsfilt_commit(exp->exp_obd, dentry->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) rc = rc2; } - if (iattr.ia_valid & ATTR_SIZE) { - up(&inode->i_sem); - oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME; - obdo_from_inode(oa, inode, oa->o_valid); - } + if (iattr.ia_valid & ATTR_SIZE) + up(&dentry->d_inode->i_sem); + + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS); out_unlock: unlock_kernel(); @@ -1629,7 +1705,7 @@ out_unlock: f_dput(dentry); out_exp: - class_export_put(export); + class_export_put(exp); RETURN(rc); } @@ -1637,7 +1713,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti, struct obd_client_handle *och) { - struct obd_export *export = NULL; + struct obd_export *exp; struct lustre_handle *handle; struct filter_file_data *ffd; struct file *filp; @@ -1645,19 +1721,19 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa, int rc = 0; ENTRY; - export = class_conn2export(conn); - if (!export) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - conn->cookie); - GOTO(out, rc = -EINVAL); + exp = class_conn2export(conn); + if (exp == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie); + RETURN(-EINVAL); } - filp = filter_obj_open(export, oa->o_id, oa->o_mode, + filp = filter_obj_open(exp, oti, oa->o_id, oa->o_mode, LCK_PR, &parent_lockh); if (IS_ERR(filp)) GOTO(out, rc = PTR_ERR(filp)); - filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid); + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa, filp->f_dentry->d_inode, FILTER_VALID_FLAGS); ffd = filp->private_data; handle = obdo_handle(oa); @@ -1665,7 +1741,7 @@ static int filter_open(struct lustre_handle *conn, struct obdo *oa, oa->o_valid |= OBD_MD_FLHANDLE; out: - class_export_put(export); + class_export_put(exp); if (!rc) { memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, sizeof(parent_lockh)); 
@@ -1677,15 +1753,16 @@ out: static int filter_close(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti) { - struct obd_export *exp = class_conn2export(conn); + struct obd_export *exp; struct filter_file_data *ffd; struct filter_export_data *fed; int rc; ENTRY; - if (!exp) { - CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie); - GOTO(out, rc = -EINVAL); + exp = class_conn2export(conn); + if (exp == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie); + RETURN(-EINVAL); } if (!(oa->o_valid & OBD_MD_FLHANDLE)) { @@ -1705,6 +1782,9 @@ static int filter_close(struct lustre_handle *conn, struct obdo *oa, list_del(&ffd->ffd_export_list); spin_unlock(&fed->fed_lock); + oa->o_valid = OBD_MD_FLID; + obdo_from_inode(oa,ffd->ffd_file->f_dentry->d_inode,FILTER_VALID_FLAGS); + rc = filter_close_internal(exp, ffd, oti, 0); filter_ffd_put(ffd); GOTO(out, rc); @@ -1717,24 +1797,25 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct obd_export *exp; - struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; + struct obd_device *obd; + struct filter_obd *filter; struct obd_run_ctxt saved; struct lustre_handle parent_lockh; struct dentry *dparent; + struct ll_fid mds_fid = { .id = 0 }; struct dentry *dchild = NULL; - struct iattr; void *handle; int err, rc, cleanup_phase; ENTRY; - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); + exp = class_conn2export(conn); + if (exp == NULL) { + CDEBUG(D_IOCTL,"invalid client cookie "LPX64"\n", conn->cookie); RETURN(-EINVAL); } - exp = class_conn2export(conn); - + obd = exp->exp_obd; + filter = &obd->u.filter; push_ctxt(&saved, &filter->fo_ctxt, NULL); retry: oa->o_id = filter_next_id(filter); @@ -1760,21 +1841,42 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, } cleanup_phase = 2; - handle = 
fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE); + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE_LOG, oti); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); rc = vfs_create(dparent->d_inode, dchild, oa->o_mode); - if (rc) + if (rc) { CERROR("create failed rc = %d\n", rc); + } else if (oa->o_valid & (OBD_MD_FLCTIME|OBD_MD_FLMTIME|OBD_MD_FLSIZE)){ + struct iattr attr; - rc = filter_finish_transno(exp, handle, oti, rc); - err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd); - if (err) { - CERROR("unable to write lastobjid but file created\n"); - if (!rc) - rc = err; + iattr_from_obdo(&attr, oa, oa->o_valid); + rc = fsfilt_setattr(obd, dchild, handle, &attr, 1); + if (rc) + CERROR("create setattr failed rc = %d\n", rc); } + rc = filter_finish_transno(exp, oti, rc); + err = filter_update_server_data(obd, filter->fo_rcvd_filp, + filter->fo_fsd); + if (err) + CERROR("unable to write lastobjid but file created\n"); + + /* Set flags for fields we have set in the inode struct */ + if (!rc && mds_fid.id && (oa->o_valid & OBD_MD_FLCOOKIE)) { + err = filter_log_op_create(obd->u.filter.fo_catalog, &mds_fid, + dchild->d_inode->i_ino, + dchild->d_inode->i_generation, + oti->oti_logcookies); + if (err) { + CERROR("error logging create record: rc %d\n", err); + oa->o_valid = OBD_MD_FLID; + } else { + oa->o_valid = OBD_MD_FLID | OBD_MD_FLCOOKIE; + } + } else + oa->o_valid = OBD_MD_FLID; + err = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (err) { CERROR("error on commit, err = %d\n", err); @@ -1786,9 +1888,7 @@ static int filter_create(struct lustre_handle *conn, struct obdo *oa, GOTO(cleanup, rc); /* Set flags for fields we have set in the inode struct */ - oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME; - filter_from_inode(oa, dchild->d_inode, oa->o_valid); + obdo_from_inode(oa, dchild->d_inode, FILTER_VALID_FLAGS); EXIT; cleanup: @@ -1819,24 +1919,25 @@ static 
int filter_destroy(struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *ea, struct obd_trans_info *oti) { struct obd_export *exp; - struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; - struct dentry *dparent, *dchild = NULL; + struct obd_device *obd; + struct filter_obd *filter; + struct dentry *dchild = NULL, *dparent = NULL; struct filter_dentry_data *fdd; struct obd_run_ctxt saved; void *handle = NULL; struct lustre_handle parent_lockh; + struct llog_cookie *fcc = NULL; int rc, rc2, cleanup_phase = 0; ENTRY; - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); + exp = class_conn2export(conn); + if (exp == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",conn->cookie); RETURN(-EINVAL); } - exp = class_conn2export(conn); - - CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id); + obd = exp->exp_obd; + filter = &obd->u.filter; push_ctxt(&saved, &filter->fo_ctxt, NULL); dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id, @@ -1850,38 +1951,53 @@ static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, GOTO(cleanup, rc = -ENOENT); cleanup_phase = 2; - if (!dchild->d_inode) { + if (dchild->d_inode == NULL) { CERROR("destroying non-existent object "LPU64"\n", oa->o_id); GOTO(cleanup, rc = -ENOENT); } - - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK); + handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK_LOG, oti); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); cleanup_phase = 3; fdd = dchild->d_fsdata; - if (fdd && atomic_read(&fdd->fdd_open_count)) { - LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC); + + /* Our MDC connection is established by the MDS to us */ + if ((oa->o_valid & OBD_MD_FLCOOKIE) && filter->fo_mdc_imp != NULL) { + OBD_ALLOC(fcc, sizeof(*fcc)); + if (fcc != NULL) + memcpy(fcc, obdo_logcookie(oa), sizeof(*fcc)); + } + + if (fdd != NULL && atomic_read(&fdd->fdd_open_count)) { + LASSERT(fdd->fdd_magic == 
FILTER_DENTRY_MAGIC); if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) { fdd->fdd_flags |= FILTER_FLAG_DESTROY; - /* XXX put into PENDING directory in case of crash */ + +#ifdef ENABLE_ORPHANS + filter_log_op_orphan(filter->fo_catalog, oa->o_id, + oa->o_generation,&fdd->fdd_cookie); +#endif CDEBUG(D_INODE, "defer destroy of %dx open objid "LPU64"\n", atomic_read(&fdd->fdd_open_count), oa->o_id); - } else + } else { CDEBUG(D_INODE, "repeat destroy of %dx open objid "LPU64"\n", atomic_read(&fdd->fdd_open_count), oa->o_id); + } GOTO(cleanup, rc = 0); } - rc = filter_destroy_internal(obd, dparent, dchild); + rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild); cleanup: switch(cleanup_phase) { case 3: - rc = filter_finish_transno(exp, handle, oti, rc); + if (fcc != NULL) + fsfilt_set_last_rcvd(obd, 0, oti->oti_handle, + filter_cancel_cookies_cb, fcc); + rc = filter_finish_transno(exp, oti, rc); rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); if (rc2) { CERROR("error on commit, err = %d\n", rc2); @@ -1930,742 +2046,17 @@ static int filter_truncate(struct lustre_handle *conn, struct obdo *oa, RETURN(error); } -static inline void lustre_put_page(struct page *page) -{ - page_cache_release(page); -} - -static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page; - unsigned long index = lnb->offset >> PAGE_SHIFT; - int rc; - - page = grab_cache_page(mapping, index); /* locked page */ - if (IS_ERR(page)) - return lnb->rc = PTR_ERR(page); - - lnb->page = page; - - if (inode->i_size < lnb->offset + lnb->len - 1) - lnb->rc = inode->i_size - lnb->offset; - else - lnb->rc = lnb->len; - - if (PageUptodate(page)) { - unlock_page(page); - return 0; - } - - rc = mapping->a_ops->readpage(NULL, page); - if (rc < 0) { - CERROR("page index %lu, rc = %d\n", index, rc); - lnb->page = NULL; - lustre_put_page(page); - return lnb->rc = rc; - } - - return 0; -} - -static int 
filter_finish_page_read(struct niobuf_local *lnb) -{ - if (lnb->page == NULL) - return 0; - - if (PageUptodate(lnb->page)) - return 0; - - wait_on_page(lnb->page); - if (!PageUptodate(lnb->page)) { - CERROR("page index %lu/offset "LPX64" not uptodate\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); - } - if (PageError(lnb->page)) { - CERROR("page index %lu/offset "LPX64" has error\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); - } - - return 0; - -err_page: - lustre_put_page(lnb->page); - lnb->page = NULL; - return lnb->rc; -} - -static struct page *lustre_get_page_write(struct inode *inode, - unsigned long index) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page; - int rc; - - page = grab_cache_page(mapping, index); /* locked page */ - - if (!IS_ERR(page)) { - /* Note: Called with "O" and "PAGE_SIZE" this is essentially - * a no-op for most filesystems, because we write the whole - * page. For partial-page I/O this will read in the page. - */ - rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE); - if (rc) { - CERROR("page index %lu, rc = %d\n", index, rc); - if (rc != -ENOSPC) - LBUG(); - GOTO(err_unlock, rc); - } - /* XXX not sure if we need this if we are overwriting page */ - if (PageError(page)) { - CERROR("error on page index %lu, rc = %d\n", index, rc); - LBUG(); - GOTO(err_unlock, rc = -EIO); - } - } - return page; - -err_unlock: - unlock_page(page); - lustre_put_page(page); - return ERR_PTR(rc); -} - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int waitfor_one_page(struct page *page) -{ - wait_on_page_locked(page); - return 0; -} -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -/* We should only change the file mtime (and not the ctime, like - * update_inode_times() in generic_file_write()) when we only change data. 
- */ -static inline void inode_update_time(struct inode *inode, int ctime_too) -{ - time_t now = CURRENT_TIME; - if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now)) - return; - inode->i_mtime = now; - if (ctime_too) - inode->i_ctime = now; - mark_inode_dirty_sync(inode); -} -#endif - -static int lustre_commit_write(struct niobuf_local *lnb) -{ - struct page *page = lnb->page; - unsigned from = lnb->offset & ~PAGE_MASK; - unsigned to = from + lnb->len; - struct inode *inode = page->mapping->host; - int err; - - LASSERT(to <= PAGE_SIZE); - err = page->mapping->a_ops->commit_write(NULL, page, from, to); - if (!err && IS_SYNC(inode)) - err = waitfor_one_page(page); - //SetPageUptodate(page); // the client commit_write will do this - - SetPageReferenced(page); - unlock_page(page); - lustre_put_page(page); - return err; -} - -int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb, - int *pglocked) -{ - unsigned long index = lnb->offset >> PAGE_SHIFT; - struct address_space *mapping = inode->i_mapping; - struct page *page; - int rc; - - //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL)); - if (*pglocked) - page = grab_cache_page_nowait(mapping, index); /* locked page */ - else - page = grab_cache_page(mapping, index); /* locked page */ - - - /* This page is currently locked, so get a temporary page instead. 
*/ - if (!page) { - CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index); - page = alloc_pages(GFP_KERNEL, 0); /* locked page */ - if (!page) { - CERROR("no memory for a temp page\n"); - GOTO(err, rc = -ENOMEM); - } - page->index = index; - lnb->page = page; - lnb->flags |= N_LOCAL_TEMP_PAGE; - } else if (!IS_ERR(page)) { - (*pglocked)++; - - rc = mapping->a_ops->prepare_write(NULL, page, - lnb->offset & ~PAGE_MASK, - lnb->len); - if (rc) { - if (rc != -ENOSPC) - CERROR("page index %lu, rc = %d\n", index, rc); - GOTO(err_unlock, rc); - } - /* XXX not sure if we need this if we are overwriting page */ - if (PageError(page)) { - CERROR("error on page index %lu, rc = %d\n", index, rc); - LBUG(); - GOTO(err_unlock, rc = -EIO); - } - lnb->page = page; - } - - return 0; - -err_unlock: - unlock_page(page); - lustre_put_page(page); -err: - return lnb->rc = rc; -} - -/* - * We need to balance prepare_write() calls with commit_write() calls. - * If the page has been prepared, but we have no data for it, we don't - * want to overwrite valid data on disk, but we still need to zero out - * data for space which was newly allocated. Like part of what happens - * in __block_prepare_write() for newly allocated blocks. - * - * XXX currently __block_prepare_write() creates buffers for all the - * pages, and the filesystems mark these buffers as BH_New if they - * were newly allocated from disk. We use the BH_New flag similarly. - */ -static int filter_commit_write(struct niobuf_local *lnb, int err) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - if (err) { - unsigned block_start, block_end; - struct buffer_head *bh, *head = lnb->page->buffers; - unsigned blocksize = head->b_size; - - /* debugging: just seeing if this ever happens */ - CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR, - "called for ino %lu:%lu on err %d\n", - lnb->page->mapping->host->i_ino, lnb->page->index, err); - - /* Currently one buffer per page, but in the future... 
*/ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - block_end = block_start + blocksize; - if (buffer_new(bh)) { - memset(kmap(lnb->page) + block_start, 0, - blocksize); - kunmap(lnb->page); - } - } - } -#endif - return lustre_commit_write(lnb); -} - -static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb, - struct niobuf_local *res, void **desc_private, - struct obd_trans_info *oti) -{ - struct obd_run_ctxt saved; - struct obd_device *obd; - struct obd_ioobj *o; - struct niobuf_remote *rnb; - struct niobuf_local *lnb; - struct fsfilt_objinfo *fso; - struct dentry *dentry; - struct inode *inode; - int pglocked = 0, rc = 0, i, j, tot_bytes = 0; - unsigned long now = jiffies; - ENTRY; - - memset(res, 0, niocount * sizeof(*res)); - - obd = exp->exp_obd; - if (obd == NULL) - RETURN(-EINVAL); - - // theoretically we support multi-obj BRW RPCs, but until then... - LASSERT(objcount == 1); - - OBD_ALLOC(fso, objcount * sizeof(*fso)); - if (!fso) - RETURN(-ENOMEM); - - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - - for (i = 0, o = obj; i < objcount; i++, o++) { - struct filter_dentry_data *fdd; - - LASSERT(o->ioo_bufcnt); - - dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); - - if (IS_ERR(dentry)) - GOTO(out_objinfo, rc = PTR_ERR(dentry)); - - fso[i].fso_dentry = dentry; - fso[i].fso_bufcnt = o->ioo_bufcnt; - - if (!dentry->d_inode) { - CERROR("trying to BRW to non-existent file "LPU64"\n", - o->ioo_id); - f_dput(dentry); - GOTO(out_objinfo, rc = -ENOENT); - } - - /* If we ever start to support mutli-object BRW RPCs, we will - * need to get locks on mulitple inodes (in order) or use the - * DLM to do the locking for us (and use the same locking in - * filter_setattr() for truncate). 
That isn't all, because - * there still exists the possibility of a truncate starting - * a new transaction while holding the ext3 rwsem = write - * while some writes (which have started their transactions - * here) blocking on the ext3 rwsem = read => lock inversion. - * - * The handling gets very ugly when dealing with locked pages. - * It may be easier to just get rid of the locked page code - * (which has problems of its own) and either discover we do - * not need it anymore (i.e. it was a symptom of another bug) - * or ensure we get the page locks in an appropriate order. - */ - if (cmd & OBD_BRW_WRITE) - down(&dentry->d_inode->i_sem); - fdd = dentry->d_fsdata; - if (!fdd || !atomic_read(&fdd->fdd_open_count)) - CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n", - o->ioo_id); - } - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep setup %lus\n", (jiffies - now) / HZ); - - if (cmd & OBD_BRW_WRITE) { - *desc_private = fsfilt_brw_start(obd, objcount, fso, - niocount, nb); - if (IS_ERR(*desc_private)) { - rc = PTR_ERR(*desc_private); - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "error starting transaction: rc = %d\n", rc); - *desc_private = NULL; - GOTO(out_objinfo, rc); - } - } - - for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) { - dentry = fso[i].fso_dentry; - inode = dentry->d_inode; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) { - if (j == 0) - lnb->dentry = dentry; - else - lnb->dentry = dget(dentry); - - lnb->offset = rnb->offset; - lnb->len = rnb->len; - lnb->flags = rnb->flags; - lnb->start = jiffies; - - if (cmd & OBD_BRW_WRITE) { - rc = filter_get_page_write(inode,lnb,&pglocked); - if (rc) - up(&dentry->d_inode->i_sem); - } else if (inode->i_size <= rnb->offset) { - /* If there's no more data, abort early. - * lnb->page == NULL and lnb->rc == 0, so it's - * easy to detect later. 
*/ - f_dput(dentry); - lnb->dentry = NULL; - break; - } else { - rc = filter_start_page_read(inode, lnb); - } - - if (rc) { - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "page err %u@"LPU64" %u/%u %p: rc %d\n", - lnb->len, lnb->offset, j, o->ioo_bufcnt, - dentry, rc); - f_dput(dentry); - GOTO(out_pages, rc); - } - - tot_bytes += lnb->len; - - if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) { - /* Likewise with a partial read */ - break; - } - } - } - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep get page %lus\n", (jiffies - now) / HZ); - - if (cmd & OBD_BRW_READ) { - lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, - tot_bytes); - while (lnb-- > res) { - rc = filter_finish_page_read(lnb); - if (rc) { - CERROR("error page %u@"LPU64" %u %p: rc %d\n", - lnb->len, lnb->offset, lnb - res, - lnb->dentry, rc); - f_dput(lnb->dentry); - GOTO(out_pages, rc); - } - } - } else - lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, - tot_bytes); - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ); - - EXIT; -out: - OBD_FREE(fso, objcount * sizeof(*fso)); - current->journal_info = NULL; - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - return rc; - -out_pages: - while (lnb-- > res) { - if (cmd & OBD_BRW_WRITE) { - filter_commit_write(lnb, rc); - up(&lnb->dentry->d_inode->i_sem); - } else { - lustre_put_page(lnb->page); - } - f_dput(lnb->dentry); - } - if (cmd & OBD_BRW_WRITE) { - filter_finish_transno(exp, *desc_private, oti, rc); - fsfilt_commit(obd, - filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode, - *desc_private, 0); - } - goto out; /* dropped the dentry refs already (one per page) */ - -out_objinfo: - for (i = 0; i < objcount && fso[i].fso_dentry; i++) { - if (cmd & OBD_BRW_WRITE) - up(&fso[i].fso_dentry->d_inode->i_sem); - f_dput(fso[i].fso_dentry); - } - goto out; -} - -static int filter_write_locked_page(struct niobuf_local *lnb) -{ - struct page *lpage; - void *lpage_addr; - void 
*lnb_addr; - int rc; - ENTRY; - - lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index); - if (IS_ERR(lpage)) { - /* It is highly unlikely that we would ever get an error here. - * The page we want to get was previously locked, so it had to - * have already allocated the space, and we were just writing - * over the same data, so there would be no hole in the file. - * - * XXX: possibility of a race with truncate could exist, need - * to check that. There are no guarantees w.r.t. - * write order even on a local filesystem, although the - * normal response would be to return the number of bytes - * successfully written and leave the rest to the app. - */ - rc = PTR_ERR(lpage); - CERROR("error getting locked page index %ld: rc = %d\n", - lnb->page->index, rc); - LBUG(); - lustre_commit_write(lnb); - RETURN(rc); - } - - /* 2 kmaps == vanishingly small deadlock opportunity */ - lpage_addr = kmap(lpage); - lnb_addr = kmap(lnb->page); - - memcpy(lpage_addr, lnb_addr, PAGE_SIZE); - - kunmap(lnb->page); - kunmap(lpage); - - lustre_put_page(lnb->page); - - lnb->page = lpage; - rc = lustre_commit_write(lnb); - if (rc) - CERROR("error committing locked page %ld: rc = %d\n", - lnb->page->index, rc); - - RETURN(rc); -} - static int filter_syncfs(struct obd_export *exp) { - struct obd_device *obd = exp->exp_obd; ENTRY; - RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb)); -} - -static int filter_commitrw(int cmd, struct obd_export *exp, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *res, - void *desc_private, struct obd_trans_info *oti) -{ - struct obd_run_ctxt saved; - struct obd_ioobj *o; - struct niobuf_local *lnb; - struct obd_device *obd = exp->exp_obd; - int found_locked = 0, rc = 0, i; - unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ - ENTRY; - - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - - LASSERT(!current->journal_info); - current->journal_info = desc_private; - - for (i = 0, o = obj, lnb = res; i < objcount; 
i++, o++) { - int j; - - if (cmd & OBD_BRW_WRITE) { - inode_update_time(lnb->dentry->d_inode, 1); - up(&lnb->dentry->d_inode->i_sem); - } - for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) { - if (lnb->page == NULL) { - continue; - } - - if (lnb->flags & N_LOCAL_TEMP_PAGE) { - found_locked++; - continue; - } - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commitrw %lus\n", - (jiffies - lnb->start) / HZ); - - if (cmd & OBD_BRW_WRITE) { - int err = filter_commit_write(lnb, 0); - - if (!rc) - rc = err; - } else { - lustre_put_page(lnb->page); - } - - f_dput(lnb->dentry); - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commit_write %lus\n", - (jiffies - lnb->start) / HZ); - } - } - - for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount; - i++, o++) { - int j; - for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) { - int err; - if (!(lnb->flags & N_LOCAL_TEMP_PAGE)) - continue; - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commitrw locked %lus\n", - (jiffies - lnb->start) / HZ); - - err = filter_write_locked_page(lnb); - if (!rc) - rc = err; - f_dput(lnb->dentry); - found_locked--; - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commit_write locked %lus\n", - (jiffies - lnb->start) / HZ); - } - } - - if (cmd & OBD_BRW_WRITE) { - /* We just want any dentry for the commit, for now */ - struct dentry *dparent = filter_parent(obd, S_IFREG, 0); - int err; - - rc = filter_finish_transno(exp, desc_private, oti, rc); - err = fsfilt_commit(obd, dparent->d_inode, desc_private, - obd_sync_filter); - if (err) - rc = err; - if (obd_sync_filter) - LASSERT(oti->oti_transno <= obd->obd_last_committed); - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ); - } - - LASSERT(!current->journal_info); - - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - RETURN(rc); + RETURN(fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb)); } -static int filter_brw(int cmd, struct 
lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) +static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { - struct obd_export *export = class_conn2export(conn); - struct obd_ioobj ioo; - struct niobuf_local *lnb; - struct niobuf_remote *rnb; - obd_count i; - void *desc_private; - int ret = 0; ENTRY; - - if (export == NULL) - RETURN(-EINVAL); - - OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local)); - OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote)); - - if (lnb == NULL || rnb == NULL) - GOTO(out, ret = -ENOMEM); - - for (i = 0; i < oa_bufs; i++) { - rnb[i].offset = pga[i].off; - rnb[i].len = pga[i].count; - } - - ioo.ioo_id = lsm->lsm_object_id; - ioo.ioo_gr = 0; - ioo.ioo_type = S_IFREG; - ioo.ioo_bufcnt = oa_bufs; - - ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb, - &desc_private, oti); - if (ret != 0) - GOTO(out, ret); - - for (i = 0; i < oa_bufs; i++) { - void *virt = kmap(pga[i].pg); - obd_off off = pga[i].off & ~PAGE_MASK; - void *addr = kmap(lnb[i].page); - - /* 2 kmaps == vanishingly small deadlock opportunity */ - - if (cmd & OBD_BRW_WRITE) - memcpy(addr + off, virt + off, pga[i].count); - else - memcpy(virt + off, addr + off, pga[i].count); - - kunmap(addr); - kunmap(virt); - } - - ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private, - oti); - -out: - if (lnb) - OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local)); - if (rnb) - OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote)); - class_export_put(export); - RETURN(ret); -} - -static int filter_san_preprw(int cmd, struct lustre_handle *conn, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb) -{ - struct obd_device *obd; - struct obd_ioobj *o = obj; - struct niobuf_remote *rnb = nb; - int rc = 0; - int i; - ENTRY; - - obd = class_conn2obd(conn); - if (!obd) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - 
conn->cookie); - RETURN(-EINVAL); - } - - for (i = 0; i < objcount; i++, o++) { - struct dentry *dentry; - struct inode *inode; - int (*fs_bmap)(struct address_space *, long); - int j; - - dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); - if (IS_ERR(dentry)) - GOTO(out, rc = PTR_ERR(dentry)); - inode = dentry->d_inode; - if (!inode) { - CERROR("trying to BRW to non-existent file "LPU64"\n", - o->ioo_id); - f_dput(dentry); - GOTO(out, rc = -ENOENT); - } - fs_bmap = inode->i_mapping->a_ops->bmap; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++) { - long block; - - block = rnb->offset >> inode->i_blkbits; - - if (cmd == OBD_BRW_READ) { - block = fs_bmap(inode->i_mapping, block); - } else { - loff_t newsize = rnb->offset + rnb->len; - /* fs_prep_san_write will also update inode - * size for us: - * (1) new alloced block - * (2) existed block but size extented - */ - /* FIXME We could call fs_prep_san_write() - * only once for all the blocks allocation. - * Now call it once for each block, for - * simplicity. 
And if error happens, we - * probably need to release previous alloced - * block */ - rc = fs_prep_san_write(obd, inode, &block, - 1, newsize); - if (rc) - break; - } - - rnb->offset = block; - } - f_dput(dentry); - } -out: - RETURN(rc); -} - -static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - struct obd_device *obd = exp->exp_obd; - ENTRY; - RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs)); } @@ -2676,7 +2067,7 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen, ENTRY; obd = class_conn2obd(conn); - if (!obd) { + if (obd == NULL) { CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", conn->cookie); RETURN(-EINVAL); @@ -2702,77 +2093,46 @@ static int filter_get_info(struct lustre_handle *conn, __u32 keylen, RETURN(-EINVAL); } -int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst, - struct lustre_handle *src_conn, struct obdo *src, - obd_size count, obd_off offset, struct obd_trans_info *oti) +static int filter_set_info(struct lustre_handle *conn, __u32 keylen, + void *key, __u32 vallen, void *val) { - struct page *page; - struct lov_stripe_md srcmd, dstmd; - unsigned long index = 0; - int err = 0; - - LBUG(); /* THIS CODE IS NOT CORRECT -phil */ - - memset(&srcmd, 0, sizeof(srcmd)); - memset(&dstmd, 0, sizeof(dstmd)); - srcmd.lsm_object_id = src->o_id; - dstmd.lsm_object_id = dst->o_id; - + struct obd_device *obd; + struct obd_export *exp; + struct obd_import *imp; ENTRY; - CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64 - ", dst: ino "LPU64"\n", - src->o_id, src->o_blocks, src->o_size, dst->o_id); - page = alloc_page(GFP_USER); - if (page == NULL) - RETURN(-ENOMEM); - - wait_on_page(page); - /* XXX with brw vector I/O, we could batch up reads and writes here, - * all we need to do is allocate multiple pages to handle the I/Os - * and arrays to handle the request parameters. 
- */ - while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) { - struct brw_page pg; - - pg.pg = page; - pg.count = PAGE_SIZE; - pg.off = (page->index) << PAGE_SHIFT; - pg.flag = 0; - - page->index = index; - err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL); - if (err) { - EXIT; - break; - } + obd = class_conn2obd(conn); + if (obd == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", + conn->cookie); + RETURN(-EINVAL); + } - pg.flag = OBD_BRW_CREATE; - CDEBUG(D_INFO, "Read page %ld ...\n", page->index); + if (keylen < strlen("mds_conn") || + memcmp(key, "mds_conn", keylen) != 0) + RETURN(-EINVAL); - err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti); + CERROR("Received MDS connection ("LPX64")\n", conn->cookie); + memcpy(&obd->u.filter.fo_mdc_conn, conn, sizeof(*conn)); - /* XXX should handle dst->o_size, dst->o_blocks here */ - if (err) { - EXIT; - break; - } + imp = obd->u.filter.fo_mdc_imp = class_new_import(); - CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index); + exp = class_conn2export(conn); + imp->imp_connection = ptlrpc_connection_addref(exp->exp_connection); + class_export_put(exp); - index++; - } - dst->o_size = src->o_size; - dst->o_blocks = src->o_blocks; - dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - unlock_page(page); - __free_page(page); + imp->imp_client = &obd->u.filter.fo_mdc_client; + imp->imp_remote_handle = *conn; + imp->imp_obd = obd; + imp->imp_dlm_fake = 1; /* XXX rename imp_dlm_fake to something else */ + imp->imp_level = LUSTRE_CONN_FULL; + class_import_put(imp); - RETURN(err); + RETURN(0); } int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn, - int len, void *karg, void *uarg) + int len, void *karg, void *uarg) { struct obd_device *obd = class_conn2obd(conn); @@ -2788,12 +2148,12 @@ int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn, RETURN(0); } - static struct obd_ops filter_obd_ops = { o_owner: THIS_MODULE, o_attach: filter_attach, o_detach: filter_detach, 
o_get_info: filter_get_info, + o_set_info: filter_set_info, o_setup: filter_setup, o_cleanup: filter_cleanup, o_connect: filter_connect, @@ -2810,15 +2170,9 @@ static struct obd_ops filter_obd_ops = { o_punch: filter_truncate, o_preprw: filter_preprw, o_commitrw: filter_commitrw, + o_log_cancel: filter_log_cancel, o_destroy_export: filter_destroy_export, o_iocontrol: filter_iocontrol, -#if 0 - o_san_preprw: filter_san_preprw, - o_preallocate: filter_preallocate_inodes, - o_migrate: filter_migrate, - o_copy: filter_copy_data, - o_iterate: filter_iterate -#endif }; static struct obd_ops filter_sanobd_ops = { @@ -2826,6 +2180,7 @@ static struct obd_ops filter_sanobd_ops = { o_attach: filter_attach, o_detach: filter_detach, o_get_info: filter_get_info, + o_set_info: filter_set_info, o_setup: filter_san_setup, o_cleanup: filter_cleanup, o_connect: filter_connect, @@ -2841,18 +2196,12 @@ static struct obd_ops filter_sanobd_ops = { o_punch: filter_truncate, o_preprw: filter_preprw, o_commitrw: filter_commitrw, + o_log_cancel: filter_log_cancel, o_san_preprw: filter_san_preprw, o_destroy_export: filter_destroy_export, o_iocontrol: filter_iocontrol, -#if 0 - o_preallocate: filter_preallocate_inodes, - o_migrate: filter_migrate, - o_copy: filter_copy_data, - o_iterate: filter_iterate -#endif }; - static int __init obdfilter_init(void) { struct lprocfs_static_vars lvars; @@ -2860,7 +2209,7 @@ static int __init obdfilter_init(void) printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n"); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(filter, &lvars); rc = class_register_type(&filter_obd_ops, lvars.module_vars, OBD_FILTER_DEVICENAME); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 1319dbd..411a9fb 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -22,41 +22,16 @@ #define DEBUG_SUBSYSTEM S_CLASS #include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif 
#include #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else -static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs) -{ - struct obd_device *dev = (struct obd_device *) data; - LASSERT(dev != NULL); - return vfs_statfs(dev->u.filter.fo_sb, sfs); -} - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_filter_statfs); - -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - LASSERT(dev != NULL); - return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype); -} - -int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, + int count, int *eof, void *data) { struct obd_device* obd = (struct obd_device *)data; @@ -67,23 +42,23 @@ int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count, obd->u.filter.fo_vfsmnt->mnt_devname); } -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 }, +static struct lprocfs_vars lprocfs_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, 
+ { "blocksize", lprocfs_rd_blksize, 0, 0 }, + { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, + { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "filestotal", lprocfs_rd_filestotal, 0, 0 }, + { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, + { "fstype", lprocfs_rd_fstype, 0, 0 }, + { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(filter,lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/osc/.cvsignore b/lustre/osc/.cvsignore index e530020..49c6100 100644 --- a/lustre/osc/.cvsignore +++ b/lustre/osc/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index d5e4ec1..e9affd0 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -29,34 +29,26 @@ #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, 
- { "filegroups", rd_filegroups, 0, 0 }, +static struct lprocfs_vars lprocfs_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, + { "blocksize", lprocfs_rd_blksize, 0, 0 }, + { "kbytestotal", lprocfs_rd_kbytestotal, 0, 0 }, + { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, + { "filestotal", lprocfs_rd_filestotal, 0, 0 }, + { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 }, - { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, + { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(osc,lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c index aa04a1a..c8cd6ad 100644 --- a/lustre/osc/osc_lib.c +++ b/lustre/osc/osc_lib.c @@ -34,8 +34,7 @@ static kdev_t path2dev(char *path) { struct dentry *dentry; struct nameidata nd; - kdev_t dev; - KDEVT_VAL(dev, 0); + kdev_t dev = KDEVT_INIT(0); if (!path_init(path, LOOKUP_FOLLOW, &nd)) return 0; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 4bda8de..89061fd 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -30,32 +30,33 @@ #define DEBUG_SUBSYSTEM S_OSC #ifdef __KERNEL__ -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#include -#else -#include -#endif -#else -#include +# include +# include +# include +# include +# include +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +# include +# include +# else +# include +# endif +#else /* __KERNEL__ */ +# include #endif #include #include /* for mds_objid */ #include #include +#include #include #ifndef __CYGWIN__ -#include -#include +# include +# 
include #else -#include +# include #endif #include @@ -64,11 +65,13 @@ #include /* for PTL_MD_MAX_IOV */ #include +static struct llog_cookie zero_cookie = { { 0 } }; + static int osc_attach(struct obd_device *dev, obd_count len, void *data) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(osc,&lvars); return lprocfs_obd_attach(dev, lvars.obd_vars); } @@ -119,29 +122,29 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, if (lmm_bytes < sizeof (*lmm)) { CERROR("lov_mds_md too small: %d, need %d\n", lmm_bytes, (int)sizeof(*lmm)); - RETURN (-EINVAL); + RETURN(-EINVAL); } /* XXX LOV_MAGIC etc check? */ - if (lmm->lmm_object_id == cpu_to_le64 (0)) { - CERROR ("lov_mds_md: zero lmm_object_id\n"); - RETURN (-EINVAL); + if (lmm->lmm_object_id == cpu_to_le64(0)) { + CERROR("lov_mds_md: zero lmm_object_id\n"); + RETURN(-EINVAL); } } lsm_size = lov_stripe_md_size(1); - if (!lsmp) + if (lsmp == NULL) RETURN(lsm_size); - if (*lsmp && !lmm) { + if (*lsmp != NULL && lmm == NULL) { OBD_FREE(*lsmp, lsm_size); *lsmp = NULL; RETURN(0); } - if (!*lsmp) { + if (*lsmp == NULL) { OBD_ALLOC(*lsmp, lsm_size); - if (!*lsmp) + if (*lsmp == NULL) RETURN(-ENOMEM); (*lsmp)->lsm_oinfo[0].loi_dirty_ot = @@ -149,7 +152,7 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot); } - if (lmm) { + if (lmm != NULL) { /* XXX zero *lsmp? 
*/ (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); LASSERT((*lsmp)->lsm_object_id); @@ -167,29 +170,27 @@ static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, static int osc_getattr_interpret(struct ptlrpc_request *req, struct osc_getattr_async_args *aa, int rc) { - struct obdo *oa = aa->aa_oa; struct ost_body *body; ENTRY; - if (rc != 0) { - CERROR("failed: rc = %d\n", rc); - RETURN (rc); - } - - body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body); - if (body == NULL) { - CERROR ("can't unpack ost_body\n"); - RETURN (-EPROTO); - } + if (rc != 0) + RETURN(rc); - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - memcpy(oa, &body->oa, sizeof(*oa)); + body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body); + if (body) { + CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); + memcpy(aa->aa_oa, &body->oa, sizeof(*aa->aa_oa)); - /* This should really be sent by the OST */ - oa->o_blksize = OSC_BRW_MAX_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; + /* This should really be sent by the OST */ + aa->aa_oa->o_blksize = OSC_BRW_MAX_SIZE; + aa->aa_oa->o_valid |= OBD_MD_FLBLKSZ; + } else { + CERROR("can't unpack ost_body\n"); + rc = -EPROTO; + aa->aa_oa->o_valid = 0; + } - RETURN (0); + RETURN(rc); } static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa, @@ -505,7 +506,7 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa, GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); + memcpy(&body->oa, oa, sizeof(body->oa)); request->rq_replen = lustre_msg_size(1, &size); @@ -513,8 +514,8 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa, if (rc) GOTO(out_req, rc); - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_repbuf(request, 0, sizeof(*body), + lustre_swab_ost_body); if (body == NULL) { CERROR ("can't unpack ost_body\n"); GOTO (out_req, rc 
= -EPROTO); @@ -531,13 +532,19 @@ static int osc_create(struct lustre_handle *conn, struct obdo *oa, * This needs to be fixed in a big way. */ lsm->lsm_object_id = oa->o_id; - lsm->lsm_stripe_count = 0; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; *ea = lsm; - if (oti != NULL) + if (oti != NULL) { oti->oti_transno = request->rq_repmsg->transno; + if (oa->o_valid & OBD_MD_FLCOOKIE) { + if (!oti->oti_logcookies) + oti_alloc_cookies(oti, 1); + memcpy(oti->oti_logcookies, obdo_logcookie(oa), + sizeof(oti->oti_onecookie)); + } + } + CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno); EXIT; out_req: @@ -616,14 +623,20 @@ static int osc_destroy(struct lustre_handle *conn, struct obdo *oa, body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); memcpy(&body->oa, oa, sizeof(*oa)); + if (oti && oa->o_valid & OBD_MD_FLCOOKIE) { + memcpy(obdo_logcookie(oa), oti->oti_logcookies, + sizeof(*oti->oti_logcookies)); + oti->oti_logcookies++; + } + request->rq_replen = lustre_msg_size(1, &size); rc = ptlrpc_queue_wait(request); if (rc) GOTO(out, rc); - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_repbuf(request, 0, sizeof(*body), + lustre_swab_ost_body); if (body == NULL) { CERROR ("Can't unpack body\n"); GOTO (out, rc = -EPROTO); @@ -663,7 +676,7 @@ static void osc_update_grant(struct client_obd *cli, struct ost_body *body) return; } - CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev); + CDEBUG(D_ERROR, "got "LPU64" grant\n", body->oa.o_rdev); down(&cli->cl_dirty_sem); cli->cl_dirty_granted = body->oa.o_rdev; /* XXX check for over-run and wake up the io thread that @@ -708,9 +721,8 @@ static void handle_short_read(int nob_read, obd_count page_count, } } -static int check_write_rcs (struct ptlrpc_request *request, - int niocount, obd_count page_count, - struct brw_page *pga) +static int check_write_rcs(struct ptlrpc_request *request, int niocount, + obd_count page_count, struct brw_page *pga) { int i; 
__u32 *remote_rcs; @@ -778,11 +790,10 @@ static obd_count cksum_pages(int nob, obd_count page_count, } #endif -static int osc_brw_prep_request(struct obd_import *imp, +static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page *pga, int cmd, - int *requested_nobp, int *niocountp, - struct ptlrpc_request **reqp) + struct brw_page *pga, int *requested_nobp, + int *niocountp, struct ptlrpc_request **reqp) { struct ptlrpc_request *req; struct ptlrpc_bulk_desc *desc; @@ -804,11 +815,11 @@ static int osc_brw_prep_request(struct obd_import *imp, if (!can_merge_pages (&pga[i - 1], &pga[i])) niocount++; - size[0] = sizeof (*body); - size[1] = sizeof (*ioobj); - size[2] = niocount * sizeof (*niobuf); + size[0] = sizeof(*body); + size[1] = sizeof(*ioobj); + size[2] = niocount * sizeof(*niobuf); - req = ptlrpc_prep_req (imp, opc, 3, size, NULL); + req = ptlrpc_prep_req(imp, opc, 3, size, NULL); if (req == NULL) return (-ENOMEM); @@ -819,16 +830,18 @@ static int osc_brw_prep_request(struct obd_import *imp, desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK, OST_BULK_PORTAL); if (desc == NULL) - GOTO (out, rc = -ENOMEM); + GOTO(out, rc = -ENOMEM); /* NB request now owns desc and will free it when it gets freed */ body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj)); niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)); - ioobj->ioo_id = lsm->lsm_object_id; - ioobj->ioo_gr = 0; - ioobj->ioo_type = S_IFREG; + memcpy(&body->oa, oa, sizeof(*oa)); + + ioobj->ioo_id = oa->o_id; + ioobj->ioo_gr = oa->o_valid & 0 ? 
oa->o_gr : 0; + ioobj->ioo_type = oa->o_mode; ioobj->ioo_bufcnt = niocount; LASSERT (page_count > 0); @@ -836,19 +849,18 @@ static int osc_brw_prep_request(struct obd_import *imp, struct brw_page *pg = &pga[i]; struct brw_page *pg_prev = pg - 1; - LASSERT (pg->count > 0); - LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE); - LASSERT (i == 0 || pg->off > pg_prev->off); + LASSERT(pg->count > 0); + LASSERT((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE); + LASSERT(i == 0 || pg->off > pg_prev->off); - rc = ptlrpc_prep_bulk_page (desc, pg->pg, - pg->off & (PAGE_SIZE - 1), - pg->count); + rc = ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK, + pg->count); if (rc != 0) - GOTO (out, rc); + GOTO(out, rc); requested_nob += pg->count; - if (i > 0 && can_merge_pages (pg_prev, pg)) { + if (i > 0 && can_merge_pages(pg_prev, pg)) { niobuf--; niobuf->len += pg->count; } else { @@ -858,17 +870,17 @@ static int osc_brw_prep_request(struct obd_import *imp, } } - LASSERT ((void *)(niobuf - niocount) == - lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf))); + LASSERT((void *)(niobuf - niocount) == + lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf))); #if CHECKSUM_BULK body->oa.o_valid |= OBD_MD_FLCKSUM; if (opc == OST_BRW_WRITE) - body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga); + body->oa.o_nlink = cksum_pages(requested_nob, page_count, pga); #endif osc_announce_cached(cli, body); - spin_lock_irqsave (&req->rq_lock, flags); + spin_lock_irqsave(&req->rq_lock, flags); req->rq_no_resend = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); + spin_unlock_irqrestore(&req->rq_lock, flags); /* size[0] still sizeof (*body) */ if (opc == OST_WRITE) { @@ -890,21 +902,23 @@ static int osc_brw_prep_request(struct obd_import *imp, return (rc); } -static int osc_brw_fini_request (struct ptlrpc_request *req, - int requested_nob, int niocount, - obd_count page_count, struct brw_page *pga, - int rc) +static int osc_brw_fini_request(struct 
ptlrpc_request *req, struct obdo *oa, + int requested_nob, int niocount, + obd_count page_count, struct brw_page *pga, + int rc) { struct client_obd *cli = &req->rq_import->imp_obd->u.cli; struct ost_body *body; + if (rc < 0) return (rc); - body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body); + body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { CERROR ("Can't unpack body\n"); - RETURN(-EPROTO); + return (-EPROTO); } + osc_update_grant(cli, body); if (req->rq_reqmsg->opc == OST_WRITE) { @@ -913,22 +927,23 @@ static int osc_brw_fini_request (struct ptlrpc_request *req, return (-EPROTO); } - return (check_write_rcs(req, niocount, page_count, pga)); + return(check_write_rcs(req, niocount, page_count, pga)); } if (rc > requested_nob) { - CERROR ("Unexpected rc %d (%d requested)\n", - rc, requested_nob); + CERROR("Unexpected rc %d (%d requested)\n", rc, requested_nob); return (-EPROTO); } if (rc < requested_nob) handle_short_read(rc, page_count, pga); + memcpy(oa, &body->oa, sizeof(*oa)); + #if CHECKSUM_BULK - if (body->oa.o_valid & OBD_MD_FLCKSUM) { + if (oa->o_valid & OBD_MD_FLCKSUM) { static int cksum_counter; - obd_count server_cksum = body->oa.o_nlink; + obd_count server_cksum = oa->o_nlink; obd_count cksum = cksum_pages(rc, page_count, pga); cksum_counter++; @@ -937,6 +952,7 @@ static int osc_brw_fini_request (struct ptlrpc_request *req, ", server NID "LPX64"\n", server_cksum, cksum, imp->imp_connection->c_peer.peer_nid); cksum_counter = 0; + oa->o_rdev = cksum; } else if ((cksum_counter & (-cksum_counter)) == cksum_counter) CERROR("Checksum %u from "LPX64" OK: %x\n", cksum_counter, @@ -953,9 +969,9 @@ static int osc_brw_fini_request (struct ptlrpc_request *req, return (0); } -static int osc_brw_internal(struct lustre_handle *conn, +static int osc_brw_internal(int cmd, struct lustre_handle *conn,struct obdo *oa, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page *pga, int cmd) + obd_count 
page_count, struct brw_page *pga) { int requested_nob; int niocount; @@ -964,8 +980,9 @@ static int osc_brw_internal(struct lustre_handle *conn, ENTRY; restart_bulk: - rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga, - cmd, &requested_nob, &niocount, &request); + rc = osc_brw_prep_request(cmd, class_conn2cliimp(conn), oa, lsm, + page_count, pga, &requested_nob, &niocount, + &request); /* NB ^ sets rq_no_resend */ if (rc != 0) @@ -979,8 +996,8 @@ restart_bulk: goto restart_bulk; } - rc = osc_brw_fini_request (request, requested_nob, niocount, - page_count, pga, rc); + rc = osc_brw_fini_request(request, oa, requested_nob, niocount, + page_count, pga, rc); ptlrpc_req_finished(request); RETURN (rc); @@ -989,6 +1006,7 @@ restart_bulk: static int brw_interpret(struct ptlrpc_request *request, struct osc_brw_async_args *aa, int rc) { + struct obdo *oa = aa->aa_oa; int requested_nob = aa->aa_requested_nob; int niocount = aa->aa_nio_count; obd_count page_count = aa->aa_page_count; @@ -1002,14 +1020,14 @@ static int brw_interpret(struct ptlrpc_request *request, //goto restart_bulk; } - rc = osc_brw_fini_request (request, requested_nob, niocount, - page_count, pga, rc); + rc = osc_brw_fini_request(request, oa, requested_nob, niocount, + page_count, pga, rc); RETURN (rc); } -static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page *pga, - struct ptlrpc_request_set *set, int cmd) +static int async_internal(int cmd, struct lustre_handle *conn, struct obdo *oa, + struct lov_stripe_md *lsm, obd_count page_count, + struct brw_page *pga, struct ptlrpc_request_set *set) { struct ptlrpc_request *request; int requested_nob; @@ -1018,14 +1036,15 @@ static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm, int rc; ENTRY; - rc = osc_brw_prep_request (class_conn2cliimp(conn), - lsm, page_count, pga, cmd, - &requested_nob, &nio_count, &request); + rc = osc_brw_prep_request(cmd, 
class_conn2cliimp(conn), oa, lsm, + page_count, pga, &requested_nob, &nio_count, + &request); /* NB ^ sets rq_no_resend */ if (rc == 0) { - LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args)); + LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args)); aa = (struct osc_brw_async_args *)&request->rq_async_args; + aa->aa_oa = oa; aa->aa_requested_nob = requested_nob; aa->aa_nio_count = nio_count; aa->aa_page_count = page_count; @@ -1096,7 +1115,7 @@ static obd_count check_elan_limit(struct brw_page *pg, obd_count pages) return i; } -static int osc_brw(int cmd, struct lustre_handle *conn, +static int osc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md, obd_count page_count, struct brw_page *pga, struct obd_trans_info *oti) { @@ -1124,7 +1143,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn, sort_brw_pages(pga, pages_per_brw); pages_per_brw = check_elan_limit(pga, pages_per_brw); - rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd); + rc = osc_brw_internal(cmd, conn, oa, md, pages_per_brw, pga); if (rc != 0) RETURN(rc); @@ -1135,7 +1154,7 @@ static int osc_brw(int cmd, struct lustre_handle *conn, RETURN(0); } -static int osc_brw_async(int cmd, struct lustre_handle *conn, +static int osc_brw_async(int cmd, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *md, obd_count page_count, struct brw_page *pga, struct ptlrpc_request_set *set, struct obd_trans_info *oti) @@ -1164,7 +1183,7 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn, sort_brw_pages(pga, pages_per_brw); pages_per_brw = check_elan_limit(pga, pages_per_brw); - rc = async_internal(conn, md, pages_per_brw, pga, set, cmd); + rc = async_internal(cmd, conn, oa, md, pages_per_brw, pga, set); if (rc != 0) RETURN(rc); @@ -1178,9 +1197,8 @@ static int osc_brw_async(int cmd, struct lustre_handle *conn, #ifdef __KERNEL__ /* Note: caller will lock/unlock, and set uptodate on the pages */ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) 
-static int sanosc_brw_read(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - obd_count page_count, +static int sanosc_brw_read(struct lustre_handle *conn, struct obdo *oa, + struct lov_stripe_md *lsm, obd_count page_count, struct brw_page *pga) { struct ptlrpc_request *request = NULL; @@ -1201,14 +1219,16 @@ static int sanosc_brw_read(struct lustre_handle *conn, if (!request) RETURN(-ENOMEM); - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr)); + body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof(*body)); + iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof(*iooptr)); nioptr = lustre_msg_buf(request->rq_reqmsg, 2, - sizeof (*nioptr) * page_count); + sizeof(*nioptr) * page_count); + + memcpy(&body->oa, oa, sizeof(body->oa)); - iooptr->ioo_id = lsm->lsm_object_id; - iooptr->ioo_gr = 0; - iooptr->ioo_type = S_IFREG; + iooptr->ioo_id = oa->o_id; + iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0; + iooptr->ioo_type = oa->o_mode; iooptr->ioo_bufcnt = page_count; for (mapped = 0; mapped < page_count; mapped++, nioptr++) { @@ -1227,8 +1247,17 @@ static int sanosc_brw_read(struct lustre_handle *conn, if (rc) GOTO(out_req, rc); - swab = lustre_msg_swabbed (request->rq_repmsg); - LASSERT_REPSWAB (request, 1); + body = lustre_swab_repbuf(request, 0, sizeof(*body), + lustre_swab_ost_body); + if (body == NULL) { + CERROR("Can't unpack body\n"); + GOTO(out_req, rc = -EPROTO); + } + + memcpy(oa, &body->oa, sizeof(*oa)); + + swab = lustre_msg_swabbed(request->rq_repmsg); + LASSERT_REPSWAB(request, 1); nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]); if (!nioptr) { /* nioptr missing or short */ @@ -1300,9 +1329,8 @@ out_req: RETURN(rc); } -static int sanosc_brw_write(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - obd_count page_count, +static int sanosc_brw_write(struct lustre_handle *conn, struct obdo *oa, + struct lov_stripe_md *lsm, obd_count page_count, struct 
brw_page *pga) { struct ptlrpc_request *request = NULL; @@ -1326,9 +1354,11 @@ static int sanosc_brw_write(struct lustre_handle *conn, nioptr = lustre_msg_buf(request->rq_reqmsg, 2, sizeof (*nioptr) * page_count); - iooptr->ioo_id = lsm->lsm_object_id; - iooptr->ioo_gr = 0; - iooptr->ioo_type = S_IFREG; + memcpy(&body->oa, oa, sizeof(body->oa)); + + iooptr->ioo_id = oa->o_id; + iooptr->ioo_gr = oa->o_valid & 0 ? oa->o_gr : 0; + iooptr->ioo_type = oa->o_mode; iooptr->ioo_bufcnt = page_count; /* pack request */ @@ -1414,7 +1444,7 @@ out_req: RETURN(rc); } -static int sanosc_brw(int cmd, struct lustre_handle *conn, +static int sanosc_brw(int cmd, struct lustre_handle *conn, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, struct brw_page *pga, struct obd_trans_info *oti) { @@ -1430,9 +1460,9 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn, pages_per_brw = page_count; if (cmd & OBD_BRW_WRITE) - rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga); + rc = sanosc_brw_write(conn, oa, lsm, pages_per_brw,pga); else - rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga); + rc = sanosc_brw_read(conn, oa, lsm, pages_per_brw, pga); if (rc != 0) RETURN(rc); @@ -1445,7 +1475,7 @@ static int sanosc_brw(int cmd, struct lustre_handle *conn, #endif #endif -static int osc_mark_page_dirty(struct lustre_handle *conn, +static int osc_mark_page_dirty(struct lustre_handle *conn, struct lov_stripe_md *lsm, unsigned long offset) { struct client_obd *cli = &class_conn2obd(conn)->u.cli; @@ -1455,12 +1485,14 @@ static int osc_mark_page_dirty(struct lustre_handle *conn, down(&cli->cl_dirty_sem); - if (cli->cl_ost_can_grant && +#if 0 + if (cli->cl_ost_can_grant && (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) { CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n", cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE); GOTO(out, rc = -EDQUOT); } +#endif rc = ot_mark_offset(dirty_ot, offset); if (rc) @@ -1474,7 +1506,7 @@ out: RETURN(rc); } -static int 
osc_clear_dirty_pages(struct lustre_handle *conn, +static int osc_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, unsigned long start, unsigned long end, unsigned long *cleared) @@ -1526,7 +1558,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, struct lustre_handle *lockh) { struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - struct obd_device *obddev = class_conn2obd(connh); + struct obd_device *obd = class_conn2obd(connh); struct ldlm_extent *extent = extentp; int rc; ENTRY; @@ -1537,7 +1569,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, extent->end |= ~PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id, + rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id, type, extent, sizeof(extent), mode, data, lockh); if (rc == 1) /* We already have a lock, and it's referenced */ @@ -1556,7 +1588,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, * locks out from other users right now, too. 
*/ if (mode == LCK_PR) { - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, + rc = ldlm_lock_match(obd->obd_namespace, LDLM_FL_MATCH_DATA, &res_id, type, extent, sizeof(extent), LCK_PW, data, lockh); if (rc == 1) { @@ -1570,7 +1602,7 @@ static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, } } - rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock, + rc = ldlm_cli_enqueue(connh, NULL, obd->obd_namespace, parent_lock, res_id, type, extent, sizeof(extent), mode, flags, ldlm_completion_ast, callback, data, lockh); RETURN(rc); @@ -1581,7 +1613,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, int *flags, void *data, struct lustre_handle *lockh) { struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - struct obd_device *obddev = class_conn2obd(connh); + struct obd_device *obd = class_conn2obd(connh); struct ldlm_extent *extent = extentp; int rc; ENTRY; @@ -1592,7 +1624,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, extent->end |= ~PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type, + rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, extent, sizeof(extent), mode, data, lockh); if (rc) RETURN(rc); @@ -1601,7 +1633,7 @@ static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, * VFS and page cache already protect us locally, so lots of readers/ * writers can share a single PW lock. 
*/ if (mode == LCK_PR) { - rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, + rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, extent, sizeof(extent), LCK_PW, data, lockh); if (rc == 1) { @@ -1628,22 +1660,28 @@ static int osc_cancel(struct lustre_handle *oconn, struct lov_stripe_md *md, static int osc_cancel_unused(struct lustre_handle *connh, struct lov_stripe_md *lsm, int flags, void *opaque) { - struct obd_device *obddev = class_conn2obd(connh); + struct obd_device *obd = class_conn2obd(connh); struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags, + return ldlm_cli_cancel_unused(obd->obd_namespace, &res_id, flags, opaque); } -static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs) +static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) { struct obd_statfs *msfs; struct ptlrpc_request *request; int rc, size = sizeof(*osfs); ENTRY; - request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0, - NULL, NULL); + /* We could possibly pass max_age in the request (as an absolute + * timestamp or a "seconds.usec ago") so the target can avoid doing + * extra calls into the filesystem if that isn't necessary (e.g. + * during mount that would help a bit). Having relative timestamps + * is not so great if request processing is slow, while absolute + * timestamps are not ideal because they need time synchronization. 
*/ + request = ptlrpc_prep_req(obd->u.cli.cl_import, OST_STATFS,0,NULL,NULL); if (!request) RETURN(-ENOMEM); @@ -1655,14 +1693,14 @@ static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs) GOTO(out, rc); } - msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs), - lustre_swab_obd_statfs); + msfs = lustre_swab_repbuf(request, 0, sizeof(*msfs), + lustre_swab_obd_statfs); if (msfs == NULL) { - CERROR ("Can't unpack obd_statfs\n"); - GOTO (out, rc = -EPROTO); + CERROR("Can't unpack obd_statfs\n"); + GOTO(out, rc = -EPROTO); } - memcpy (osfs, msfs, sizeof (*msfs)); + memcpy(osfs, msfs, sizeof(*osfs)); EXIT; out: @@ -1717,16 +1755,16 @@ static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm, static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, void *karg, void *uarg) { - struct obd_device *obddev = class_conn2obd(conn); + struct obd_device *obd = class_conn2obd(conn); struct obd_ioctl_data *data = karg; int err = 0; ENTRY; switch (cmd) { case IOC_OSC_REGISTER_LOV: { - if (obddev->u.cli.cl_containing_lov) + if (obd->u.cli.cl_containing_lov) GOTO(out, err = -EALREADY); - obddev->u.cli.cl_containing_lov = (struct obd_device *)karg; + obd->u.cli.cl_containing_lov = (struct obd_device *)karg; GOTO(out, err); } case OBD_IOC_LOV_GET_CONFIG: { @@ -1758,9 +1796,9 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, desc->ld_default_stripe_size = 0; desc->ld_default_stripe_offset = 0; desc->ld_pattern = 0; - memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid)); + memcpy(&desc->ld_uuid, &obd->obd_uuid, sizeof(uuid)); - memcpy(data->ioc_inlbuf2, &obddev->obd_uuid, sizeof(uuid)); + memcpy(data->ioc_inlbuf2, &obd->obd_uuid, sizeof(uuid)); err = copy_to_user((void *)uarg, buf, len); if (err) @@ -1777,15 +1815,15 @@ static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, err = osc_getstripe(conn, karg, uarg); GOTO(out, err); case OBD_IOC_CLIENT_RECOVER: - err = 
ptlrpc_recover_import(obddev->u.cli.cl_import, + err = ptlrpc_recover_import(obd->u.cli.cl_import, data->ioc_inlbuf1); GOTO(out, err); case IOC_OSC_SET_ACTIVE: - err = ptlrpc_set_import_active(obddev->u.cli.cl_import, + err = ptlrpc_set_import_active(obd->u.cli.cl_import, data->ioc_offset); GOTO(out, err); default: - CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd); + CERROR("unrecognised ioctl %#x by %s\n", cmd, current->comm); GOTO(out, err = -ENOTTY); } out: @@ -1809,6 +1847,104 @@ static int osc_get_info(struct lustre_handle *conn, obd_count keylen, RETURN(-EINVAL); } +static int osc_set_info(struct lustre_handle *conn, obd_count keylen, + void *key, obd_count vallen, void *val) +{ + struct ptlrpc_request *req; + int rc, size = keylen; + char *bufs[1] = {key}; + ENTRY; + + if (keylen < strlen("mds_conn") || + memcmp(key, "mds_conn", strlen("mds_conn")) != 0) + RETURN(-EINVAL); + + req = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SET_INFO, 1, + &size, bufs); + if (req == NULL) + RETURN(-ENOMEM); + + req->rq_replen = lustre_msg_size(0, NULL); + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + RETURN(rc); +} + +static int osc_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags) +{ + struct obd_device *obd = class_conn2obd(conn); + struct llog_commit_data *llcd; + struct client_obd *cli; + int rc = 0; + ENTRY; + + cli = &obd->u.cli; + if ((count == 0 || cookies == NULL || + memcmp(cookies, &zero_cookie, sizeof(*cookies)) == 0)) { + down(&cli->cl_sem); + if (cli->cl_llcd == NULL || !(flags & OBD_LLOG_FL_SENDNOW)) + GOTO(out, rc); + + llcd = cli->cl_llcd; + GOTO(send_now, rc); + } + + down(&cli->cl_sem); + llcd = cli->cl_llcd; + if (llcd == NULL) { + llcd = llcd_grab(); + if (llcd == NULL) { + CERROR("couldn't get an llcd - dropped "LPX64":%x+%u\n", + cookies->lgc_lgl.lgl_oid, + cookies->lgc_lgl.lgl_ogen, cookies->lgc_index); + GOTO(out, rc = -ENOMEM); + } + llcd->llcd_import = 
cli->cl_import; + cli->cl_llcd = llcd; + } + + memcpy(llcd->llcd_cookies + llcd->llcd_cookiebytes, cookies, + sizeof(*cookies)); + llcd->llcd_cookiebytes += sizeof(*cookies); + + /* If we can't fit any more cookies into the page, we need to send it */ +send_now: + if ((PAGE_SIZE - llcd->llcd_cookiebytes < sizeof(*cookies) || + flags & OBD_LLOG_FL_SENDNOW)) { + cli->cl_llcd = NULL; + llcd_send(llcd); + } +out: + up(&cli->cl_sem); + + return rc; +} + +static int osc_disconnect(struct lustre_handle *conn, int flags) +{ + struct obd_device *obd = class_conn2obd(conn); + + /* flush any remaining cancel messages out to the target */ + if (obd->u.cli.cl_llcd) + osc_log_cancel(conn, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); + + return client_import_disconnect(conn, flags); +} + +static int osc_log_add(struct lustre_handle *conn, + struct llog_handle *cathandle, + struct llog_trans_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies) +{ + ENTRY; + LASSERT(logcookies && numcookies > 0); + + llog_add_record(cathandle, rec, logcookies); + + RETURN(1); +} + struct obd_ops osc_obd_ops = { o_owner: THIS_MODULE, o_attach: osc_attach, @@ -1816,14 +1952,14 @@ struct obd_ops osc_obd_ops = { o_setup: client_obd_setup, o_cleanup: client_obd_cleanup, o_connect: client_import_connect, - o_disconnect: client_import_disconnect, + o_disconnect: osc_disconnect, o_statfs: osc_statfs, o_packmd: osc_packmd, o_unpackmd: osc_unpackmd, o_create: osc_create, o_destroy: osc_destroy, o_getattr: osc_getattr, - o_getattr_async: osc_getattr_async, + o_getattr_async:osc_getattr_async, o_setattr: osc_setattr, o_open: osc_open, o_close: osc_close, @@ -1833,14 +1969,18 @@ struct obd_ops osc_obd_ops = { o_enqueue: osc_enqueue, o_match: osc_match, o_cancel: osc_cancel, - o_cancel_unused: osc_cancel_unused, + o_cancel_unused:osc_cancel_unused, o_iocontrol: osc_iocontrol, o_get_info: osc_get_info, - .o_mark_page_dirty = osc_mark_page_dirty, - .o_clear_dirty_pages = 
osc_clear_dirty_pages, - .o_last_dirty_offset = osc_last_dirty_offset, + o_set_info: osc_set_info, + o_log_cancel: osc_log_cancel, + o_log_add: osc_log_add, + o_mark_page_dirty: osc_mark_page_dirty, + o_clear_dirty_pages: osc_clear_dirty_pages, + o_last_dirty_offset: osc_last_dirty_offset, }; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) struct obd_ops sanosc_obd_ops = { o_owner: THIS_MODULE, o_attach: osc_attach, @@ -1858,48 +1998,54 @@ struct obd_ops sanosc_obd_ops = { o_setattr: osc_setattr, o_open: osc_open, o_close: osc_close, -#ifdef __KERNEL__ o_setup: client_sanobd_setup, o_brw: sanosc_brw, -#endif o_punch: osc_punch, o_enqueue: osc_enqueue, o_match: osc_match, o_cancel: osc_cancel, o_cancel_unused: osc_cancel_unused, o_iocontrol: osc_iocontrol, - .o_mark_page_dirty = osc_mark_page_dirty, - .o_clear_dirty_pages = osc_clear_dirty_pages, - .o_last_dirty_offset = osc_last_dirty_offset, + o_log_cancel: osc_log_cancel, + o_log_add: osc_log_add, + o_mark_page_dirty: osc_mark_page_dirty, + o_clear_dirty_pages: osc_clear_dirty_pages, + o_last_dirty_offset: osc_last_dirty_offset, }; +#endif int __init osc_init(void) { - struct lprocfs_static_vars lvars; + struct lprocfs_static_vars lvars, sanlvars; int rc; ENTRY; LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE); LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ); - lprocfs_init_vars(&lvars); + lprocfs_init_vars(osc,&lvars); + lprocfs_init_vars(osc,&sanlvars); rc = class_register_type(&osc_obd_ops, lvars.module_vars, LUSTRE_OSC_NAME); if (rc) RETURN(rc); - rc = class_register_type(&sanosc_obd_ops, lvars.module_vars, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + rc = class_register_type(&sanosc_obd_ops, sanlvars.module_vars, LUSTRE_SANOSC_NAME); if (rc) class_unregister_type(LUSTRE_OSC_NAME); +#endif RETURN(rc); } -static void __exit osc_exit(void) +static void /*__exit*/ osc_exit(void) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) class_unregister_type(LUSTRE_SANOSC_NAME); +#endif 
class_unregister_type(LUSTRE_OSC_NAME); } diff --git a/lustre/ost/.cvsignore b/lustre/ost/.cvsignore index e530020..49c6100 100644 --- a/lustre/ost/.cvsignore +++ b/lustre/ost/.cvsignore @@ -6,3 +6,4 @@ Makefile Makefile.in .deps TAGS +.*.cmd diff --git a/lustre/ost/lproc_ost.c b/lustre/ost/lproc_ost.c index c44093c..936706d 100644 --- a/lustre/ost/lproc_ost.c +++ b/lustre/ost/lproc_ost.c @@ -25,18 +25,18 @@ #include #ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; #else -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, +static struct lprocfs_vars lprocfs_obd_vars[] = { + { "uuid", lprocfs_rd_uuid, 0, 0 }, { 0 } }; -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, +static struct lprocfs_vars lprocfs_module_vars[] = { + { "num_refs", lprocfs_rd_numrefs, 0, 0 }, { 0 } }; #endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) +LPROCFS_INIT_VARS(ost, lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 023deb2..6801e92 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -40,28 +40,25 @@ #include #include #include +#include +#include -inline void oti_init(struct obd_trans_info *oti, - struct ptlrpc_request *req) +void oti_init(struct obd_trans_info *oti, struct ptlrpc_request *req) { - if(oti == NULL) + if (oti == NULL) return; memset(oti, 0, sizeof *oti); - if (req->rq_repmsg && req->rq_reqmsg != 0) oti->oti_transno = req->rq_repmsg->transno; - - EXIT; } -inline void oti_to_request(struct obd_trans_info *oti, - struct ptlrpc_request *req) +void oti_to_request(struct obd_trans_info *oti, struct ptlrpc_request *req) { - int i; struct oti_req_ack_lock *ack_lock; + int i; - if(oti == NULL) + if (oti == NULL) return; 
if (req->rq_repmsg) @@ -75,7 +72,6 @@ inline void oti_to_request(struct obd_trans_info *oti, sizeof(req->rq_ack_locks[i].lock)); req->rq_ack_locks[i].mode = ack_lock->mode; } - EXIT; } static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti) @@ -85,15 +81,16 @@ static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*body); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); + if (body->oa.o_valid & OBD_MD_FLCOOKIE) + oti->oti_logcookies = obdo_logcookie(&body->oa); req->rq_status = obd_destroy(conn, &body->oa, NULL, oti); RETURN(0); } @@ -105,16 +102,15 @@ static int ost_getattr(struct ptlrpc_request *req) int rc, size = sizeof(*body); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_getattr(conn, &repbody->oa, NULL); RETURN(0); @@ -130,10 +126,9 @@ static int ost_statfs(struct ptlrpc_request *req) if (rc) RETURN(rc); - osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs)); - memset(osfs, 0, size); + osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*osfs)); - req->rq_status = obd_statfs(req->rq_export, osfs); + req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ); if (req->rq_status != 0) CERROR("ost: statfs failed: rc %d\n", req->rq_status); @@ -167,16 +162,15 @@ static 
int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*repbody); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - return (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL); RETURN(0); @@ -189,16 +183,15 @@ static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*repbody); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_close(conn, &repbody->oa, NULL, oti); RETURN(0); @@ -211,18 +204,19 @@ static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*repbody); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); + 
oti->oti_logcookies = obdo_logcookie(&repbody->oa); req->rq_status = obd_create(conn, &repbody->oa, NULL, oti); + //obd_log_cancel(conn, NULL, 1, oti->oti_logcookies, 0); RETURN(0); } @@ -233,10 +227,9 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*repbody); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) != (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) @@ -246,7 +239,7 @@ static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc) RETURN(rc); - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size, repbody->oa.o_blocks, oti); @@ -260,16 +253,15 @@ static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti) int rc, size = sizeof(*repbody); ENTRY; - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) RETURN(rc); - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti); @@ -285,9 +277,9 @@ static int ost_bulk_timeout(void *data) RETURN(1); } -static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo, - struct niobuf_remote *rnb, int nrnb, - struct niobuf_remote **pp_rnbp) +static int get_per_page_niobufs(struct obd_ioobj *ioo, int nioo, + 
struct niobuf_remote *rnb, int nrnb, + struct niobuf_remote **pp_rnbp) { /* Copy a remote niobuf, splitting it into page-sized chunks * and setting ioo[i].ioo_bufcnt accordingly */ @@ -305,14 +297,14 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo, obd_off p0 = offset >> PAGE_SHIFT; obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT; - LASSERT (rnbidx < nrnb); + LASSERT(rnbidx < nrnb); npages += (pn + 1 - p0); if (rnb[rnbidx].len == 0) { CERROR("zero len BRW: obj %d objid "LPX64 " buf %u\n", i, ioo[i].ioo_id, j); - return (-EINVAL); + return -EINVAL; } if (j > 0 && rnb[rnbidx].offset <= rnb[rnbidx-1].offset) { @@ -320,20 +312,20 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo, " buf %u offset "LPX64" <= "LPX64"\n", i, ioo[i].ioo_id, j, rnb[rnbidx].offset, rnb[rnbidx].offset); - return (-EINVAL); + return -EINVAL; } } - LASSERT (rnbidx == nrnb); + LASSERT(rnbidx == nrnb); if (npages == nrnb) { /* all niobufs are for single pages */ *pp_rnbp = rnb; - return (npages); + return npages; } - OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages); + OBD_ALLOC(pp_rnb, sizeof(*pp_rnb) * npages); if (pp_rnb == NULL) - return (-ENOMEM); + return -ENOMEM; /* now do the actual split */ page = rnbidx = 0; @@ -344,35 +336,35 @@ static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo, obd_off off = rnb[rnbidx].offset; int nob = rnb[rnbidx].len; - LASSERT (rnbidx < nrnb); + LASSERT(rnbidx < nrnb); do { obd_off poff = off & (PAGE_SIZE - 1); int pnob = (poff + nob > PAGE_SIZE) ? 
PAGE_SIZE - poff : nob; - LASSERT (page < npages); + LASSERT(page < npages); pp_rnb[page].len = pnob; pp_rnb[page].offset = off; pp_rnb[page].flags = rnb->flags; - CDEBUG (D_PAGE, " obj %d id "LPX64 - "page %d(%d) "LPX64" for %d\n", - i, ioo[i].ioo_id, obj_pages, page, - pp_rnb[page].offset, pp_rnb[page].len); + CDEBUG(D_PAGE, " obj %d id "LPX64 + "page %d(%d) "LPX64" for %d\n", + i, ioo[i].ioo_id, obj_pages, page, + pp_rnb[page].offset, pp_rnb[page].len); page++; obj_pages++; off += pnob; nob -= pnob; } while (nob > 0); - LASSERT (nob == 0); + LASSERT(nob == 0); } ioo[i].ioo_bufcnt = obj_pages; } - LASSERT (page == npages); + LASSERT(page == npages); *pp_rnbp = pp_rnb; - return (npages); + return npages; } static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb, @@ -381,23 +373,19 @@ static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb, if (pp_rnb == rnb) /* didn't allocate above */ return; - OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages); + OBD_FREE(pp_rnb, sizeof(*pp_rnb) * npages); } #if CHECKSUM_BULK __u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc) { __u64 cksum = 0; - struct list_head *tmp; - char *ptr; + struct ptlrpc_bulk_page *bp; - list_for_each (tmp, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bp; - - bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link); - ptr = kmap (bp->bp_page); - ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen); - kunmap (bp->bp_page); + list_for_each_entry(bp, &desc->bd_page_list, bp_link) { + ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset, + bp->bp_buflen); + kunmap(bp->bp_page); } } #endif @@ -409,9 +397,9 @@ static int ost_brw_read(struct ptlrpc_request *req) struct niobuf_remote *pp_rnb; struct niobuf_local *local_nb; struct obd_ioobj *ioo; - struct ost_body *body; + struct ost_body *body, *repbody; struct l_wait_info lwi; - void *desc_priv = NULL; + struct obd_trans_info oti = { 0 }; int size[1] = { sizeof(*body) }; int comms_error = 0; int 
niocount; @@ -426,35 +414,36 @@ static int ost_brw_read(struct ptlrpc_request *req) body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { - CERROR ("Missing/short ost_body\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short ost_body\n"); + GOTO(out, rc = -EFAULT); } - ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo), - lustre_swab_obd_ioobj); + ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj); if (ioo == NULL) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short ioobj\n"); + GOTO(out, rc = -EFAULT); } niocount = ioo->ioo_bufcnt; - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), + remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb), lustre_swab_niobuf_remote); if (remote_nb == NULL) { - CERROR ("Missing/short niobuf\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short niobuf\n"); + GOTO(out, rc = -EFAULT); } - if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */ + if (lustre_msg_swabbed(req->rq_reqmsg)) { /* swab remaining niobufs */ for (i = 1; i < niocount; i++) lustre_swab_niobuf_remote (&remote_nb[i]); } + size[0] = sizeof(*body); rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc) GOTO(out, rc); + /* FIXME all niobuf splitting should be done in obdfilter if needed */ /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */ - npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb); + npages = get_per_page_niobufs(ioo, 1, remote_nb, niocount, &pp_rnb); if (npages < 0) GOTO(out, rc = npages); @@ -462,12 +451,12 @@ static int ost_brw_read(struct ptlrpc_request *req) if (local_nb == NULL) GOTO(out_pp_rnb, rc = -ENOMEM); - desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL); + desc = ptlrpc_prep_bulk_exp(req, BULK_PUT_SOURCE, OST_BULK_PORTAL); if (desc == NULL) GOTO(out_local, rc = -ENOMEM); - rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, 
npages, - pp_rnb, local_nb, &desc_priv, NULL); + rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1, + ioo, npages, pp_rnb, local_nb, &oti); if (rc != 0) GOTO(out_bulk, rc); @@ -480,7 +469,7 @@ static int ost_brw_read(struct ptlrpc_request *req) break; } - LASSERT (page_rc <= pp_rnb[i].len); + LASSERT(page_rc <= pp_rnb[i].len); nob += page_rc; if (page_rc != 0) { /* some data! */ LASSERT (local_nb[i].page != NULL); @@ -493,8 +482,8 @@ static int ost_brw_read(struct ptlrpc_request *req) if (page_rc != pp_rnb[i].len) { /* short read */ /* All subsequent pages should be 0 */ - while (++i < npages) - LASSERT (local_nb[i].rc == 0); + while(++i < npages) + LASSERT(local_nb[i].rc == 0); break; } } @@ -509,7 +498,7 @@ static int ost_brw_read(struct ptlrpc_request *req) if (rc) { LASSERT(rc == -ETIMEDOUT); CERROR ("timeout waiting for bulk PUT\n"); - ptlrpc_abort_bulk (desc); + ptlrpc_abort_bulk(desc); } } else { CERROR("ptlrpc_bulk_put failed RC: %d\n", rc); @@ -518,25 +507,27 @@ static int ost_brw_read(struct ptlrpc_request *req) } /* Must commit after prep above in all cases */ - rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages, - local_nb, desc_priv, NULL); + rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1, + ioo, npages, local_nb, &oti); + + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); + memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); #if CHECKSUM_BULK if (rc == 0) { - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - body->oa.o_rdev = ost_checksum_bulk (desc); - body->oa.o_valid |= OBD_MD_FLCKSUM; + repbody->oa.o_rdev = ost_checksum_bulk(desc); + repbody->oa.o_valid |= OBD_MD_FLCKSUM; } #endif out_bulk: - ptlrpc_free_bulk (desc); + ptlrpc_free_bulk(desc); out_local: OBD_FREE(local_nb, sizeof(*local_nb) * npages); out_pp_rnb: - free_per_page_niobufs (npages, pp_rnb, remote_nb); + free_per_page_niobufs(npages, pp_rnb, remote_nb); out: - LASSERT (rc <= 0); + LASSERT(rc <= 0); if (rc == 0) { 
req->rq_status = nob; ptlrpc_reply(req); @@ -547,7 +538,7 @@ static int ost_brw_read(struct ptlrpc_request *req) } else { if (req->rq_repmsg != NULL) { /* reply out callback would free */ - OBD_FREE (req->rq_repmsg, req->rq_replen); + OBD_FREE(req->rq_repmsg, req->rq_replen); } CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n", req->rq_export->exp_client_uuid.uuid, @@ -566,11 +557,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) struct niobuf_remote *pp_rnb; struct niobuf_local *local_nb; struct obd_ioobj *ioo; - struct ost_body *body; + struct ost_body *body, *repbody; struct l_wait_info lwi; - void *desc_priv = NULL; __u32 *rcs; - int size[2] = { sizeof (*body) }; + int size[2] = { sizeof(*body) }; int objcount, niocount, npages; int comms_error = 0; int rc, rc2, swab, i, j; @@ -580,39 +570,38 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) GOTO(out, rc = -EIO); /* pause before transaction has been started */ - OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, obd_timeout +1); - swab = lustre_msg_swabbed (req->rq_reqmsg); - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + swab = lustre_msg_swabbed(req->rq_reqmsg); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { - CERROR ("Missing/short ost_body\n"); + CERROR("Missing/short ost_body\n"); GOTO(out, rc = -EFAULT); } - LASSERT_REQSWAB (req, 1); + LASSERT_REQSWAB(req, 1); objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo); if (objcount == 0) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short ioobj\n"); + GOTO(out, rc = -EFAULT); } - ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo)); + ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof(*ioo)); LASSERT (ioo != NULL); for (niocount = i = 0; i < objcount; i++) { if (swab) 
lustre_swab_obd_ioobj (&ioo[i]); if (ioo[i].ioo_bufcnt == 0) { - CERROR ("ioo[%d] has zero bufcnt\n", i); - GOTO (out, rc = -EFAULT); + CERROR("ioo[%d] has zero bufcnt\n", i); + GOTO(out, rc = -EFAULT); } niocount += ioo[i].ioo_bufcnt; } - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), + remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb), lustre_swab_niobuf_remote); if (remote_nb == NULL) { - CERROR ("Missing/short niobuf\n"); + CERROR("Missing/short niobuf\n"); GOTO(out, rc = -EFAULT); } if (swab) { /* swab the remaining niobufs */ @@ -620,30 +609,31 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_swab_niobuf_remote (&remote_nb[i]); } - size[1] = niocount * sizeof (*rcs); + size[1] = niocount * sizeof(*rcs); rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg); if (rc != 0) - GOTO (out, rc); - rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs)); + GOTO(out, rc); + rcs = lustre_msg_buf(req->rq_repmsg, 1, niocount * sizeof(*rcs)); + /* FIXME all niobuf splitting should be done in obdfilter if needed */ /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */ npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb); if (npages < 0) - GOTO (out, rc = npages); + GOTO(out, rc = npages); OBD_ALLOC(local_nb, sizeof(*local_nb) * npages); if (local_nb == NULL) GOTO(out_pp_rnb, rc = -ENOMEM); - desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL); + desc = ptlrpc_prep_bulk_exp(req, BULK_GET_SINK, OST_BULK_PORTAL); if (desc == NULL) GOTO(out_local, rc = -ENOMEM); - rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo, - npages, pp_rnb, local_nb, &desc_priv, oti); + rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount, + ioo, npages, pp_rnb, local_nb, oti); if (rc != 0) - GOTO (out_bulk, rc); + GOTO(out_bulk, rc); /* NB Having prepped, we must commit... 
*/ @@ -664,8 +654,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_bulk_complete(desc), &lwi); if (rc) { LASSERT(rc == -ETIMEDOUT); - CERROR ("timeout waiting for bulk GET\n"); - ptlrpc_abort_bulk (desc); + CERROR("timeout waiting for bulk GET\n"); + ptlrpc_abort_bulk(desc); } } else { CERROR("ptlrpc_bulk_get failed RC: %d\n", rc); @@ -673,17 +663,21 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) comms_error = rc != 0; } + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); + memcpy(&repbody->oa, &body->oa, sizeof(repbody->oa)); + #if CHECKSUM_BULK if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) { static int cksum_counter; __u64 client_cksum = body->oa.o_rdev; - __u64 cksum = ost_checksum_bulk (desc); + __u64 cksum = ost_checksum_bulk(desc); if (client_cksum != cksum) { CERROR("Bad checksum: client "LPX64", server "LPX64 ", client NID "LPX64"\n", client_cksum, cksum, req->rq_connection->c_peer.peer_nid); cksum_counter = 1; + repbody->oa.o_rdev = cksum; } else { cksum_counter++; if ((cksum_counter & (-cksum_counter)) == cksum_counter) @@ -695,8 +689,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) } #endif /* Must commit after prep above in all cases */ - rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo, - npages, local_nb, desc_priv, oti); + rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa, + objcount, ioo, npages, local_nb, oti); if (rc == 0) { /* set per-requested niobuf return codes */ @@ -705,25 +699,25 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) rcs[i] = 0; do { - LASSERT (j < npages); + LASSERT(j < npages); if (local_nb[j].rc < 0) rcs[i] = local_nb[j].rc; nob -= pp_rnb[j].len; j++; } while (nob > 0); - LASSERT (nob == 0); + LASSERT(nob == 0); } - LASSERT (j == npages); + LASSERT(j == npages); } if (rc == 0) rc = rc2; out_bulk: - ptlrpc_free_bulk (desc); + 
ptlrpc_free_bulk(desc); out_local: OBD_FREE(local_nb, sizeof(*local_nb) * npages); out_pp_rnb: - free_per_page_niobufs (npages, pp_rnb, remote_nb); + free_per_page_niobufs(npages, pp_rnb, remote_nb); out: if (rc == 0) { oti_to_request(oti, req); @@ -748,10 +742,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) static int ost_san_brw(struct ptlrpc_request *req, int cmd) { - struct lustre_handle *conn = &req->rq_reqmsg->handle; struct niobuf_remote *remote_nb, *res_nb; struct obd_ioobj *ioo; - struct ost_body *body; + struct ost_body *body, *repbody; int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)}; int n; int swab; @@ -759,19 +752,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd) /* XXX not set to use latest protocol */ - swab = lustre_msg_swabbed (req->rq_reqmsg); - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); + swab = lustre_msg_swabbed(req->rq_reqmsg); + body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { - CERROR ("Missing/short ost_body\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short ost_body\n"); + GOTO(out, rc = -EFAULT); } - ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo), - lustre_swab_obd_ioobj); + ioo = lustre_swab_reqbuf(req, 1, sizeof(*ioo), lustre_swab_obd_ioobj); if (ioo == NULL) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short ioobj\n"); + GOTO(out, rc = -EFAULT); } objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo); niocount = ioo[0].ioo_bufcnt; @@ -781,11 +772,11 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd) niocount += ioo[i].ioo_bufcnt; } - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), + remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof(*remote_nb), lustre_swab_niobuf_remote); if (remote_nb == NULL) { - CERROR ("Missing/short niobuf\n"); - GOTO (out, rc = -EFAULT); + CERROR("Missing/short niobuf\n"); + GOTO(out, rc = 
-EFAULT); } if (swab) { /* swab the remaining niobufs */ for (i = 1; i < niocount; i++) @@ -814,14 +805,17 @@ static int ost_san_brw(struct ptlrpc_request *req, int cmd) if (rc) GOTO(out, rc); - req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo, - niocount, remote_nb); + req->rq_status = obd_san_preprw(cmd, req->rq_export, &body->oa, + objcount, ioo, niocount, remote_nb); if (req->rq_status) - GOTO (out, rc = 0); + GOTO(out, rc = 0); + + repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*repbody)); + memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]); - memcpy (res_nb, remote_nb, size[1]); + memcpy(res_nb, remote_nb, size[1]); rc = 0; out: if (rc) { @@ -835,6 +829,57 @@ out: return rc; } +static int ost_log_cancel(struct ptlrpc_request *req) +{ + struct lustre_handle *conn; + struct llog_cookie *logcookies; + int num_cookies, rc = 0; + ENTRY; + + logcookies = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*logcookies)); + if (logcookies == NULL) { + DEBUG_REQ(D_HA, req, "no cookies sent"); + RETURN(-EFAULT); + } + num_cookies = req->rq_reqmsg->buflens[0] / sizeof(*logcookies); + + /* workaround until we don't need to send replies */ + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc) + RETURN(rc); + req->rq_repmsg->status = 0; + /* end workaround */ + + conn = (struct lustre_handle *)&req->rq_reqmsg->handle; + rc = obd_log_cancel(conn, NULL, num_cookies, logcookies, 0); + + RETURN(rc); +} + +static int ost_set_info(struct ptlrpc_request *req) +{ + struct lustre_handle *conn; + char *key; + int keylen, rc = 0; + ENTRY; + + key = lustre_msg_buf(req->rq_reqmsg, 0, 1); + if (key == NULL) { + DEBUG_REQ(D_HA, req, "no set_info key"); + RETURN(-EFAULT); + } + keylen = req->rq_reqmsg->buflens[0]; + + rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); + if (rc) + RETURN(rc); + + conn = (struct lustre_handle *)&req->rq_reqmsg->handle; + rc = obd_set_info(conn, keylen, 
key, 0, NULL); + req->rq_repmsg->status = 0; + RETURN(rc); +} + static int filter_recovery_request(struct ptlrpc_request *req, struct obd_device *obd, int *process) { @@ -850,9 +895,10 @@ static int filter_recovery_request(struct ptlrpc_request *req, case OST_DESTROY: case OST_OPEN: case OST_PUNCH: - case OST_SETATTR: + case OST_SETATTR: case OST_SYNCFS: case OST_WRITE: + case OBD_LOG_CANCEL: case LDLM_ENQUEUE: *process = target_queue_recovery_request(req, obd); RETURN(0); @@ -881,7 +927,7 @@ static int ost_handle(struct ptlrpc_request *req) int abort_recovery, recovering; if (req->rq_export == NULL) { - CERROR("lustre_ost: operation %d on unconnected OST\n", + CDEBUG(D_HA, "operation %d on unconnected OST\n", req->rq_reqmsg->opc); req->rq_status = -ENOTCONN; GOTO(out, rc = -ENOTCONN); @@ -901,7 +947,7 @@ static int ost_handle(struct ptlrpc_request *req) if (rc || !should_process) RETURN(rc); } - } + } if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0) GOTO(out, rc = -EINVAL); @@ -988,10 +1034,18 @@ static int ost_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0); rc = ost_syncfs(req); break; + case OST_SET_INFO: + DEBUG_REQ(D_INODE, req, "set_info"); + rc = ost_set_info(req); case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); rc = target_handle_ping(req); break; + case OBD_LOG_CANCEL: + CDEBUG(D_INODE, "log cancel\n"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0); + rc = ost_log_cancel(req); + break; case LDLM_ENQUEUE: CDEBUG(D_INODE, "enqueue\n"); OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); @@ -1058,17 +1112,22 @@ out: static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) { struct ost_obd *ost = &obddev->u.ost; - int err; - int i; + int err, i; ENTRY; +#ifdef ENABLE_ORPHANS + err = llog_start_commit_thread(); + if (err < 0) + RETURN(err); +#endif + ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, ost_handle, "ost", obddev); if 
(!ost->ost_service) { CERROR("failed to start service\n"); - GOTO(error_disc, err = -ENOMEM); + RETURN(-ENOMEM); } for (i = 0; i < OST_NUM_THREADS; i++) { @@ -1077,17 +1136,14 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) err = ptlrpc_start_thread(obddev, ost->ost_service, name); if (err) { CERROR("error starting thread #%d: rc %d\n", i, err); - GOTO(error_disc, err = -EINVAL); + RETURN(-EINVAL); } } RETURN(0); - -error_disc: - RETURN(err); } -static int ost_cleanup(struct obd_device *obddev, int force, int failover) +static int ost_cleanup(struct obd_device *obddev, int flags) { struct ost_obd *ost = &obddev->u.ost; int err = 0; @@ -1106,7 +1162,7 @@ int ost_attach(struct obd_device *dev, obd_count len, void *data) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(ost,&lvars); return lprocfs_obd_attach(dev, lvars.obd_vars); } @@ -1115,7 +1171,7 @@ int ost_detach(struct obd_device *dev) return lprocfs_obd_detach(dev); } -/* I don't think this function is ever used, since nothing +/* I don't think this function is ever used, since nothing * connects directly to this module. 
*/ static int ost_connect(struct lustre_handle *conn, @@ -1153,12 +1209,12 @@ static int __init ost_init(void) struct lprocfs_static_vars lvars; ENTRY; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(ost,&lvars); RETURN(class_register_type(&ost_obd_ops, lvars.module_vars, LUSTRE_OST_NAME)); } -static void __exit ost_exit(void) +static void /*__exit*/ ost_exit(void) { class_unregister_type(LUSTRE_OST_NAME); } diff --git a/lustre/portals/.cvsignore b/lustre/portals/.cvsignore index 99ac885..c1a9bdf 100644 --- a/lustre/portals/.cvsignore +++ b/lustre/portals/.cvsignore @@ -6,3 +6,4 @@ autom4te.cache config.log config.status configure +.*.o.cmd diff --git a/lustre/portals/Kernelenv.in b/lustre/portals/Kernelenv.in index 29a713f..7a48c58 100644 --- a/lustre/portals/Kernelenv.in +++ b/lustre/portals/Kernelenv.in @@ -1 +1,6 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include +# portals/utils/debug.c wants from userspace. sigh. 
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) +LIBREADLINE := @LIBREADLINE@ +# 2.5's makefiles aren't nice to cross dir libraries in host programs +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lustre/portals/Kernelenv.mk b/lustre/portals/Kernelenv.mk index 29a713f..7c66dfa 100644 --- a/lustre/portals/Kernelenv.mk +++ b/lustre/portals/Kernelenv.mk @@ -1 +1,4 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include +HOSTCFLAGS := $(EXTRA_CFLAGS) +# the kernel doesn't want us to build archives for host binaries :/ +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lustre/portals/Makefile.mk b/lustre/portals/Makefile.mk index be0e51a..73a19df 100644 --- a/lustre/portals/Makefile.mk +++ b/lustre/portals/Makefile.mk @@ -1,6 +1,12 @@ -include fs/lustre/portals/Kernelenv +include $(src)/Kernelenv -obj-y += portals/ +# The ordering of these determines the order that each subsystem's +# module_init() functions are called in. if these are changed make sure +# they reflect the dependencies between each subsystem's _init functions. obj-y += libcfs/ -obj-y += knals/ +obj-y += portals/ obj-y += router/ +obj-y += knals/ +obj-y += tests/ + +obj-m += utils/ diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 7a4e05c..1a7741bc 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -11,8 +11,13 @@ AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib") AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux) AC_SUBST(LINUX) +if test x$enable_inkernel = xyes ; then + echo ln -s `pwd` $LINUX/fs/lustre + rm $LINUX/fs/lustre + ln -s `pwd` $LINUX/fs/lustre +fi -# --------- UML? -------------------- +# -------------------- AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...) 
if test $host_cpu = "lib" ; then host_cpu="lib" @@ -111,6 +116,13 @@ case ${host_cpu} in MOD_LINK=elf64_ia64 ;; + x86_64 ) + AC_MSG_RESULT($host_cpu) + KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -fomit-frame-pointer -mno-red-zone -mcmodel=kernel -pipe -fno-reorder-blocks -finline-limit=2000 -fno-strength-reduce -fno-asynchronous-unwind-tables' + KCPPFLAGS='-D__KERNEL__ -DMODULE' + MOD_LINK=elf_x86_64 +;; + sparc64 ) AC_MSG_RESULT($host_cpu) KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs' @@ -160,21 +172,33 @@ if test $host_cpu != "lib" ; then AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.) fi -# ------------ RELEASE and moduledir ------------------ +# ------------ LINUXRELEASE and moduledir ------------------ AC_MSG_CHECKING(for Linux release) dnl We need to rid ourselves of the nasty [ ] quotes. changequote(, ) dnl Get release from version.h - RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" + LINUXRELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" changequote([, ]) - moduledir='$(libdir)/modules/'$RELEASE/kernel + moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel AC_SUBST(moduledir) modulefsdir='$(moduledir)/fs/$(PACKAGE)' AC_SUBST(modulefsdir) + AC_MSG_RESULT($LINUXRELEASE) + AC_SUBST(LINUXRELEASE) + +# ------------ RELEASE -------------------------------- + AC_MSG_CHECKING(lustre release) + + dnl We need to rid ourselves of the nasty [ ] quotes. 
+ changequote(, ) + dnl Get release from version.h + RELEASE="`sed -ne 's/-/_/g' -e 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_]*\).*/\1/p' $LINUX/include/linux/version.h`_`date +%Y%m%d%H%M`" + changequote([, ]) + AC_MSG_RESULT($RELEASE) AC_SUBST(RELEASE) @@ -302,7 +326,7 @@ AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) # This needs to run after we've defined the KCPPFLAGS AC_MSG_CHECKING(for kernel version) -AC_TRY_LINK([#define __KERNEL__ +AC_TRY_COMPILE([#define __KERNEL__ #include ], [struct task_struct p; p.sighand = NULL;], @@ -313,5 +337,5 @@ if test $RH_2_4_20 = 1; then AC_MSG_RESULT(redhat-2.4.20) CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20" else - AC_MSG_RESULT($RELEASE) + AC_MSG_RESULT($LINUXRELEASE) fi diff --git a/lustre/portals/include/config.h.in b/lustre/portals/include/config.h.in index 3aa6909..f9605ab1 100644 --- a/lustre/portals/include/config.h.in +++ b/lustre/portals/include/config.h.in @@ -1,5 +1,11 @@ /* portals/include/config.h.in. Generated from configure.in by autoheader. */ +/* Compile with orphan support */ +#undef ENABLE_ORPHANS + +/* Use the Pinger */ +#undef ENABLE_PINGER + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index ee3b9fc..2133391 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -4,7 +4,6 @@ #ifndef _KP30_INCLUDED #define _KP30_INCLUDED - #define PORTAL_DEBUG #ifndef offsetof @@ -13,10 +12,6 @@ #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) -#ifndef CONFIG_SMP -# define smp_processor_id() 0 -#endif - /* * Debugging */ @@ -24,39 +19,34 @@ extern unsigned int portal_subsystem_debug; extern unsigned int portal_stack; extern unsigned int portal_debug; extern unsigned int portal_printk; -/* Debugging subsystems (8 bit ID) - * - * If you add debug subsystem #32, you need to send email to phil, because - * you're going to break kernel subsystem debug filtering. 
*/ -#define S_UNDEFINED (0 << 24) -#define S_MDC (1 << 24) -#define S_MDS (2 << 24) -#define S_OSC (3 << 24) -#define S_OST (4 << 24) -#define S_CLASS (5 << 24) -#define S_OBDFS (6 << 24) /* obsolete */ -#define S_LLITE (7 << 24) -#define S_RPC (8 << 24) -#define S_EXT2OBD (9 << 24) /* obsolete */ -#define S_PORTALS (10 << 24) -#define S_SOCKNAL (11 << 24) -#define S_QSWNAL (12 << 24) -#define S_PINGER (13 << 24) -#define S_FILTER (14 << 24) -#define S_TRACE (15 << 24) /* obsolete */ -#define S_ECHO (16 << 24) -#define S_LDLM (17 << 24) -#define S_LOV (18 << 24) -#define S_GMNAL (19 << 24) -#define S_PTLROUTER (20 << 24) -#define S_COBD (21 << 24) -#define S_PTLBD (22 << 24) -#define S_LOG (23 << 24) - -/* If you change these values, please keep portals/linux/utils/debug.c +/* Debugging subsystems (32 bits, non-overlapping) */ +#define S_UNDEFINED (1 << 0) +#define S_MDC (1 << 1) +#define S_MDS (1 << 2) +#define S_OSC (1 << 3) +#define S_OST (1 << 4) +#define S_CLASS (1 << 5) +#define S_LOG (1 << 6) +#define S_LLITE (1 << 7) +#define S_RPC (1 << 8) +#define S_MGMT (1 << 9) +#define S_PORTALS (1 << 10) +#define S_SOCKNAL (1 << 11) +#define S_QSWNAL (1 << 12) +#define S_PINGER (1 << 13) +#define S_FILTER (1 << 14) +#define S_PTLBD (1 << 15) +#define S_ECHO (1 << 16) +#define S_LDLM (1 << 17) +#define S_LOV (1 << 18) +#define S_GMNAL (1 << 19) +#define S_PTLROUTER (1 << 20) +#define S_COBD (1 << 21) + +/* If you change these values, please keep portals/utils/debug.c * up to date! 
*/ -/* Debugging masks (24 bits, non-overlapping) */ +/* Debugging masks (32 bits, non-overlapping) */ #define D_TRACE (1 << 0) /* ENTRY/EXIT markers */ #define D_INODE (1 << 1) #define D_SUPER (1 << 2) @@ -80,20 +70,23 @@ extern unsigned int portal_printk; #define D_RPCTRACE (1 << 20) /* for distributed debugging */ #define D_VFSTRACE (1 << 21) -#ifndef __KERNEL__ -#define THREAD_SIZE 8192 +#ifdef __KERNEL__ +# include /* THREAD_SIZE */ +#else +# define THREAD_SIZE 8192 #endif -#ifdef __ia64__ -#define CDEBUG_STACK() (THREAD_SIZE - \ + +#ifdef __KERNEL__ +# ifdef __ia64__ +# define CDEBUG_STACK (THREAD_SIZE - \ ((unsigned long)__builtin_dwarf_cfa() & \ (THREAD_SIZE - 1))) -#else -#define CDEBUG_STACK() (THREAD_SIZE - \ +# else +# define CDEBUG_STACK (THREAD_SIZE - \ ((unsigned long)__builtin_frame_address(0) & \ (THREAD_SIZE - 1))) -#endif +# endif -#ifdef __KERNEL__ #define CHECK_STACK(stack) \ do { \ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ @@ -105,20 +98,21 @@ extern unsigned int portal_printk; /*panic("LBUG");*/ \ } \ } while (0) -#else +#else /* __KERNEL__ */ #define CHECK_STACK(stack) do { } while(0) -#endif +#define CDEBUG_STACK (0L) +#endif /* __KERNEL__ */ #if 1 #define CDEBUG(mask, format, a...) \ do { \ - CHECK_STACK(CDEBUG_STACK()); \ + CHECK_STACK(CDEBUG_STACK); \ if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ (portal_debug & (mask) && \ - portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \ + portal_subsystem_debug & DEBUG_SUBSYSTEM)) \ portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK(), format , ## a); \ + CDEBUG_STACK, format, ## a); \ } while (0) #define CWARN(format, a...) 
CDEBUG(D_WARNING, format, ## a) @@ -162,7 +156,6 @@ do { \ #define EXIT do { } while (0) #endif - #ifdef __KERNEL__ # include # include @@ -210,7 +203,8 @@ static inline void our_cond_resched(void) #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ #ifdef PORTAL_DEBUG -extern void kportal_assertion_failed(char *expr,char *file,char *func,int line); +extern void kportal_assertion_failed(char *expr, char *file, const char *func, + const int line); #define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ __FUNCTION__, __LINE__)) #else @@ -560,14 +554,14 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -void portals_run_lbug_upcall(char * file, char *fn, int line); +void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); int portals_debug_init(unsigned long bufsize); int portals_debug_cleanup(void); int portals_debug_clear_buffer(void); int portals_debug_mark_buffer(char *text); int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *file, unsigned int size); + char *file, unsigned int size); __s32 portals_debug_copy_to_user(char *buf, unsigned long len); #if (__GNUC__) /* Use the special GNU C __attribute__ hack to have the compiler check the @@ -578,13 +572,14 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len); # warning printf has been defined as a macro... # undef printf #endif -void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) +void portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + const char *format, ...) 
__attribute__ ((format (printf, 7, 8))); #else -void portals_debug_msg (int subsys, int mask, char *file, char *fn, - int line, unsigned long stack, - const char *format, ...); +void portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + const char *format, ...); #endif /* __GNUC__ */ void portals_debug_set_level(unsigned int debug_level); @@ -618,9 +613,9 @@ extern void kportal_blockallsigs (void); # define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); # define PORTAL_FREE(a, b) do { free(a); } while (0); # define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ - printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ - (subsys) >> 24, (mask), (long)time(0), file, fn, line, \ - getpid() , stack, ## a); + printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ + (subsys), (mask), (long)time(0), file, fn, line, \ + getpid() , stack, ## a); #endif #ifndef CURRENT_TIME @@ -911,13 +906,13 @@ ptl_handle_ni_t *kportal_get_ni (int nal); void kportal_put_ni (int nal); #ifdef __CYGWIN__ -#ifndef BITS_PER_LONG -#if (~0UL) == 0xffffffffUL -#define BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif -#endif +# ifndef BITS_PER_LONG +# if (~0UL) == 0xffffffffUL +# define BITS_PER_LONG 32 +# else +# define BITS_PER_LONG 64 +# endif +# endif #endif #if (BITS_PER_LONG == 32 || __WORDSIZE == 32) diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h index e28fbac..a7cb4d1 100644 --- a/lustre/portals/include/linux/portals_compat25.h +++ b/lustre/portals/include/linux/portals_compat25.h @@ -1,13 +1,56 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _PORTALS_COMPAT_H +#define _PORTALS_COMPAT_H + +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved +#if SPINLOCK_DEBUG +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || 
defined(CONFIG_RH_2_4_20) +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) +# else +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) +# endif +#else +# define SIGNAL_MASK_ASSERT() +#endif +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_LOCK(task, flags) \ + +# define SIGNAL_MASK_LOCK(task, flags) \ spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ +# define SIGNAL_MASK_UNLOCK(task, flags) \ spin_unlock_irqrestore(&task->sighand->siglock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp, 1) # define RECALC_SIGPENDING recalc_sigpending() -#else -# define SIGNAL_MASK_LOCK(task, flags) \ +# define CURRENT_SECONDS get_seconds() + +#else /* 2.4.x */ + +# define SIGNAL_MASK_LOCK(task, flags) \ spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ +# define SIGNAL_MASK_UNLOCK(task, flags) \ spin_unlock_irqrestore(&task->sigmask_lock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp) # define RECALC_SIGPENDING recalc_sigpending(current) +# define CURRENT_SECONDS CURRENT_TIME + +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) +# define THREAD_NAME(comm, fmt, a...) \ + sprintf(comm, fmt "|%d", ## a, current->thread.extern_pid) +#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define THREAD_NAME(comm, fmt, a...) \ + sprintf(comm, fmt "|%d", ## a, current->thread.mode.tt.extern_pid) +#else +# define THREAD_NAME(comm, fmt, a...) 
\ + sprintf(comm, fmt, ## a) #endif + +#endif /* _PORTALS_COMPAT_H */ diff --git a/lustre/portals/include/portals/list.h b/lustre/portals/include/portals/list.h index 2b63312..78a1e2d 100644 --- a/lustre/portals/include/portals/list.h +++ b/lustre/portals/include/portals/list.h @@ -1,6 +1,4 @@ #ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - /* * Simple doubly linked list implementation. @@ -101,7 +99,9 @@ static inline void list_del_init(struct list_head *entry) __list_del(entry->prev, entry->next); INIT_LIST_HEAD(entry); } +#endif +#ifndef list_for_each_entry /** * list_move - delete from one list and add as another's head * @list: the entry to move @@ -124,7 +124,10 @@ static inline void list_move_tail(struct list_head *list, __list_del(list->prev, list->next); list_add_tail(list, head); } +#endif +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H /** * list_empty - tests whether a list is empty * @head: the list to test. diff --git a/lustre/portals/include/portals/lltrace.h b/lustre/portals/include/portals/lltrace.h index 7d1b304..d389aab 100644 --- a/lustre/portals/include/portals/lltrace.h +++ b/lustre/portals/include/portals/lltrace.h @@ -2,7 +2,7 @@ * vim:expandtab:shiftwidth=8:tabstop=8: * * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl + * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl */ #ifndef __LTRACE_H_ #define __LTRACE_H_ @@ -31,20 +31,20 @@ static inline int ltrace_write_file(char* fname) argv[0] = "debug_kernel"; argv[1] = fname; argv[2] = "1"; - + fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - + return jt_dbg_debug_kernel(3, argv); } static inline int ltrace_clear() { char* argv[1]; - + argv[0] = "clear"; - + fprintf(stderr, "[ptlctl] %s\n", argv[0]); - + return jt_dbg_clear_debug_buf(1, argv); } @@ -52,9 +52,9 @@ static inline int ltrace_mark(int indent_level, char* text) { char* argv[2]; char mark_buf[PATH_MAX]; - + snprintf(mark_buf, PATH_MAX, 
"====%d=%s", indent_level, text); - + argv[0] = "mark"; argv[1] = mark_buf; return jt_dbg_mark_debug_buf(2, argv); @@ -65,9 +65,9 @@ static inline int ltrace_applymasks() char* argv[2]; argv[0] = "list"; argv[1] = "applymasks"; - + fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - + return jt_dbg_list(2, argv); } @@ -95,19 +95,19 @@ static inline int ltrace_start() #ifdef PORTALS_DEV_ID rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); #endif - ltrace_filter("class"); + ltrace_filter("class"); ltrace_filter("socknal"); - ltrace_filter("qswnal"); - ltrace_filter("gmnal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); + ltrace_filter("qswnal"); + ltrace_filter("gmnal"); + ltrace_filter("portals"); + + ltrace_show("all_types"); + ltrace_filter("trace"); + ltrace_filter("malloc"); + ltrace_filter("net"); + ltrace_filter("page"); + ltrace_filter("other"); + ltrace_filter("info"); ltrace_applymasks(); return rc; @@ -146,7 +146,7 @@ static inline void ltrace_add_processnames(char* fname) struct timezone tz; int nob; int underuml = !not_uml(); - + gettimeofday(&tv, &tz); nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); @@ -167,7 +167,7 @@ static inline void ltrace_add_processnames(char* fname) "(%s:%d:%s() %d+%lu): ", "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); } - + nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); system(cmdbuf); } diff --git a/lustre/portals/include/portals/myrnal.h b/lustre/portals/include/portals/myrnal.h index 12b1925..13790f7 100644 --- a/lustre/portals/include/portals/myrnal.h +++ b/lustre/portals/include/portals/myrnal.h @@ -1,6 +1,3 @@ -/* -*/ - #ifndef MYRNAL_H #define MYRNAL_H diff --git a/lustre/portals/include/portals/nal.h b/lustre/portals/include/portals/nal.h index 88be63c..7cb3ab7 100644 --- 
a/lustre/portals/include/portals/nal.h +++ b/lustre/portals/include/portals/nal.h @@ -1,5 +1,3 @@ -/* -*/ #ifndef _NAL_H_ #define _NAL_H_ diff --git a/lustre/portals/include/portals/ppid.h b/lustre/portals/include/portals/ppid.h index 4727599..760f465 100644 --- a/lustre/portals/include/portals/ppid.h +++ b/lustre/portals/include/portals/ppid.h @@ -1,6 +1,3 @@ -/* - */ - #ifndef _INCppidh_ #define _INCppidh_ diff --git a/lustre/portals/include/portals/stringtab.h b/lustre/portals/include/portals/stringtab.h index c9683f7..33e4375 100644 --- a/lustre/portals/include/portals/stringtab.h +++ b/lustre/portals/include/portals/stringtab.h @@ -1,5 +1,3 @@ /* -*/ -/* * stringtab.h */ diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h index d4038b6..0269290 100644 --- a/lustre/portals/include/portals/types.h +++ b/lustre/portals/include/portals/types.h @@ -2,14 +2,19 @@ #define _P30_TYPES_H_ #ifdef __linux__ -#include -#include +# include +# include #else -#include +# include typedef u_int32_t __u32; typedef u_int64_t __u64; -typedef unsigned long long cycles_t; -static inline cycles_t get_cycles(void) { return 0; } +#endif + +#ifdef __KERNEL__ +# include +#else +# include +# define do_gettimeofday(tv) gettimeofday(tv, NULL) #endif typedef __u64 ptl_nid_t; @@ -31,7 +36,7 @@ typedef ptl_handle_any_t ptl_handle_md_t; typedef ptl_handle_any_t ptl_handle_me_t; #define PTL_HANDLE_NONE \ -((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) + ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) #define PTL_EQ_NONE PTL_HANDLE_NONE static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) @@ -108,17 +113,15 @@ typedef struct { ptl_handle_me_t unlinked_me; ptl_md_t mem_desc; ptl_hdr_data_t hdr_data; - cycles_t arrival_time; + struct timeval arrival_time; volatile ptl_seq_t sequence; } ptl_event_t; - typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; - typedef struct { volatile ptl_seq_t sequence; ptl_size_t 
size; @@ -130,7 +133,6 @@ typedef struct { ptl_eq_t *eq; } ptl_ni_t; - typedef struct { int max_match_entries; /* max number of match entries */ int max_mem_descriptors; /* max number of memory descriptors */ diff --git a/lustre/portals/knals/.cvsignore b/lustre/portals/knals/.cvsignore index 282522d..89a4aa6 100644 --- a/lustre/portals/knals/.cvsignore +++ b/lustre/portals/knals/.cvsignore @@ -1,2 +1,3 @@ Makefile Makefile.in +.*.o.cmd diff --git a/lustre/portals/knals/Makefile.mk b/lustre/portals/knals/Makefile.mk index ce40a60..cd5d9d6 100644 --- a/lustre/portals/knals/Makefile.mk +++ b/lustre/portals/knals/Makefile.mk @@ -1,4 +1,4 @@ -include ../Kernelenv +include $(obj)/../Kernelenv obj-y = socknal/ -# more coming... \ No newline at end of file +# more coming... diff --git a/lustre/portals/knals/gmnal/gmnal.c b/lustre/portals/knals/gmnal/gmnal.c index ceeea2a..0cffc158 100644 --- a/lustre/portals/knals/gmnal/gmnal.c +++ b/lustre/portals/knals/gmnal/gmnal.c @@ -124,7 +124,7 @@ static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, return &kgmnal_api; } -static void __exit +static void /*__exit*/ kgmnal_finalize(void) { struct list_head *tmp; diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c index 1066d69..479cc2c 100644 --- a/lustre/portals/knals/scimacnal/scimacnal.c +++ b/lustre/portals/knals/scimacnal/scimacnal.c @@ -112,7 +112,7 @@ static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, /* Called by kernel at module unload time */ -static void __exit +static void /*__exit*/ kscimacnal_finalize(void) { /* FIXME: How should the shutdown procedure really look? 
*/ diff --git a/lustre/portals/knals/socknal/.cvsignore b/lustre/portals/knals/socknal/.cvsignore index e995588..95973d6 100644 --- a/lustre/portals/knals/socknal/.cvsignore +++ b/lustre/portals/knals/socknal/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.o.cmd diff --git a/lustre/portals/knals/socknal/Makefile.mk b/lustre/portals/knals/socknal/Makefile.mk index 46edf01..5c1b366 100644 --- a/lustre/portals/knals/socknal/Makefile.mk +++ b/lustre/portals/knals/socknal/Makefile.mk @@ -3,7 +3,7 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include ../../Kernelenv +include $(src)/../../Kernelenv obj-y += ksocknal.o ksocknal-objs := socknal.o socknal_cb.o diff --git a/lustre/portals/knals/toenal/toenal.c b/lustre/portals/knals/toenal/toenal.c index 1f5dc38..77ee473 100644 --- a/lustre/portals/knals/toenal/toenal.c +++ b/lustre/portals/knals/toenal/toenal.c @@ -379,7 +379,7 @@ ktoenal_cmd(struct portal_ioctl_data * data, void * private) } -void __exit +void /*__exit*/ ktoenal_module_fini (void) { CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", diff --git a/lustre/portals/knals/toenal/toenal_cb.c b/lustre/portals/knals/toenal/toenal_cb.c index ec37f6f..abd0731 100644 --- a/lustre/portals/knals/toenal/toenal_cb.c +++ b/lustre/portals/knals/toenal/toenal_cb.c @@ -893,6 +893,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); goto get_fmb; /* => go get a fwd msg buffer */ default: + break; } /* Not Reached */ LBUG (); @@ -934,6 +935,7 @@ ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) goto out; /* (later) */ default: + break; } /* Not Reached */ diff --git a/lustre/portals/libcfs/.cvsignore b/lustre/portals/libcfs/.cvsignore index 67d1a3d..7fa686f 100644 --- a/lustre/portals/libcfs/.cvsignore +++ b/lustre/portals/libcfs/.cvsignore @@ -2,3 +2,4 @@ Makefile Makefile.in link-stamp +.*.o.cmd diff --git 
a/lustre/portals/libcfs/Makefile.mk b/lustre/portals/libcfs/Makefile.mk index 3196ea2..9aa838f 100644 --- a/lustre/portals/libcfs/Makefile.mk +++ b/lustre/portals/libcfs/Makefile.mk @@ -6,4 +6,4 @@ include fs/lustre/portals/Kernelenv obj-y += libcfs.o -licfs-objs := module.o proc.o debug.o \ No newline at end of file +libcfs-objs := module.o proc.o debug.o diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c index 8d26dbb..f37cd96 100644 --- a/lustre/portals/libcfs/debug.c +++ b/lustre/portals/libcfs/debug.c @@ -571,8 +571,8 @@ int portals_debug_init(unsigned long bufsize) memset(debug_buf, 0, debug_size); debug_wrapped = 0; - printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - bufsize, debug_buf); + //printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", + //bufsize, debug_buf); atomic_set(&debug_off_a, debug_off); notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); debug_size = bufsize; @@ -632,9 +632,9 @@ int portals_debug_mark_buffer(char *text) if (debug_buf == NULL) return -EINVAL; - CDEBUG(0, "*******************************************************************************\n"); + CDEBUG(0, "********************************************************\n"); CDEBUG(0, "DEBUG MARKER: %s\n", text); - CDEBUG(0, "*******************************************************************************\n"); + CDEBUG(0, "********************************************************\n"); return 0; } @@ -672,8 +672,8 @@ __s32 portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. */ void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) +portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, const char *format, ...) 
{ va_list ap; unsigned long flags; @@ -728,8 +728,8 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, do_gettimeofday(&tv); prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id(), + "%06x:%06x:%d:%lu.%06lu ", + subsys, mask, smp_processor_id(), tv.tv_sec, tv.tv_usec); max_nob -= prefix_nob; @@ -752,7 +752,7 @@ portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, va_start(ap, format); msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); + max_nob, format, ap); max_nob -= msg_nob; va_end(ap); @@ -790,7 +790,7 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -void portals_run_lbug_upcall(char * file, char *fn, int line) +void portals_run_lbug_upcall(char *file, const char *fn, const int line) { char *argv[6]; char *envp[3]; @@ -803,7 +803,7 @@ void portals_run_lbug_upcall(char * file, char *fn, int line) argv[0] = portals_upcall; argv[1] = "LBUG"; argv[2] = file; - argv[3] = fn; + argv[3] = (char *)fn; argv[4] = buf; argv[5] = NULL; diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c index 14cc325..e8eb290 100644 --- a/lustre/portals/libcfs/module.c +++ b/lustre/portals/libcfs/module.c @@ -62,10 +62,10 @@ static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; struct semaphore nal_cmd_sem; #ifdef PORTAL_DEBUG -void -kportal_assertion_failed (char *expr, char *file, char *func, int line) +void kportal_assertion_failed(char *expr, char *file, const char *func, + const int line) { - portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(), + portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK, "ASSERTION(%s) failed\n", expr); LBUG_WITH_LOC(file, func, line); } diff --git a/lustre/portals/portals/.cvsignore b/lustre/portals/portals/.cvsignore index e995588..95973d6 100644 --- a/lustre/portals/portals/.cvsignore +++ 
b/lustre/portals/portals/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile Makefile.in +.*.o.cmd diff --git a/lustre/portals/portals/Makefile.mk b/lustre/portals/portals/Makefile.mk index 5627ef7..7822846 100644 --- a/lustre/portals/portals/Makefile.mk +++ b/lustre/portals/portals/Makefile.mk @@ -3,7 +3,10 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include ../Kernelenv +include $(src)/../Kernelenv obj-y += portals.o -portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o +portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o \ + lib-move.o lib-msg.o lib-ni.o lib-pid.o \ + api-eq.o api-errno.o api-init.o api-me.o api-ni.o \ + api-wrap.o diff --git a/lustre/portals/portals/api-init.c b/lustre/portals/portals/api-init.c index e59c922..dc1fead 100644 --- a/lustre/portals/portals/api-init.c +++ b/lustre/portals/portals/api-init.c @@ -26,7 +26,7 @@ #include int ptl_init; -unsigned int portal_subsystem_debug = 0xfff7e3ff; +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_QSWNAL | S_SOCKNAL | S_GMNAL); unsigned int portal_debug = ~0; unsigned int portal_printk; unsigned int portal_stack; diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c index fde4f16..02f8b60 100644 --- a/lustre/portals/portals/lib-move.c +++ b/lustre/portals/portals/lib-move.c @@ -544,7 +544,7 @@ get_new_msg (nal_cb_t *nal, lib_md_t *md) msg->send_ack = 0; msg->md = md; - msg->ev.arrival_time = get_cycles(); + do_gettimeofday(&msg->ev.arrival_time); md->pending++; if (md->threshold != PTL_MD_THRESH_INF) { LASSERT (md->threshold > 0); diff --git a/lustre/portals/router/.cvsignore b/lustre/portals/router/.cvsignore index e995588..95973d6 100644 --- a/lustre/portals/router/.cvsignore +++ b/lustre/portals/router/.cvsignore @@ -1,3 +1,4 @@ .deps Makefile 
Makefile.in +.*.o.cmd diff --git a/lustre/portals/router/Makefile.mk b/lustre/portals/router/Makefile.mk index 64bd09b..9b02c03 100644 --- a/lustre/portals/router/Makefile.mk +++ b/lustre/portals/router/Makefile.mk @@ -3,7 +3,7 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -include ../Kernelenv +include $(src)/../Kernelenv obj-y += kptlrouter.o kptlrouter-objs := router.o proc.o diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c index 6074c3c..27a7fba 100644 --- a/lustre/portals/router/router.c +++ b/lustre/portals/router/router.c @@ -23,8 +23,8 @@ #include "router.h" -struct list_head kpr_routes; -struct list_head kpr_nals; +LIST_HEAD(kpr_routes); +LIST_HEAD(kpr_nals); unsigned long long kpr_fwd_bytes; unsigned long kpr_fwd_packets; @@ -35,7 +35,7 @@ atomic_t kpr_queue_depth; * * Once in a blue moon we register/deregister NALs and add/remove routing * entries (thread context only)... */ -rwlock_t kpr_rwlock; +rwlock_t kpr_rwlock = RW_LOCK_UNLOCKED; kpr_router_interface_t kpr_router_interface = { kprri_register: kpr_register_nal, @@ -55,7 +55,7 @@ kpr_control_interface_t kpr_control_interface = { int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) { - long flags; + unsigned long flags; struct list_head *e; kpr_nal_entry_t *ne; @@ -98,7 +98,7 @@ kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) void kpr_shutdown_nal (void *arg) { - long flags; + unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); @@ -123,7 +123,7 @@ kpr_shutdown_nal (void *arg) void kpr_deregister_nal (void *arg) { - long flags; + unsigned long flags; kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); @@ -296,7 +296,7 @@ int kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, ptl_nid_t hi_nid) { - long flags; + 
unsigned long flags; struct list_head *e; kpr_route_entry_t *re; @@ -345,7 +345,7 @@ kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, int kpr_del_route (ptl_nid_t nid) { - long flags; + unsigned long flags; struct list_head *e; CDEBUG(D_OTHER, "Del route "LPX64"\n", nid); @@ -398,7 +398,7 @@ kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, return (-ENOENT); } -static void __exit +static void /*__exit*/ kpr_finalise (void) { LASSERT (list_empty (&kpr_nals)); @@ -427,10 +427,6 @@ kpr_initialise (void) CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", atomic_read(&portal_kmemory)); - rwlock_init(&kpr_rwlock); - INIT_LIST_HEAD(&kpr_routes); - INIT_LIST_HEAD(&kpr_nals); - kpr_proc_init(); PORTAL_SYMBOL_REGISTER(kpr_router_interface); diff --git a/lustre/portals/tests/.cvsignore b/lustre/portals/tests/.cvsignore index 051d1bd..d0c4c88 100644 --- a/lustre/portals/tests/.cvsignore +++ b/lustre/portals/tests/.cvsignore @@ -1,3 +1,4 @@ Makefile Makefile.in .deps +.*.o.cmd diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c index 389ffbb..4d04ffb 100644 --- a/lustre/portals/tests/ping_cli.c +++ b/lustre/portals/tests/ping_cli.c @@ -260,7 +260,7 @@ pingcli_start(struct portal_ioctl_data *args) /* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) +int kping_client(struct portal_ioctl_data *args) { PORTAL_ALLOC (client, sizeof(struct pingcli_data)); if (client == NULL) @@ -282,7 +282,7 @@ static int __init pingcli_init(void) } /* pingcli_init() */ -static void __exit pingcli_cleanup(void) +static void /*__exit*/ pingcli_cleanup(void) { PORTAL_SYMBOL_UNREGISTER (kping_client); } /* pingcli_cleanup() */ diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c index 1037d09..873e11c 100644 --- a/lustre/portals/tests/ping_srv.c +++ b/lustre/portals/tests/ping_srv.c @@ -47,11 +47,11 @@ #include #define STDSIZE (sizeof(int) + sizeof(int) + 
sizeof(struct timeval)) -#define MAXSIZE (16*1024*1024) +#define MAXSIZE (16*1024) static unsigned ping_head_magic; static unsigned ping_bulk_magic; -static int nal = 0; // Your NAL, +static int nal = SOCKNAL; // Your NAL, static unsigned long packets_valid = 0; // Valid packets static int running = 1; atomic_t pkt; @@ -282,7 +282,7 @@ static int __init pingsrv_init(void) } /* pingsrv_init() */ -static void __exit pingsrv_cleanup(void) +static void /*__exit*/ pingsrv_cleanup(void) { remove_proc_entry ("net/pingsrv", NULL); diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c index 4cef08b..35e114b 100644 --- a/lustre/portals/tests/sping_cli.c +++ b/lustre/portals/tests/sping_cli.c @@ -235,7 +235,7 @@ pingcli_start(struct portal_ioctl_data *args) /* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) +int kping_client(struct portal_ioctl_data *args) { PORTAL_ALLOC (client, sizeof(struct pingcli_data)); @@ -258,7 +258,7 @@ static int __init pingcli_init(void) } /* pingcli_init() */ -static void __exit pingcli_cleanup(void) +static void /*__exit*/ pingcli_cleanup(void) { PORTAL_SYMBOL_UNREGISTER (kping_client); } /* pingcli_cleanup() */ diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c index a18ea35..2b45a46 100644 --- a/lustre/portals/tests/sping_srv.c +++ b/lustre/portals/tests/sping_srv.c @@ -269,7 +269,7 @@ static int __init pingsrv_init(void) } /* pingsrv_init() */ -static void __exit pingsrv_cleanup(void) +static void /*__exit*/ pingsrv_cleanup(void) { remove_proc_entry ("net/pingsrv", NULL); diff --git a/lustre/portals/unals/debug.c b/lustre/portals/unals/debug.c index 529bb2d..b73f042 100644 --- a/lustre/portals/unals/debug.c +++ b/lustre/portals/unals/debug.c @@ -84,8 +84,8 @@ int portals_debug_copy_to_user(char *buf, unsigned long len) /* FIXME: I'm not very smart; someone smarter should make this better. 
*/ void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) +portals_debug_msg (int subsys, int mask, char *file, const char *fn, + const int line, const char *format, ...) { va_list ap; unsigned long flags; diff --git a/lustre/portals/utils/.cvsignore b/lustre/portals/utils/.cvsignore index 148310a..8e474ad 100644 --- a/lustre/portals/utils/.cvsignore +++ b/lustre/portals/utils/.cvsignore @@ -5,4 +5,5 @@ debugctl ptlctl .deps routerstat -wirecheck \ No newline at end of file +wirecheck +.*.cmd diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index 9ab1c73d..0a009d2 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -53,17 +53,18 @@ static char rawbuf[8192]; static char *buf = rawbuf; static int max = 8192; //static int g_pfd = -1; -static int subsystem_array[1 << 8]; +static int subsystem_mask = ~0; static int debug_mask = ~0; static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite", - "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter", - "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL}; + {"undefined", "mdc", "mds", "osc", "ost", "class", "log", "llite", + "rpc", "mgmt", "portals", "socknal", "qswnal", "pinger", "filter", + "ptlbd", "echo", "ldlm", "lov", "gmnal", "router", "cobd", NULL}; static const char *portal_debug_masks[] = {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL}; + "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", + NULL}; struct debug_daemon_cmd { char *cmd; @@ -88,7 +89,10 @@ static int do_debug_mask(char *name, int enable) printf("%s output from subsystem \"%s\"\n", enable ? 
"Enabling" : "Disabling", portal_debug_subsystems[i]); - subsystem_array[i] = enable; + if (enable) + subsystem_mask |= (1 << i); + else + subsystem_mask &= ~(1 << i); found = 1; } } @@ -111,7 +115,6 @@ static int do_debug_mask(char *name, int enable) int dbg_initialize(int argc, char **argv) { - memset(subsystem_array, 1, sizeof(subsystem_array)); return 0; } @@ -213,12 +216,7 @@ int jt_dbg_list(int argc, char **argv) for (i = 0; portal_debug_masks[i] != NULL; i++) printf(", %s", portal_debug_masks[i]); printf("\n"); - } - else if (strcasecmp(argv[1], "applymasks") == 0) { - unsigned int subsystem_mask = 0; - for (i = 0; portal_debug_subsystems[i] != NULL; i++) { - if (subsystem_array[i]) subsystem_mask |= (1 << i); - } + } else if (strcasecmp(argv[1], "applymasks") == 0) { applymask_all(subsystem_mask, debug_mask); } return 0; @@ -230,12 +228,6 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw) { char *p, *z; unsigned long subsystem, debug, dropped = 0, kept = 0; - int max_sub, max_type; - - for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++) - ; - for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++) - ; while (size) { p = memchr(buf, '\n', size); @@ -247,8 +239,7 @@ static void dump_buffer(FILE *fd, char *buf, int size, int raw) z++; /* for some reason %*s isn't working. 
*/ *p = '\0'; - if (subsystem < max_sub && - subsystem_array[subsystem] && + if ((subsystem_mask & subsystem) && (!debug || (debug_mask & debug))) { if (raw) fprintf(fd, "%s\n", buf); @@ -551,6 +542,8 @@ int jt_dbg_modules(int argc, char **argv) {"mds_ext3", "lustre/mds"}, {"mds_extN", "lustre/mds"}, {"ptlbd", "lustre/ptlbd"}, + {"mgmt_svc", "lustre/mgmt"}, + {"mgmt_cli", "lustre/mgmt"}, {NULL, NULL} }; char *path = ".."; diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c index 90d66f5..a89f4f7 100644 --- a/lustre/portals/utils/portals.c +++ b/lustre/portals/utils/portals.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -106,6 +107,27 @@ nal2name (int nal) return ((e == NULL) ? "???" : e->name); } +static struct hostent * +ptl_gethostbyname(char * hname) { + struct hostent *he; + he = gethostbyname(hname); + if (!he) { + switch(h_errno) { + case HOST_NOT_FOUND: + case NO_ADDRESS: + fprintf(stderr, "Unable to resolve hostname: %s\n", + hname); + break; + default: + fprintf(stderr, "gethostbyname error: %s\n", + strerror(errno)); + break; + } + return NULL; + } + return he; +} + int ptl_parse_nid (ptl_nid_t *nidp, char *str) { @@ -127,7 +149,7 @@ ptl_parse_nid (ptl_nid_t *nidp, char *str) if ((('a' <= str[0] && str[0] <= 'z') || ('A' <= str[0] && str[0] <= 'Z')) && - (he = gethostbyname (str)) != NULL) + (he = ptl_gethostbyname (str)) != NULL) { __u32 addr = *(__u32 *)he->h_addr; @@ -351,12 +373,9 @@ int jt_ptl_connect(int argc, char **argv) goto usage; } - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return -1; - } g_port = atol(argv[2]); @@ -525,12 +544,9 @@ int jt_ptl_disconnect(int argc, char **argv) PORTAL_IOC_INIT(data); if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return 
-1; - } data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ @@ -582,12 +598,9 @@ int jt_ptl_push_connection (int argc, char **argv) PORTAL_IOC_INIT(data); if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); + he = ptl_gethostbyname(argv[1]); + if (!he) return -1; - } data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ diff --git a/lustre/ptlbd/client.c b/lustre/ptlbd/client.c index af76523..0a6ad8f 100644 --- a/lustre/ptlbd/client.c +++ b/lustre/ptlbd/client.c @@ -76,7 +76,7 @@ static int ptlbd_cl_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(0); } -static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover) +static int ptlbd_cl_cleanup(struct obd_device *obd, int flags) { struct ptlbd_obd *ptlbd = &obd->u.ptlbd; struct obd_import *imp; @@ -99,9 +99,8 @@ static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover) /* modelled after ptlrpc_import_connect() */ -int ptlbd_cl_connect(struct lustre_handle *conn, - struct obd_device *obd, - struct obd_uuid *target_uuid) +int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *target_uuid) { struct ptlbd_obd *ptlbd = &obd->u.ptlbd; struct obd_import *imp = ptlbd->bd_import; @@ -196,7 +195,7 @@ int ptlbd_cl_init(void) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(ptlbd,&lvars); return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars, OBD_PTLBD_CL_DEVICENAME); } diff --git a/lustre/ptlbd/main.c b/lustre/ptlbd/main.c index e3fde99..dc591f4 100644 --- a/lustre/ptlbd/main.c +++ b/lustre/ptlbd/main.c @@ -57,7 +57,7 @@ out_cl: RETURN(ret); } -static void __exit ptlbd_exit(void) +static void /*__exit*/ ptlbd_exit(void) { ENTRY; ptlbd_cl_exit(); diff --git a/lustre/ptlbd/server.c b/lustre/ptlbd/server.c index 34ec737..d293a86 100644 --- a/lustre/ptlbd/server.c +++ b/lustre/ptlbd/server.c @@ -74,7 
+74,7 @@ out_filp: RETURN(rc); } -static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover) +static int ptlbd_sv_cleanup(struct obd_device *obddev, int flags) { struct ptlbd_obd *ptlbd = &obddev->u.ptlbd; ENTRY; @@ -102,7 +102,7 @@ int ptlbd_sv_init(void) { struct lprocfs_static_vars lvars; - lprocfs_init_vars(&lvars); + lprocfs_init_vars(ptlbd,&lvars); return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars, OBD_PTLBD_SV_DEVICENAME); } diff --git a/lustre/ptlrpc/.cvsignore b/lustre/ptlrpc/.cvsignore index 067f05c..cf51f30 100644 --- a/lustre/ptlrpc/.cvsignore +++ b/lustre/ptlrpc/.cvsignore @@ -7,3 +7,4 @@ Makefile.in .deps tags TAGS +.*.cmd diff --git a/lustre/ptlrpc/Makefile.am b/lustre/ptlrpc/Makefile.am index eb44329..355d48c 100644 --- a/lustre/ptlrpc/Makefile.am +++ b/lustre/ptlrpc/Makefile.am @@ -16,7 +16,7 @@ EXTRA_PROGRAMS = ptlrpc ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \ client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \ -ptlrpc_internal.h +ptlrpc_internal.h recov_thread.c endif include $(top_srcdir)/Rules diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index a98af3e..50ea587 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -78,13 +78,13 @@ void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, struct ptlrpc_peer peer; int err; - err = ptlrpc_uuid_to_peer (uuid, &peer); + err = ptlrpc_uuid_to_peer(uuid, &peer); if (err != 0) { CERROR("cannot find peer %s!\n", uuid->uuid); return; } - memcpy (&conn->c_peer, &peer, sizeof (peer)); + memcpy(&conn->c_peer, &peer, sizeof (peer)); return; } @@ -96,7 +96,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void) if (!desc) return NULL; - spin_lock_init (&desc->bd_lock); + spin_lock_init(&desc->bd_lock); init_waitqueue_head(&desc->bd_waitq); INIT_LIST_HEAD(&desc->bd_page_list); desc->bd_md_h = PTL_HANDLE_NONE; @@ -108,10 +108,10 @@ static inline struct ptlrpc_bulk_desc *new_bulk(void) 
struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, int type, int portal) { - struct obd_import *imp = req->rq_import; + struct obd_import *imp = req->rq_import; struct ptlrpc_bulk_desc *desc; - LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE); + LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE); desc = new_bulk(); if (desc == NULL) @@ -132,10 +132,10 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req, int type, int portal) { - struct obd_export *exp = req->rq_export; + struct obd_export *exp = req->rq_export; struct ptlrpc_bulk_desc *desc; - LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK); + LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK); desc = new_bulk(); if (desc == NULL) @@ -159,12 +159,12 @@ int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, OBD_ALLOC(bulk, sizeof(*bulk)); if (bulk == NULL) - return (-ENOMEM); + return -ENOMEM; - LASSERT (page != NULL); - LASSERT (pageoffset >= 0); - LASSERT (len > 0); - LASSERT (pageoffset + len <= PAGE_SIZE); + LASSERT(page != NULL); + LASSERT(pageoffset >= 0); + LASSERT(len > 0); + LASSERT(pageoffset + len <= PAGE_SIZE); bulk->bp_page = page; bulk->bp_pageoffset = pageoffset; @@ -181,9 +181,9 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) struct list_head *tmp, *next; ENTRY; - LASSERT (desc != NULL); - LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */ - LASSERT (!desc->bd_network_rw); /* network hands off or */ + LASSERT(desc != NULL); + LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */ + LASSERT(!desc->bd_network_rw); /* network hands off or */ list_for_each_safe(tmp, next, &desc->bd_page_list) { struct ptlrpc_bulk_page *bulk; @@ -191,7 +191,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) ptlrpc_free_bulk_page(bulk); } - LASSERT (desc->bd_page_count == 0); + LASSERT(desc->bd_page_count == 0); 
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); if (desc->bd_export) @@ -205,7 +205,7 @@ void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) { - LASSERT (bulk != NULL); + LASSERT(bulk != NULL); list_del(&bulk->bp_link); bulk->bp_desc->bd_page_count--; @@ -247,7 +247,7 @@ struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, request->rq_connection = ptlrpc_connection_addref(imp->imp_connection); - spin_lock_init (&request->rq_lock); + spin_lock_init(&request->rq_lock); INIT_LIST_HEAD(&request->rq_list); init_waitqueue_head(&request->rq_wait_for_rep); request->rq_xid = ptlrpc_next_xid(); @@ -289,18 +289,18 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - LASSERT (req->rq_phase == expected_phase); + LASSERT(req->rq_phase == expected_phase); n++; } - LASSERT (set->set_remaining == 0 || set->set_remaining == n); + LASSERT(set->set_remaining == 0 || set->set_remaining == n); list_for_each_safe(tmp, next, &set->set_requests) { struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); list_del_init(&req->rq_set_chain); - LASSERT (req->rq_phase == expected_phase); + LASSERT(req->rq_phase == expected_phase); if (req->rq_phase == RQ_PHASE_NEW) { @@ -312,7 +312,8 @@ void ptlrpc_set_destroy(struct ptlrpc_request_set *set) /* higher level (i.e. 
LOV) failed; * let the sub reqs clean up */ req->rq_status = -EBADR; - interpreter(req, &req->rq_async_args, req->rq_status); + interpreter(req, &req->rq_async_args, + req->rq_status); } set->set_remaining--; } @@ -402,8 +403,8 @@ static int after_reply(struct ptlrpc_request *req, int *restartp) int rc; ENTRY; - LASSERT (!req->rq_receiving_reply); - LASSERT (req->rq_replied); + LASSERT(!req->rq_receiving_reply); + LASSERT(req->rq_replied); if (restartp != NULL) *restartp = 0; @@ -418,14 +419,14 @@ static int after_reply(struct ptlrpc_request *req, int *restartp) rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen); if (rc) { CERROR("unpack_rep failed: %d\n", rc); - RETURN (-EPROTO); + RETURN(-EPROTO); } if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY && req->rq_repmsg->type != PTL_RPC_MSG_ERR) { CERROR("invalid packet type received (type=%u)\n", req->rq_repmsg->type); - RETURN (-EPROTO); + RETURN(-EPROTO); } /* Store transno in reqmsg for replay. */ @@ -447,6 +448,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp) if (req->rq_err) RETURN(-EIO); + if (req->rq_no_resend) + RETURN(rc); /* -ENOTCONN */ + if (req->rq_resend) { if (restartp == NULL) LBUG(); /* async resend not supported yet */ @@ -456,7 +460,7 @@ static int after_reply(struct ptlrpc_request *req, int *restartp) *restartp = 1; lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); DEBUG_REQ(D_HA, req, "resending: "); - RETURN (0); + RETURN(0); } CERROR("request should be err or resend: %p\n", req); @@ -472,10 +476,9 @@ static int after_reply(struct ptlrpc_request *req, int *restartp) imp->imp_max_transno = req->rq_transno; /* Replay-enabled imports return commit-status information. 
*/ - if (req->rq_repmsg->last_committed) { + if (req->rq_repmsg->last_committed) imp->imp_peer_committed_transno = req->rq_repmsg->last_committed; - } ptlrpc_free_committed(imp); spin_unlock_irqrestore(&imp->imp_lock, flags); } @@ -510,8 +513,8 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) continue; if (req->rq_phase == RQ_PHASE_INTERPRET) - GOTO (interpret, req->rq_status); - + GOTO(interpret, req->rq_status); + if (req->rq_err) { ptlrpc_unregister_reply(req); if (req->rq_status == 0) @@ -522,7 +525,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) list_del_init(&req->rq_list); spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (interpret, req->rq_status); + GOTO(interpret, req->rq_status); } if (req->rq_intr) { @@ -535,7 +538,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) list_del_init(&req->rq_list); spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (interpret, req->rq_status); + GOTO(interpret, req->rq_status); } if (req->rq_phase == RQ_PHASE_RPC) { @@ -553,13 +556,13 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) list_add_tail(&req->rq_list, &imp->imp_sending_list); - if (req->rq_import_generation < + if (req->rq_import_generation < imp->imp_generation) { req->rq_status = -EIO; req->rq_phase = RQ_PHASE_INTERPRET; - spin_unlock_irqrestore(&imp->imp_lock, + spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (interpret, req->rq_status); + GOTO(interpret, req->rq_status); } spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -571,16 +574,17 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) req->rq_resend = 0; spin_unlock_irqrestore(&req->rq_lock, flags); + ptlrpc_unregister_reply(req); if (req->rq_bulk) ptlrpc_unregister_bulk(req); - } + } rc = ptl_send_rpc(req); if (rc) { req->rq_status = rc; req->rq_phase = RQ_PHASE_INTERPRET; - GOTO (interpret, req->rq_status); + GOTO(interpret, req->rq_status); } } @@ -612,21 +616,21 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) */ if (req->rq_bulk == NULL || 
req->rq_status != 0) { req->rq_phase = RQ_PHASE_INTERPRET; - GOTO (interpret, req->rq_status); + GOTO(interpret, req->rq_status); } req->rq_phase = RQ_PHASE_BULK; } - LASSERT (req->rq_phase == RQ_PHASE_BULK); + LASSERT(req->rq_phase == RQ_PHASE_BULK); if (!ptlrpc_bulk_complete (req->rq_bulk)) continue; req->rq_phase = RQ_PHASE_INTERPRET; interpret: - LASSERT (req->rq_phase == RQ_PHASE_INTERPRET); - LASSERT (!req->rq_receiving_reply); + LASSERT(req->rq_phase == RQ_PHASE_INTERPRET); + LASSERT(!req->rq_receiving_reply); ptlrpc_unregister_reply(req); if (req->rq_bulk != NULL) @@ -651,7 +655,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) set->set_remaining--; } - RETURN (set->set_remaining == 0); + RETURN(set->set_remaining == 0); } int ptlrpc_expire_one_request(struct ptlrpc_request *req) @@ -695,7 +699,7 @@ static int expired_set(void *data) time_t now = LTIME_S (CURRENT_TIME); ENTRY; - LASSERT (set != NULL); + LASSERT(set != NULL); /* A timeout expired; see which reqs it applies to... 
*/ list_for_each (tmp, &set->set_requests) { @@ -728,7 +732,7 @@ static void interrupted_set(void *data) struct list_head *tmp; unsigned long flags; - LASSERT (set != NULL); + LASSERT(set != NULL); CERROR("INTERRUPTED SET %p\n", set); list_for_each(tmp, &set->set_requests) { @@ -757,12 +761,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) int timeout; ENTRY; + SIGNAL_MASK_ASSERT(); /* XXX BUG 1511 */ LASSERT(!list_empty(&set->set_requests)); list_for_each(tmp, &set->set_requests) { req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - LASSERT (req->rq_level == LUSTRE_CONN_FULL); - LASSERT (req->rq_phase == RQ_PHASE_NEW); + LASSERT(req->rq_level == LUSTRE_CONN_FULL); + LASSERT(req->rq_phase == RQ_PHASE_NEW); req->rq_phase = RQ_PHASE_RPC; imp = req->rq_import; @@ -789,7 +794,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) spin_lock (&req->rq_lock); req->rq_waiting = 1; spin_unlock (&req->rq_lock); - LASSERT (list_empty (&req->rq_list)); + LASSERT(list_empty (&req->rq_list)); // list_del(&req->rq_list); list_add_tail(&req->rq_list, &imp->imp_delayed_list); spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -801,6 +806,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) list_add_tail(&req->rq_list, &imp->imp_sending_list); spin_unlock_irqrestore(&imp->imp_lock, flags); + req->rq_reqmsg->status = current->pid; CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc" " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, @@ -820,7 +826,8 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) now = LTIME_S (CURRENT_TIME); timeout = 0; list_for_each (tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); + req = list_entry(tmp, struct ptlrpc_request, + rq_set_chain); /* request in-flight? 
*/ if (!((req->rq_phase == RQ_PHASE_RPC && @@ -846,7 +853,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) expired_set, interrupted_set, set); rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi); - LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); + LASSERT(rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); /* -EINTR => all requests have been flagged rq_intr so next * check completes. @@ -857,13 +864,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * the error cases -eeb. */ } while (rc != 0); - LASSERT (set->set_remaining == 0); + LASSERT(set->set_remaining == 0); rc = 0; list_for_each(tmp, &set->set_requests) { req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - LASSERT (req->rq_phase == RQ_PHASE_COMPLETE); + LASSERT(req->rq_phase == RQ_PHASE_COMPLETE); if (req->rq_status != 0) rc = req->rq_status; } @@ -885,7 +892,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) return; } - LASSERT (!request->rq_receiving_reply); + LASSERT(!request->rq_receiving_reply); /* We must take it off the imp_replay_list first. Otherwise, we'll set * request->rq_reqmsg to NULL while osc_close is dereferencing it. 
*/ @@ -940,7 +947,7 @@ static int __ptlrpc_req_finished(struct ptlrpc_request *request, int locked) if (request == NULL) RETURN(1); - if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || + if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) { CERROR("dereferencing freed request (bug 575)\n"); LBUG(); @@ -981,7 +988,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) int rc; ENTRY; - LASSERT (!in_interrupt ()); /* might sleep */ + LASSERT(!in_interrupt ()); /* might sleep */ spin_lock_irqsave (&request->rq_lock, flags); if (!request->rq_receiving_reply) { /* not waiting for a reply */ @@ -991,7 +998,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) return; } - LASSERT (!request->rq_replied); /* callback hasn't completed */ + LASSERT(!request->rq_replied); /* callback hasn't completed */ spin_unlock_irqrestore (&request->rq_lock, flags); rc = PtlMDUnlink (request->rq_reply_md_h); @@ -1000,8 +1007,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) LBUG (); case PTL_OK: /* unlinked before completion */ - LASSERT (request->rq_receiving_reply); - LASSERT (!request->rq_replied); + LASSERT(request->rq_receiving_reply); + LASSERT(!request->rq_replied); spin_lock_irqsave (&request->rq_lock, flags); request->rq_receiving_reply = 0; spin_unlock_irqrestore (&request->rq_lock, flags); @@ -1018,7 +1025,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) rc = l_wait_event (request->rq_wait_for_rep, request->rq_replied, &lwi); - LASSERT (rc == 0 || rc == -ETIMEDOUT); + LASSERT(rc == 0 || rc == -ETIMEDOUT); if (rc == 0) { spin_lock_irqsave (&request->rq_lock, flags); /* Ensure the callback has completed scheduling @@ -1032,8 +1039,8 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) /* fall through */ case PTL_INV_MD: /* callback completed */ - LASSERT (!request->rq_receiving_reply); - LASSERT (request->rq_replied); + 
LASSERT(!request->rq_receiving_reply); + LASSERT(request->rq_replied); EXIT; return; } @@ -1061,7 +1068,7 @@ void ptlrpc_free_committed(struct obd_import *imp) req = list_entry(tmp, struct ptlrpc_request, rq_list); /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ - LASSERT (req != last_req); + LASSERT(req != last_req); last_req = req; if (req->rq_import_generation < imp->imp_generation) { @@ -1208,14 +1215,13 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) struct l_wait_info lwi; struct obd_import *imp = req->rq_import; struct obd_device *obd = imp->imp_obd; - struct ptlrpc_connection *conn = imp->imp_connection; - unsigned int flags; + unsigned long flags; int do_restart = 0; int timeout = 0; ENTRY; - LASSERT (req->rq_set == NULL); - LASSERT (!req->rq_receiving_reply); + LASSERT(req->rq_set == NULL); + LASSERT(!req->rq_receiving_reply); /* for distributed debugging */ req->rq_reqmsg->status = current->pid; @@ -1224,7 +1230,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid, + imp->imp_connection->c_peer.peer_ni->pni_name, + imp->imp_connection->c_peer.peer_nid, req->rq_reqmsg->opc); /* Mark phase here for a little debug help */ @@ -1242,13 +1249,13 @@ restart: if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) { DEBUG_REQ(D_ERROR, req, "IMP_INVALID:"); spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EIO); + GOTO(out, rc = -EIO); } if (req->rq_import_generation < imp->imp_generation) { DEBUG_REQ(D_ERROR, req, "req old gen:"); spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EIO); + GOTO(out, rc = -EIO); } if (req->rq_level > imp->imp_level) { @@ -1256,7 +1263,7 @@ restart: if (req->rq_no_recov || obd->obd_no_recov || imp->imp_dlm_fake) { spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EWOULDBLOCK); + 
GOTO(out, rc = -EWOULDBLOCK); } list_add_tail(&req->rq_list, &imp->imp_delayed_list); @@ -1269,23 +1276,24 @@ restart: (req->rq_level <= imp->imp_level || req->rq_err), &lwi); - DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)", - current->comm, req->rq_level, imp->imp_level); + DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d or %d == 1)", + current->comm, imp->imp_level, req->rq_level, + req->rq_err); spin_lock_irqsave(&imp->imp_lock, flags); list_del_init(&req->rq_list); - if (req->rq_err || + if (req->rq_err || req->rq_import_generation < imp->imp_generation) rc = -EIO; if (rc) { spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc); + GOTO(out, rc); } - CERROR("process %d resumed\n", current->pid); + DEBUG_REQ(D_HA, req, "resumed"); } /* XXX this is the same as ptlrpc_set_wait */ @@ -1335,7 +1343,7 @@ restart: &reply_ev); reply_in_callback(&reply_ev); - LASSERT (reply_ev.mem_desc.user_ptr == (void *)req); + LASSERT(reply_ev.mem_desc.user_ptr == (void *)req); // ptlrpc_check_reply(req); // not required now it only tests } @@ -1347,7 +1355,8 @@ restart: "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, - conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid, + imp->imp_connection->c_peer.peer_ni->pni_name, + imp->imp_connection->c_peer.peer_nid, req->rq_reqmsg->opc); spin_lock_irqsave(&imp->imp_lock, flags); @@ -1421,7 +1430,7 @@ restart: ptlrpc_bulk_complete(req->rq_bulk), &lwi); if (brc != 0) { - LASSERT (brc == -ETIMEDOUT); + LASSERT(brc == -ETIMEDOUT); CERROR ("Timed out waiting for bulk\n"); rc = brc; } @@ -1429,14 +1438,14 @@ restart: if (rc < 0) { /* MDS blocks for put ACKs before replying */ /* OSC sets rq_no_resend for the time being */ - LASSERT (req->rq_no_resend); + LASSERT(req->rq_no_resend); ptlrpc_unregister_bulk (req); } } - LASSERT (!req->rq_receiving_reply); + LASSERT(!req->rq_receiving_reply); req->rq_phase = RQ_PHASE_INTERPRET; - RETURN (rc); + RETURN(rc); } int 
ptlrpc_replay_req(struct ptlrpc_request *req) @@ -1450,7 +1459,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) * state it was left in */ /* Not handling automatic bulk replay yet (or ever?) */ - LASSERT (req->rq_bulk == NULL); + LASSERT(req->rq_bulk == NULL); DEBUG_REQ(D_NET, req, "about to replay"); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 07be1af..c4c47d3 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -50,6 +50,7 @@ struct ll_rpc_opcode { { OST_SAN_READ, "ost_san_read" }, { OST_SAN_WRITE, "ost_san_write" }, { OST_SYNCFS, "ost_syncfs" }, + { OST_SET_INFO, "ost_set_info" }, { MDS_GETATTR, "mds_getattr" }, { MDS_GETATTR_NAME, "mds_getattr_name" }, { MDS_CLOSE, "mds_close" }, @@ -60,6 +61,8 @@ struct ll_rpc_opcode { { MDS_GETSTATUS, "mds_getstatus" }, { MDS_STATFS, "mds_statfs" }, { MDS_GETLOVINFO, "mds_getlovinfo" }, + { MDS_PIN, "mds_pin" }, + { MDS_UNPIN, "mds_unpin" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, @@ -71,7 +74,8 @@ struct ll_rpc_opcode { { PTLBD_FLUSH, "ptlbd_flush" }, { PTLBD_CONNECT, "ptlbd_connect" }, { PTLBD_DISCONNECT, "ptlbd_disconnect" }, - { OBD_PING, "obd_ping" } + { OBD_PING, "obd_ping" }, + { OBD_LOG_CANCEL, "obd_log_cancel" }, }; const char* ll_opcode2str(__u32 opcode) @@ -119,7 +123,7 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev, } lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR, - svc_counter_config, "req_waittime", "cycles"); + svc_counter_config, "req_waittime", "usec"); /* Wait for b_eq branch lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR, svc_counter_config, "svc_eqdepth", "reqs"); @@ -127,12 +131,12 @@ void ptlrpc_lprocfs_register_service(struct obd_device *obddev, /* no stddev on idletime */ lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR, (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX), - "svc_idletime", "cycles"); + "svc_idletime", "usec"); for (i = 
0; i < LUSTRE_MAX_OPCODES; i++) { __u32 opcode = ll_rpc_opcode_table[i].opcode; lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i, svc_counter_config, ll_opcode2str(opcode), - "cycles"); + "usec"); } rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 3811d2a..0e2d651 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -187,14 +187,9 @@ void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size) } buflen = m->buflens[n]; - if (buflen == 0) { - CERROR("msg %p buffer[%d] is zero length\n", m, n); - return NULL; - } - if (buflen < min_size) { CERROR("msg %p buffer[%d] size %d too small (required %d)\n", - m, n, buflen, min_size); + m, n, buflen, min_size); return NULL; } @@ -249,17 +244,16 @@ void *lustre_swab_reqbuf (struct ptlrpc_request *req, int index, int min_size, { void *ptr; - LASSERT_REQSWAB (req, index); + LASSERT_REQSWAB(req, index); ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size); if (ptr == NULL) - return (NULL); + return NULL; - if (swabber != NULL && - lustre_msg_swabbed (req->rq_reqmsg)) + if (swabber != NULL && lustre_msg_swabbed(req->rq_reqmsg)) ((void (*)(void *))swabber)(ptr); - return (ptr); + return ptr; } /* Wrap up the normal fixed length case */ @@ -268,17 +262,16 @@ void *lustre_swab_repbuf (struct ptlrpc_request *req, int index, int min_size, { void *ptr; - LASSERT_REPSWAB (req, index); + LASSERT_REPSWAB(req, index); - ptr = lustre_msg_buf (req->rq_repmsg, index, min_size); + ptr = lustre_msg_buf(req->rq_repmsg, index, min_size); if (ptr == NULL) - return (NULL); + return NULL; - if (swabber != NULL && - lustre_msg_swabbed (req->rq_repmsg)) + if (swabber != NULL && lustre_msg_swabbed(req->rq_repmsg)) ((void (*)(void *))swabber)(ptr); - return (ptr); + return ptr; } /* byte flipping routines for all wire types declared in @@ -638,12 +631,12 @@ void lustre_assert_wire_constants (void) LASSERT (REINT_RENAME == 5); 
LASSERT (REINT_OPEN == 6); LASSERT (REINT_MAX == 6); - LASSERT (IT_INTENT_EXEC == 1); - LASSERT (IT_OPEN_LOOKUP == 2); - LASSERT (IT_OPEN_NEG == 4); - LASSERT (IT_OPEN_POS == 8); - LASSERT (IT_OPEN_CREATE == 16); - LASSERT (IT_OPEN_OPEN == 32); + LASSERT (DISP_IT_EXECD == 1); + LASSERT (DISP_LOOKUP_EXECD == 2); + LASSERT (DISP_LOOKUP_NEG == 4); + LASSERT (DISP_LOOKUP_POS == 8); + LASSERT (DISP_OPEN_CREATE == 16); + LASSERT (DISP_OPEN_OPEN == 32); LASSERT (MDS_STATUS_CONN == 1); LASSERT (MDS_STATUS_LOV == 2); LASSERT (MDS_OPEN_HAS_EA == 1); diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index ebc69e1..c81fb51 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -47,12 +47,12 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp) int ptlrpc_pinger_add_import(struct obd_import *imp) { +#ifndef ENABLE_PINGER + return 0; +#else int rc; ENTRY; -#ifndef ENABLE_PINGER - RETURN(0); -#else if (!list_empty(&imp->imp_pinger_chain)) RETURN(-EALREADY); @@ -77,12 +77,12 @@ int ptlrpc_pinger_add_import(struct obd_import *imp) int ptlrpc_pinger_del_import(struct obd_import *imp) { +#ifndef ENABLE_PINGER + return 0; +#else int rc; ENTRY; -#ifndef ENABLE_PINGER - RETURN(0); -#else if (list_empty(&imp->imp_pinger_chain)) RETURN(-ENOENT); @@ -118,14 +118,7 @@ static int ptlrpc_pinger_main(void *arg) RECALC_SIGPENDING; SIGNAL_MASK_UNLOCK(current, flags); -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - sprintf(current->comm, "%s|%d", data->name, - current->thread.mode.tt.extern_pid); -#else - strcpy(current->comm, data->name); -#endif + THREAD_NAME(current->comm, "%s", data->name); unlock_kernel(); /* Record that the thread is running */ @@ -147,7 +140,8 @@ static int ptlrpc_pinger_main(void *arg) down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct obd_import 
*imp = - list_entry(iter, struct obd_import, imp_pinger_chain); + list_entry(iter, struct obd_import, + imp_pinger_chain); int generation, level; unsigned long flags; @@ -159,16 +153,19 @@ static int ptlrpc_pinger_main(void *arg) spin_unlock_irqrestore(&imp->imp_lock, flags); if (level != LUSTRE_CONN_FULL) { - CDEBUG(D_HA, "not pinging %s (in recovery)\n", + CDEBUG(D_HA, + "not pinging %s (in recovery)\n", imp->imp_target_uuid.uuid); continue; } - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); + req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, + NULL); if (!req) { CERROR("OOM trying to ping\n"); break; } + req->rq_no_resend = 1; req->rq_replen = lustre_msg_size(0, NULL); req->rq_level = LUSTRE_CONN_FULL; req->rq_phase = RQ_PHASE_RPC; diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index cb96c3c..8d66c88 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -33,19 +33,22 @@ struct ptlrpc_request_set; /* ldlm hooks that we need, managed via inter_module_{get,put} */ extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int); extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *, - struct ldlm_res_id *, int); + struct ldlm_res_id *, int); extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *); int ptlrpc_get_ldlm_hooks(void); void ptlrpc_daemonize(void); void ptlrpc_request_handle_eviction(struct ptlrpc_request *); -void lustre_assert_wire_constants (void); +void lustre_assert_wire_constants(void); void ptlrpc_lprocfs_register_service(struct obd_device *obddev, struct ptlrpc_service *svc); void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc); +/* recovd_thread.c */ +int llog_init_commit_master(void); +int llog_cleanup_commit_master(int force); static inline int opcode_offset(__u32 opc) { if (opc < OST_LAST_OPC) { @@ -66,9 +69,9 @@ static inline int opcode_offset(__u32 opc) { (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + (OST_LAST_OPC 
- OST_FIRST_OPC)); - } else if (opc == OBD_PING) { + } else if (opc < OBD_LAST_OPC) { /* OBD Ping */ - return (opc - OBD_PING + + return (opc - OBD_FIRST_OPC + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + (MDS_LAST_OPC - MDS_FIRST_OPC) + @@ -79,10 +82,11 @@ static inline int opcode_offset(__u32 opc) { } } -#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \ - + (LDLM_LAST_OPC - LDLM_FIRST_OPC) \ - + (MDS_LAST_OPC - MDS_FIRST_OPC) \ - + (OST_LAST_OPC - OST_FIRST_OPC)) +#define LUSTRE_MAX_OPCODES ((PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + \ + (LDLM_LAST_OPC - LDLM_FIRST_OPC) + \ + (MDS_LAST_OPC - MDS_FIRST_OPC) + \ + (OST_LAST_OPC - OST_FIRST_OPC) + \ + (OBD_LAST_OPC - OBD_FIRST_OPC)) enum { PTLRPC_REQWAIT_CNTR = 0, diff --git a/lustre/ptlrpc/ptlrpc_lib.c b/lustre/ptlrpc/ptlrpc_lib.c index ccc05dc..3dfec9a 100644 --- a/lustre/ptlrpc/ptlrpc_lib.c +++ b/lustre/ptlrpc/ptlrpc_lib.c @@ -24,11 +24,12 @@ #ifdef __KERNEL__ # include -#else +#else # include #endif #include #include +#include #include #include @@ -40,19 +41,27 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) struct obd_import *imp; struct obd_uuid server_uuid; int rq_portal, rp_portal, connect_op; - char *name; + char *name = obddev->obd_type->typ_name; ENTRY; - if (obddev->obd_type->typ_ops->o_brw) { + /* In a more perfect world, we would hang a ptlrpc_client off of + * obd_type and just use the values from there. 
*/ + if (!strcmp(name, LUSTRE_OSC_NAME)) { rq_portal = OST_REQUEST_PORTAL; rp_portal = OSC_REPLY_PORTAL; - name = "osc"; connect_op = OST_CONNECT; - } else { + } else if (!strcmp(name, LUSTRE_MDC_NAME)) { rq_portal = MDS_REQUEST_PORTAL; rp_portal = MDC_REPLY_PORTAL; - name = "mdc"; connect_op = MDS_CONNECT; + } else if (!strcmp(name, LUSTRE_MGMTCLI_NAME)) { + rq_portal = MGMT_REQUEST_PORTAL; + rp_portal = MGMT_REPLY_PORTAL; + connect_op = MGMT_CONNECT; + } else { + CERROR("unknown client OBD type \"%s\", can't setup\n", + name); + RETURN(-EINVAL); } if (data->ioc_inllen1 < 1) { @@ -108,18 +117,60 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_import = imp; cli->cl_max_mds_easize = sizeof(struct lov_mds_md); + cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie); cli->cl_sandev = to_kdev_t(0); + /* Register with management client if we need to. */ + if (data->ioc_inllen3 > 0) { + char *mgmt_name = data->ioc_inlbuf3; + int rc; + struct obd_device *mgmt_obd; + mgmtcli_register_for_events_t register_f; + + CDEBUG(D_HA, "%s registering with %s for events about %s\n", + obddev->obd_name, mgmt_name, server_uuid.uuid); + + mgmt_obd = class_name2obd(mgmt_name); + if (!mgmt_obd) { + CERROR("can't find mgmtcli %s to register\n", + mgmt_name); + class_destroy_import(imp); + RETURN(-ENOENT); + } + + register_f = inter_module_get("mgmtcli_register_for_events"); + if (!register_f) { + CERROR("can't i_m_g mgmtcli_register_for_events\n"); + class_destroy_import(imp); + RETURN(-ENOSYS); + } + + rc = register_f(mgmt_obd, obddev, &imp->imp_target_uuid); + inter_module_put("mgmtcli_register_for_events"); + + if (!rc) + cli->cl_mgmtcli_obd = mgmt_obd; + + RETURN(rc); + } + RETURN(0); } -int client_obd_cleanup(struct obd_device *obddev, int force, int failover) +int client_obd_cleanup(struct obd_device *obddev, int flags) { - struct client_obd *client = &obddev->u.cli; + struct client_obd *cli = &obddev->u.cli; - if (!client->cl_import) + if 
(!cli->cl_import) RETURN(-EINVAL); - class_destroy_import(client->cl_import); - client->cl_import = NULL; + if (cli->cl_mgmtcli_obd) { + mgmtcli_deregister_for_events_t dereg_f; + + dereg_f = inter_module_get("mgmtcli_deregister_for_events"); + dereg_f(cli->cl_mgmtcli_obd, obddev); + inter_module_put("mgmtcli_deregister_for_events"); + } + class_destroy_import(cli->cl_import); + cli->cl_import = NULL; RETURN(0); } diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 57f3653..4b75026 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -100,13 +100,14 @@ __init int ptlrpc_init(void) int rc; ENTRY; - lustre_assert_wire_constants (); - + lustre_assert_wire_constants(); + rc = ptlrpc_init_portals(); if (rc) RETURN(rc); ptlrpc_init_connection(); + llog_init_commit_master(); ptlrpc_put_connection_superhack = ptlrpc_put_connection; ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight; @@ -117,6 +118,9 @@ static void __exit ptlrpc_exit(void) { ptlrpc_exit_portals(); ptlrpc_cleanup_connection(); +#ifdef ENABLE_ORPHANS + llog_cleanup_commit_master(0); +#endif } /* connection.c */ diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index ca2afad..70e9b5c 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -24,11 +24,11 @@ #define DEBUG_SUBSYSTEM S_RPC #ifdef __KERNEL__ -#include -#include -#include +# include +# include +# include #else -#include +# include #endif #include @@ -62,7 +62,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp) struct lustre_handle old_hdl; __u64 committed_before_reconnect = imp->imp_peer_committed_transno; - CERROR("reconnect handle "LPX64"\n", + CERROR("reconnect handle "LPX64"\n", imp->imp_dlm_handle.cookie); req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp); @@ -89,7 +89,7 @@ int ptlrpc_reconnect_import(struct obd_import *imp) GOTO(out_disc, rc = -ENOTCONN); } - if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, + if 
(memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, sizeof(imp->imp_remote_handle))) { CERROR("%s@%s changed handle from "LPX64" to "LPX64 "; copying, but this may foreshadow disaster\n", @@ -104,12 +104,13 @@ int ptlrpc_reconnect_import(struct obd_import *imp) CERROR("reconnected to %s@%s after partition\n", imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid); GOTO(out_disc, rc = RECON_RESULT_RECONNECTED); - } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) { + } else if (lustre_msg_get_op_flags(req->rq_repmsg) & + MSG_CONNECT_RECOVERING) { rc = RECON_RESULT_RECOVERING; } else { rc = RECON_RESULT_EVICTED; } - + old_hdl = imp->imp_remote_handle; imp->imp_remote_handle = req->rq_repmsg->handle; CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n", @@ -150,9 +151,9 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) rc = USERMODEHELPER(argv[0], argv, envp); if (rc < 0) { CERROR("Error invoking recovery upcall %s %s %s: %d; check " - "/proc/sys/lustre/upcall\n", + "/proc/sys/lustre/upcall\n", argv[0], argv[1], argv[2], rc); - + } else { CERROR("Invoked upcall %s %s %s", argv[0], argv[1], argv[2]); @@ -180,10 +181,10 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp) rc = USERMODEHELPER(argv[0], argv, envp); if (rc < 0) { - CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check " - "/proc/sys/lustre/lustre_upcall\n", + CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; " + "check /proc/sys/lustre/lustre_upcall\n", argv[0], argv[1], argv[2], argv[3], argv[4],rc); - + } else { CERROR("Invoked upcall %s %s %s %s %s\n", argv[0], argv[1], argv[2], argv[3], argv[4]); @@ -196,7 +197,6 @@ int ptlrpc_replay(struct obd_import *imp) struct list_head *tmp, *pos; struct ptlrpc_request *req; unsigned long flags; - __u64 committed = imp->imp_peer_committed_transno; ENTRY; /* It might have committed some after we last spoke, so make sure we @@ -207,7 +207,7 @@ int ptlrpc_replay(struct obd_import *imp) 
spin_unlock_irqrestore(&imp->imp_lock, flags); CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n", - imp, imp->imp_target_uuid.uuid, committed); + imp, imp->imp_target_uuid.uuid, imp->imp_peer_committed_transno); list_for_each(tmp, &imp->imp_replay_list) { req = list_entry(tmp, struct ptlrpc_request, rq_list); @@ -221,7 +221,7 @@ int ptlrpc_replay(struct obd_import *imp) * than the one we're replaying (it can't be committed until it's * replayed, and we're doing that here). l_f_e_safe protects against * problems with the current request being committed, in the unlikely - * event of that race. So, in conclusion, I think that it's safe to + * event of that race. So, in conclusion, I think that it's safe to * perform this list-walk without the imp_lock held. * * But, the {mdc,osc}_replay_open callbacks both iterate @@ -235,7 +235,7 @@ int ptlrpc_replay(struct obd_import *imp) DEBUG_REQ(D_HA, req, "REPLAY:"); rc = ptlrpc_replay_req(req); - + if (rc) { CERROR("recovery replay error %d for req "LPD64"\n", rc, req->rq_xid); @@ -307,7 +307,6 @@ inline void ptlrpc_invalidate_import_state(struct obd_import *imp) ptlrpc_abort_inflight(imp); } - void ptlrpc_handle_failed_import(struct obd_import *imp) { ENTRY; @@ -329,7 +328,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req) int rc; struct obd_import *imp= failed_req->rq_import; unsigned long flags; - struct ptlrpc_request *req; ENTRY; CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n", @@ -347,7 +345,6 @@ void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req) failed_req->rq_err = 1; spin_unlock_irqrestore (&failed_req->rq_lock, flags); } - ptlrpc_req_finished(req); EXIT; } @@ -361,17 +358,23 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) notify_obd = imp->imp_obd->u.cli.cl_containing_lov; - /* When deactivating, mark import invalid, and - abort in-flight requests. */ + /* When deactivating, mark import invalid, and abort in-flight + * requests. 
*/ if (!active) { - CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid); spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_invalid = 1; + /* This is a bit of a hack, but invalidating replayable + * imports makes a temporary reconnect failure into a much more + * ugly -- and hard to remedy -- situation. */ + if (!imp->imp_replayable) { + CDEBUG(D_HA, "setting import %s INVALID\n", + imp->imp_target_uuid.uuid); + imp->imp_invalid = 1; + } imp->imp_generation++; spin_unlock_irqrestore(&imp->imp_lock, flags); ptlrpc_invalidate_import_state(imp); -// ptlrpc_abort_inflight(imp); - } + //ptlrpc_abort_inflight(imp); + } if (notify_obd == NULL) GOTO(out, rc = 0); @@ -403,8 +406,9 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) out: /* When activating, mark import valid */ - if (active) { - CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid); + if (active && !rc) { + CDEBUG(D_HA, "setting import %s VALID\n", + imp->imp_target_uuid.uuid); spin_lock_irqsave(&imp->imp_lock, flags); imp->imp_invalid = 0; spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -420,7 +424,7 @@ void ptlrpc_fail_import(struct obd_import *imp, int generation) ENTRY; LASSERT (!imp->imp_dlm_fake); - + spin_lock_irqsave(&imp->imp_lock, flags); if (imp->imp_level != LUSTRE_CONN_FULL) in_recovery = 1; @@ -466,14 +470,14 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) ENTRY; spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_level == LUSTRE_CONN_FULL || + if (imp->imp_level == LUSTRE_CONN_FULL || imp->imp_level == LUSTRE_CONN_NOTCONN) imp->imp_level = LUSTRE_CONN_RECOVER; else in_recover = 1; spin_unlock_irqrestore(&imp->imp_lock, flags); - if (in_recover == 1) + if (in_recover == 1) RETURN(-EALREADY); if (new_uuid) { diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index f2a1089..22ccb09 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -289,18 +289,24 @@ void ptlrpc_daemonize(void) 
reparent_to_init(); } +static long timeval_sub(struct timeval *large, struct timeval *small) +{ + return (large->tv_sec - small->tv_sec) * 1000000 + + (large->tv_usec - small->tv_usec); +} + static int ptlrpc_main(void *arg) { - struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; + struct ptlrpc_svc_data *data = arg; struct obd_device *obddev = data->dev; struct ptlrpc_service *svc = data->svc; struct ptlrpc_thread *thread = data->thread; struct ptlrpc_request *request; ptl_event_t *event; - int rc = 0; unsigned long flags; - cycles_t workdone_time = -1; - cycles_t svc_workcycles = -1; + struct timeval start_time, finish_time; + long total; + int rc = 0; ENTRY; lock_kernel(); @@ -311,21 +317,14 @@ static int ptlrpc_main(void *arg) RECALC_SIGPENDING; SIGNAL_MASK_UNLOCK(current, flags); -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - sprintf(current->comm, "%s|%d", data->name, - current->thread.mode.tt.extern_pid); -#else - strcpy(current->comm, data->name); -#endif + THREAD_NAME(current->comm, "%s", data->name); unlock_kernel(); OBD_ALLOC(event, sizeof(*event)); - if (!event) + if (event == NULL) GOTO(out, rc = -ENOMEM); OBD_ALLOC(request, sizeof(*request)); - if (!request) + if (request == NULL) GOTO(out_event, rc = -ENOMEM); /* Record that the thread is running */ @@ -334,14 +333,15 @@ static int ptlrpc_main(void *arg) /* XXX maintain a list of all managed devices: insert here */ + do_gettimeofday(&finish_time); /* And now, loop forever on requests */ while (1) { struct l_wait_info lwi = { 0 }; l_wait_event(svc->srv_waitq, ptlrpc_check_event(svc, thread, event), &lwi); + spin_lock(&svc->srv_lock); if (thread->t_flags & SVC_STOPPING) { - spin_lock(&svc->srv_lock); thread->t_flags &= ~SVC_STOPPING; spin_unlock(&svc->srv_lock); @@ -349,65 +349,64 @@ static int ptlrpc_main(void 
*arg) break; } - if (thread->t_flags & SVC_EVENT) { - cycles_t workstart_time; - - spin_lock(&svc->srv_lock); - thread->t_flags &= ~SVC_EVENT; - /* Update Service Statistics */ - workstart_time = get_cycles(); - if (workdone_time != -1 && svc->svc_stats != NULL) { - /* Stats for req(n) are updated just before - * req(n+1) is executed. This avoids need to - * reacquire svc->srv_lock after - * call to handling_request(). - */ - int opc; - - /* req_waittime */ - lprocfs_counter_add(svc->svc_stats, - PTLRPC_REQWAIT_CNTR, - (workstart_time - - event->arrival_time)); - /* svc_eqdepth */ - /* Wait for b_eq branch - lprocfs_counter_add(svc->svc_stats, - PTLRPC_SVCEQDEPTH_CNTR, - 0); - */ - /* svc_idletime */ - lprocfs_counter_add(svc->svc_stats, - PTLRPC_SVCIDLETIME_CNTR, - (workstart_time - - workdone_time)); - /* previous request */ - opc = opcode_offset(request->rq_reqmsg->opc); - if (opc > 0) { - LASSERT(opc < LUSTRE_MAX_OPCODES); - lprocfs_counter_add(svc->svc_stats, opc, - PTLRPC_LAST_CNTR + - svc_workcycles); - } - } + if (!(thread->t_flags & SVC_EVENT)) { + CERROR("unknown flag in service"); spin_unlock(&svc->srv_lock); + LBUG(); + EXIT; + break; + } + + thread->t_flags &= ~SVC_EVENT; + spin_unlock(&svc->srv_lock); + + do_gettimeofday(&start_time); + total = timeval_sub(&start_time, &event->arrival_time); + if (svc->svc_stats != NULL) { + lprocfs_counter_add(svc->svc_stats, PTLRPC_REQWAIT_CNTR, + total); + lprocfs_counter_add(svc->svc_stats, + PTLRPC_SVCIDLETIME_CNTR, + timeval_sub(&start_time, + &finish_time)); +#if 0 /* Wait for b_eq branch */ + lprocfs_counter_add(svc->svc_stats, + PTLRPC_SVCEQDEPTH_CNTR, 0); +#endif + } + if (total / 1000000 > (long)obd_timeout) { + CERROR("Dropping request from NID "LPX64" because it's " + "%ld seconds old.\n", event->initiator.nid, + total / 1000000); /* bug 1502 */ + } else { + CDEBUG(D_HA, "request from NID "LPX64" noticed after " + "%ldus\n", event->initiator.nid, total); rc = handle_incoming_request(obddev, svc, event, 
request); - workdone_time = get_cycles(); - svc_workcycles = workdone_time - workstart_time; - continue; } - - CERROR("unknown break in service"); - LBUG(); - EXIT; - break; + do_gettimeofday(&finish_time); + total = timeval_sub(&finish_time, &start_time); + + CDEBUG((total / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA, + "request "LPU64" from NID "LPX64" processed in %ldus " + "(%ldus total)\n", request->rq_xid, event->initiator.nid, + total, timeval_sub(&finish_time, &event->arrival_time)); + + if (svc->svc_stats != NULL) { + int opc = opcode_offset(request->rq_reqmsg->opc); + if (opc > 0) { + LASSERT(opc < LUSTRE_MAX_OPCODES); + lprocfs_counter_add(svc->svc_stats, + opc + PTLRPC_LAST_CNTR, + total); + } + } } /* NB should wait for all SENT callbacks to complete before exiting * here. Unfortunately at this time there is no way to track this - * state. - */ + * state. */ OBD_FREE(request, sizeof(*request)); out_event: OBD_FREE(event, sizeof(*event)); diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index a24a26a..1b2ba01 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -1,17 +1,17 @@ # lustre.spec %define version b_devel -%define kversion @RELEASE@ +%define kversion @LINUXRELEASE@ %define linuxdir @LINUX@ -Release: 0306170928kernel Summary: Lustre Lite File System Name: lustre-lite Version: %{version} +Release: @RELEASE@ Copyright: GPL Group: Utilities/System Requires: lustre-modules, PyXML -BuildRoot: /var/tmp/lustre-%{version}-root Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz +BuildRoot: /var/tmp/lustre-%{version}-root %description The Lustre Lite Cluster File System: kernel drivers for file system, @@ -69,21 +69,10 @@ cd $RPM_BUILD_DIR/lustre-%{version} ./configure --with-linux='%{linuxdir}' make -#%ifarch i386 -#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version} -#./configure --with-lib -#make -#%endif - %install cd $RPM_BUILD_DIR/lustre-%{version} make install 
prefix=$RPM_BUILD_ROOT -#%ifarch i386 -#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version} -#make install prefix=$RPM_BUILD_ROOT -#%endif - %ifarch alpha # this hurts me conf_flag= @@ -226,20 +215,20 @@ if [ ! -e /dev/portals ]; then fi depmod -ae || exit 0 -grep -q obdclass /etc/modules.conf || \ - echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf +#grep -q obdclass /etc/modules.conf || \ +# echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf -grep -q '/dev/obd' /etc/modules.conf || \ - echo 'alias /dev/obd obdclass' >> /etc/modules.conf +#grep -q '/dev/obd' /etc/modules.conf || \ +# echo 'alias /dev/obd obdclass' >> /etc/modules.conf -grep -q '/dev/lustre' /etc/modules.conf || \ - echo 'alias /dev/lustre obdclass' >> /etc/modules.conf +#grep -q '/dev/lustre' /etc/modules.conf || \ +# echo 'alias /dev/lustre obdclass' >> /etc/modules.conf -grep -q portals /etc/modules.conf || \ - echo 'alias char-major-10-240 portals' >> /etc/modules.conf +#grep -q portals /etc/modules.conf || \ +# echo 'alias char-major-10-240 portals' >> /etc/modules.conf -grep -q '/dev/portals' /etc/modules.conf || \ - echo 'alias /dev/portals portals' >> /etc/modules.conf +#grep -q '/dev/portals' /etc/modules.conf || \ +# echo 'alias /dev/portals portals' >> /etc/modules.conf %postun depmod -ae || exit 0 @@ -257,6 +246,7 @@ if grep -q slapd-lustre $slapd; then cp $tmp $slapd rm $tmp fi + %clean #rm -rf $RPM_BUILD_ROOT diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 2e5c1fe..21575d0 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -41,5 +41,9 @@ runas openfile unlinkmany fchdir_test +*.cmd getdents o_directory +mkdirdeep +utime +small_write diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 064de98..6600962 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -6,18 +6,21 @@ CFLAGS := -g -Wall # LDADD := -lreadline -ltermcap # -lefence EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) 
$(noinst_DATA) \ sanity.sh rundbench mcreate -pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh +pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh +pkgexample_SCRIPTS += local.sh echo.sh uml.sh lov.sh noinst_DATA = -noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \ - llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \ - runiozone runregression-net.sh runtests sanity.sh rundbench +noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh +noinst_SCRIPTS += llrmount.sh runfailure-mds runvmstat runfailure-net +noinst_SCRIPTS += runfailure-ost runiozone runregression-net.sh runtests +noinst_SCRIPTS += sanity.sh rundbench noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay -noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy -noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink +noinst_PROGRAMS += tchmod toexcl fsx test_brw openclose createdestroy +noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink utime noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat -noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory +noinst_PROGRAMS += wantedi statone runas openfile getdents mkdirdeep o_directory +noinst_PROGRAMS += small_write # noinst_PROGRAMS += ldaptest -sbin_PROGRAMS = mcreate mkdirmany +sbin_PROGRAMS = mcreate munlink mkdirmany # ldaptest_SOURCES = ldaptest.c tchmod_SOURCES = tchmod.c @@ -48,13 +51,15 @@ openfile_SOURCES = openfile.c wantedi_SOURCES = wantedi.c createtest_SOURCES = createtest.c open_delay_SOURCES = open_delay.c -opendirunlink_SOURCES=opendirunlink.c -opendevunlink_SOURCES=opendirunlink.c -fchdir_test_SOURCES=fchdir_test.c +opendirunlink_SOURCES = opendirunlink.c +opendevunlink_SOURCES = opendevunlink.c +fchdir_test_SOURCES = fchdir_test.c getdents_SOURCES=getdents.c 
o_directory_SOURCES = o_directory.c -#mkdirdeep_SOURCES= mkdirdeep.c -#mkdirdeep_LDADD=-L../portals/util -lptlctl -#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include +utime_SOURCES = utime.c +mkdirdeep_SOURCES = mkdirdeep.c +mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl +mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include +small_write_SOURCES = small_write.c include $(top_srcdir)/Rules diff --git a/lustre/tests/acceptance-metadata-double.sh b/lustre/tests/acceptance-metadata-double.sh index f647a55..496f3b4 100644 --- a/lustre/tests/acceptance-metadata-double.sh +++ b/lustre/tests/acceptance-metadata-double.sh @@ -8,6 +8,7 @@ set -e SRCDIR="`dirname $0`" CREATE=$SRCDIR/create.pl +RENAME=$SRCDIR/rename.pl debug_client_on() { @@ -23,118 +24,71 @@ MNT=${MNT:-/mnt/lustre} debug_client_on echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on" -perl $CREATE -- $MNT 2 10 +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on" -perl $CREATE --silent -- $MNT 2 100 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on" -perl $CREATE --mcreate=0 -- $MNT 2 10 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on" -perl $CREATE --mcreate=0 --silent -- $MNT 2 100 +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 10 ops, debug on" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=10 --use_mcreate=0 +echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 100 ops, debug on" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on" -perl rename.pl --count=2 $MNT 10 +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=10 echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on" -perl rename.pl --count=2 --silent $MNT 100 +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=100 --silent debug_client_off echo 
"create.pl, 2 mounts, 1 thread, 1000 ops, debug off" -perl $CREATE --silent -- $MNT 2 1000 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT 2 1000 +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off" +perl $CREATE --silent --use_mcreate=0 -- $MNT 2 1000 +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=1000 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off" -perl rename.pl --count=2 --silent $MNT 1000 +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --silent debug_client_on echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on" -perl $CREATE --silent -- $MNT 2 100 & -perl $CREATE --silent -- $MNT 2 100 & -wait -echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on" -perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 2 threads, 100 ops, debug on" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=2 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on" -perl rename.pl --count=2 --silent $MNT 1000 & -perl rename.pl --count=2 --silent $MNT 1000 & -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=1000 --num_threads=2 --silent debug_client_off echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off" -perl $CREATE --silent -- $MNT 2 2000 & -perl $CREATE --silent -- $MNT 2 2000 & -wait -echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent +echo "create.pl --use_mcreate=0, 2 
mounts, 2 threads, 2000 ops, debug off" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off" -perl rename.pl --count=2 --silent $MNT 2000 & -perl rename.pl --count=2 --silent $MNT 2000 & -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=2 --silent debug_client_on echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT 2 100 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 100 ops, debug on" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=100 --num_threads=4 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on" -for i in `seq 1 4`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent debug_client_off echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT 2 2000 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 
--num_threads=4 --silent debug_client_on echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT 2 500 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 500 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 500 ops, debug on" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=500 --num_threads=8 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on" -for i in `seq 1 8`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent debug_client_off echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT 2 2000 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent +echo "create.pl --use_mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off" +perl $CREATE --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=2 --iterations=2000 --num_threads=8 --silent diff --git a/lustre/tests/acceptance-metadata-single.sh b/lustre/tests/acceptance-metadata-single.sh index 53774e5..2bf0a53 100644 --- a/lustre/tests/acceptance-metadata-single.sh +++ b/lustre/tests/acceptance-metadata-single.sh @@ -8,6 +8,7 @@ set -e SRCDIR="`dirname $0`" CREATE=$SRCDIR/create.pl +RENAME=$SRCDIR/rename.pl debug_client_on() { @@ 
-23,121 +24,75 @@ MNT=${MNT:-/mnt/lustre} debug_client_on echo "create.pl, 1 mount, 1 thread, 10 ops, debug on" -perl $CREATE -- $MNT -1 10 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 echo "create.pl, 1 mount, 1 thread, 100 ops, debug on" -perl $CREATE --silent -- $MNT -1 100 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on" -perl $CREATE --mcreate=0 -- $MNT -1 10 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=10 --use_mcreate=0 echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on" -perl $CREATE --mcreate=0 --silent -- $MNT -1 100 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on" -perl rename.pl $MNT 10 +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=10 echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on" -perl rename.pl --silent $MNT 100 +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=100 --silent debug_client_off echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off" -perl $CREATE --silent -- $MNT -1 1000 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT -1 1000 +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off" -perl rename.pl --silent $MNT 1000 +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --silent debug_client_on echo "create.pl, 1 mount, 2 threads, 100 ops, debug on" -perl $CREATE --silent -- $MNT -1 100 & -perl $CREATE --silent -- $MNT -1 100 & -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --silent echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on" -perl $CREATE --silent --mcreate=0 -- $MNT -1 100 & -perl 
$CREATE --silent --mcreate=0 -- $MNT -1 100 & -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=2 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on" -perl rename.pl --silent $MNT 1000 & -perl rename.pl --silent $MNT 1000 & -wait +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=1000 --num_threads=2 --silent debug_client_off echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off" -perl $CREATE --silent -- $MNT -1 2000 & -perl $CREATE --silent -- $MNT -1 2000 & -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --use_mcreate=0 --silent wait echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off" -perl rename.pl --silent $MNT 2000 & -perl rename.pl --silent $MNT 2000 & -wait +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=2 --silent debug_client_on echo "create.pl, 1 mount, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT -1 100 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --silent echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 100 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=100 --num_threads=4 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on" -for i in `seq 1 4`; do - perl rename.pl --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent debug_client_off echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent -- 
$MNT -1 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl rename.pl --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=4 --silent debug_client_on echo "create.pl, 1 mount, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT -1 500 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8 --silent echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 500 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=500 --num_threads=8 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on" -for i in `seq 1 8`; do - perl rename.pl --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent debug_client_off echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT -1 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --silent echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -done -wait +perl $CREATE --mountpt=${MNT} --num_mounts=-1 --iterations=2000 --num_threads=8 --use_mcreate=0 --silent echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl rename.pl --silent $MNT 2000 & -done -wait +perl $RENAME --mountpt=${MNT} 
--num_mounts=-1 --iterations=2000 --num_threads=8 --silent + sh rundbench 1 sh rundbench 2 sh rundbench 4 diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh index 0d2d836..919ea1f 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -5,7 +5,7 @@ set -vxe [ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no [ "$CONFIGS" ] || CONFIGS="local lov" -[ "$MAX_THREADS" ] || MAX_THREADS=50 +[ "$MAX_THREADS" ] || MAX_THREADS=10 if [ -z "$THREADS" ]; then KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo` THREADS=`expr $KB / 16384` @@ -76,7 +76,7 @@ for NAME in $CONFIGS; do if [ "$IOZONE_DIR" != "no" ]; then mount | grep $MNT || sh llmount.sh SPACE=`df $MNT | tail -1 | awk '{ print $4 }'` - IOZ_THREADS=`expr $SPACE / $SIZE` + IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 1000 \)` [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS $DEBUG_OFF diff --git a/lustre/tests/cobd.sh b/lustre/tests/cobd.sh index cb4f94d..983df93 100755 --- a/lustre/tests/cobd.sh +++ b/lustre/tests/cobd.sh @@ -6,10 +6,11 @@ config=${1:-$(basename $0 .sh)}.xml LMC=${LMC:-../utils/lmc -m $config} TMP=${TMP:-/tmp} -MDSDEV=$TMP/mds1 +MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=50000 +FSTYPE=${FSTYPE:-ext3} -OSTDEV=$TMP/ost1 +OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`} OSTSIZE=200000 rm -f $config @@ -18,12 +19,12 @@ ${LMC} --add node --node localhost || exit 10 ${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 # configure mds server -${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20 +${LMC} --add mds --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 # configure ost -${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30 +${LMC} --add ost --node localhost --obd obd1 --fstype $FSTYPE --obdtype obdecho || exit 30 # configure ost -${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30 +${LMC} --add ost --node localhost 
--obd obd2 --fstype $FSTYPE --obdtype obdecho || exit 30 ${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2 diff --git a/lustre/tests/create.pl b/lustre/tests/create.pl index 6156869..c5f3f12 100644 --- a/lustre/tests/create.pl +++ b/lustre/tests/create.pl @@ -1,32 +1,162 @@ -#!/usr/bin/perl +#!/usr/bin/perl -w +use strict; +$|++; + +$ENV{PATH}="/bin:/usr/bin"; +$ENV{ENV}=""; +$ENV{BASH_ENV}=""; +use POSIX ":sys_wait_h"; + +use diagnostics; use Getopt::Long; +use vars qw( + $MAX_THREADS + ); + +# Don't try to run more than this many threads concurrently. +$MAX_THREADS = 16; + +# Initialize variables my $silent = 0; -my $mcreate = 1; # should we use mcreate or open? -my $files = 5; +my $use_mcreate = 1; # should we use mcreate or open? +my $num_files = 5; # number of files to create +my $iterations = 1; +my $num_threads = 1; +my $mountpt; +my $num_mounts = -1; +# Get options from the command line. GetOptions("silent!" => \$silent, - "mcreate=i" => \$mcreate, - "files=i" => \$files); + "use_mcreate=i" => \$use_mcreate, + "num_files=i" => \$num_files, + "mountpt=s" => \$mountpt, + "num_mounts=i" => \$num_mounts, + "iterations=i" => \$iterations, + "num_threads=i" => \$num_threads, + ) || die &usage; + +# Check for mandatory args. +if (!$mountpt || + !$num_mounts) { + die &usage; +} + +if ($num_threads > $MAX_THREADS) { + print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n"; + print "You will have to change this in the source\n"; + print "if you really want to run with $num_threads threads.\n\n"; + exit 1; +} -my $mtpt = shift || usage(); -my $mount_count = shift || usage(); -my $i = shift || usage(); -my $count = $i; +# Initialize rand() function. +srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`); + +######################################################################### +### MAIN + +for (my $i=1; $i<=$num_threads; $i++) { + my $status = &fork_and_create($i); + last if ($status != 0); +} + +# Wait for all our threads to finish. 
+my $child = 0; +do { + $child = waitpid(-1, WNOHANG); +} until $child > 0; +sleep 1; + +exit 0; + +######################################################################### +### SUBROUTINES sub usage () { - print "Usage: $0 [--silent] [--mcreate=n] [--files=n] \n"; - print "example: $0 /mnt/lustre 2 50\n"; - print " will test in /mnt/lustre1 and /mnt/lustre2\n"; - print " $0 /mnt/lustre -1 50\n"; - print " will test in /mnt/lustre only\n"; + print "\nUsage: $0 [--silent] [--use_mcreate=n] [--num_files=n] [--iterations=n] [--num_threads=n] --mountpt=/path/to/lustre/mount --num_mounts=n\n\n"; + print "\t--silent\tminimal output\n"; + print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n"; + print "\t--num_files=n\tnumber of files to create per iteration, default=5\n"; + print "\t--iterations=n\tnumber of iterations to perform, default=1\n"; + print "\t--num_threads=n\tnumber of thread to run, default=1\n"; + print "\t--mountpt\tlocation of lustre mount\n"; + print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n"; + print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n"; + print " will perform 50 interations in /mnt/lustre1 and /mnt/lustre2\n"; + print " $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n"; + print " will perform 50 iterations in /mnt/lustre only\n\n"; exit; } -sub do_open($) { - my $path = shift; +######################################################################### +sub fork_and_create ($) { + my ($thread_num) = @_; + + FORK: { + if (my $pid = fork) { + # parent here + # child process pid is available in $pid + return 0; + } elsif (defined $pid) { # $pid is zero here if defined + my $current_iteration=1; + while ($current_iteration <= $iterations) { + for (my $i=1; $i<=$num_files; $i++) { + my $which = ""; + if ($num_mounts > 0) { + $which = int(rand() * $num_mounts) + 1; + } + my $d = int(rand() * $num_files); + 
do_open("${mountpt}${which}/thread${thread_num}.${d}"); + + if ($num_mounts > 0) { + $which = int(rand() * $num_mounts) + 1; + } + $d = int(rand() * $num_files); + my $path = "${mountpt}${which}/thread${thread_num}.${d}"; + print "Thread $thread_num: Unlink $path start [" . $$."]...\n" if !$silent; + if (unlink($path)) { + print "Thread $thread_num: Unlink done [$$] $path: Success\n" if !$silent; + } else { + print "Thread $thread_num: Unlink done [$$] $path: $!\n"if !$silent; + } + } + if (($current_iteration) % 100 == 0) { + print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n"; + } + $current_iteration++; + } + + my $which = ""; + if ($num_mounts > 0) { + $which = int(rand() * $num_mounts) + 1; + } + for (my $d = 0; $d < $num_files; $d++) { + my $path = "${mountpt}${which}/thread${thread_num}.${d}"; + unlink("$path") if (-e $path); + } + + print "Thread $thread_num: Done.\n"; + + exit 0; + + } elsif ($! =~ /No more process/) { + # EAGAIN, supposedly recoverable fork error + sleep 5; + redo FORK; + } else { + # weird fork error + die "Can't fork: $!\n"; + } + } + +} + +######################################################################### + +sub do_open ($) { + my ($path) = @_;; - if ($mcreate) { + if ($use_mcreate) { my $tmp = `./mcreate $path`; if ($tmp) { print "Creating $path [" . $$."]...\n" if !$silent; @@ -37,42 +167,9 @@ sub do_open($) { } } else { print "Opening $path [" . $$."]...\n"if !$silent; - open(FH, ">$path") || die "open($PATH): $!"; + open(FH, ">$path") || die "open($path: $!"; print "Open done [$$] $path: Success\n"if !$silent; close(FH) || die; } } -while ($i--) { - my $which = ""; - if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; - } - $d = int(rand() * $files); - do_open("$mtpt$which/$d"); - - if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; - } - $d = int(rand() * $files); - $path = "$mtpt$which/$d"; - print "Unlink $path start [" . 
$$."]...\n"if !$silent; - if (unlink($path)) { - print "Unlink done [$$] $path: Success\n"if !$silent; - } else { - print "Unlink done [$$] $path: $!\n"if !$silent; - } - if (($count - $i) % 100 == 0) { - print STDERR ($count - $i) . " operations [" . $$ . "]\n"; - } -} - -my $which = ""; -if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; -} -for ($d = 0; $d < $files; $d++) { - unlink("$mtpt$which/$d"); -} - -print "Done.\n"; diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c index e660ea4..cc92c80 100644 --- a/lustre/tests/directio.c +++ b/lustre/tests/directio.c @@ -41,7 +41,7 @@ int main(int argc, char **argv) return 1; } - printf("directio on %s for %dx%lu blocks \n", argv[1], blocks, + printf("directio on %s for %dx%lu bytes \n", argv[1], blocks, st.st_blksize); seek = (off64_t)seek_blocks * (off64_t)st.st_blksize; @@ -75,5 +75,6 @@ int main(int argc, char **argv) return 1; } + printf("PASS\n"); return 0; } diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh index 335db41..b4fe5a4 100755 --- a/lustre/tests/echo.sh +++ b/lustre/tests/echo.sh @@ -21,8 +21,9 @@ CLIENTNID=${CLIENTNID:-$CLIENT} # FIXME: make LMC not require MDS for obdecho LOV -MDSDEV=${MDSDEV:-$TMP/mds1} +MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=10000 +FSTYPE=${FSTYPE:-ext3} STRIPE_BYTES=65536 STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs @@ -33,7 +34,7 @@ $LMC --add node --node $SERVER || exit 1 $LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2 if (($LOV)); then - $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10 + $LMC --add mds --node $SERVER --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 10 $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11 $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12 $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13 diff --git a/lustre/tests/fsx.c 
b/lustre/tests/fsx.c index a2b1d5e..92a2342 100644 --- a/lustre/tests/fsx.c +++ b/lustre/tests/fsx.c @@ -294,9 +294,10 @@ save_buffer(char *buffer, off_t bufferlength, int fd) if (size_by_seek == (off_t)-1) prterr("save_buffer: lseek eof"); else if (bufferlength > size_by_seek) { - warn("save_buffer: .fsxgood file too short... will -save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek, - (unsigned long long)bufferlength); + warn("save_buffer: .fsxgood file too short... will" + "save 0x%llx bytes instead of 0x%llx\n", + (unsigned long long)size_by_seek, + (unsigned long long)bufferlength); bufferlength = size_by_seek; } } @@ -310,8 +311,8 @@ save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek, if (byteswritten == -1) prterr("save_buffer write"); else - warn("save_buffer: short write, 0x%x bytes instead -of 0x%llx\n", + warn("save_buffer: short write, 0x%x bytes instead" + "of 0x%llx\n", (unsigned)byteswritten, (unsigned long long)bufferlength); } @@ -372,11 +373,11 @@ check_buffers(unsigned offset, unsigned size) if (n) { prt("\t0x%5x\n", n); if (bad) - prt("operation# (mod 256) for the bad data -may be %u\n", ((unsigned)op & 0xff)); + prt("operation# (mod 256) for the bad data" + "may be %u\n", ((unsigned)op & 0xff)); else - prt("operation# (mod 256) for the bad data -unknown, check HOLE and EXTEND ops\n"); + prt("operation# (mod 256) for the bad data" + "unknown, check HOLE and EXTEND ops\n"); } else prt("????????????????\n"); report_failure(110); @@ -927,33 +928,33 @@ void usage(void) { fprintf(stdout, "usage: %s", - "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m -start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t -truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] -fname\n\ - -b opnum: beginning operation number (default 1)\n\ - -c P: 1 in P chance of file close+open at each op (default infinity)\n\ - -d: debug output for all operations [-d -d = more debugging]\n\ - -l flen: 
the upper bound on file size (default 262144)\n\ - -m startop:endop: monitor (print debug output) specified byte range -(default 0:infinity)\n\ - -n: no verifications of file size\n\ - -o oplen: the upper bound on operation size (default 65536)\n\ - -p progressinterval: debug output at specified operation interval\n\ - -q: quieter operation\n\ - -r readbdy: 4096 would make reads page aligned (default 1)\n\ - -s style: 1 gives smaller truncates (default 0)\n\ - -t truncbdy: 4096 would make truncates page aligned (default 1)\n\ - -w writebdy: 4096 would make writes page aligned (default 1)\n\ - -D startingop: debug output starting at specified operation\n\ - -L: fsxLite - no file creations & no file size changes\n\ - -N numops: total # operations to do (default infinity)\n\ - -O: use oplen (see -o flag) for every op (default random)\n\ - -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n\ - -S seed: for random # generator (default 1) 0 gets timestamp\n\ - -W: mapped write operations DISabled\n\ - -R: read() system calls only (mapped reads disabled)\n\ - fname: this filename is REQUIRED (no default)\n"); + "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m " +"start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t " +"truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] " +"fname\n" +" -b opnum: beginning operation number (default 1)\n" +" -c P: 1 in P chance of file close+open at each op (default infinity)\n" +" -d: debug output for all operations [-d -d = more debugging]\n" +" -l flen: the upper bound on file size (default 262144)\n" +" -m startop:endop: monitor (print debug output) specified byte rang" +"(default 0:infinity)\n" +" -n: no verifications of file size\n" +" -o oplen: the upper bound on operation size (default 65536)\n" +" -p progressinterval: debug output at specified operation interval\n" +" -q: quieter operation\n" +" -r readbdy: 4096 would make reads page aligned (default 1)\n" +" -s style: 1 gives 
smaller truncates (default 0)\n" +" -t truncbdy: 4096 would make truncates page aligned (default 1)\n" +" -w writebdy: 4096 would make writes page aligned (default 1)\n" +" -D startingop: debug output starting at specified operation\n" +" -L: fsxLite - no file creations & no file size changes\n" +" -N numops: total # operations to do (default infinity)\n" +" -O: use oplen (see -o flag) for every op (default random)\n" +" -P: save .fsxlog and .fsxgood files in dirpath (default ./)\n" +" -S seed: for random # generator (default 1) 0 gets timestamp\n" +" -W: mapped write operations DISabled\n" +" -R: read() system calls only (mapped reads disabled)\n" +" fname: this filename is REQUIRED (no default)\n"); exit(90); } @@ -1020,8 +1021,8 @@ main(int argc, char **argv) case 'b': simulatedopcount = getnum(optarg, &endp); if (!quiet) - fprintf(stdout, "Will begin at operation -%ld\n", + fprintf(stdout, "Will begin at operation" + "%ld\n", simulatedopcount); if (simulatedopcount == 0) usage(); @@ -1206,8 +1207,8 @@ main(int argc, char **argv) prterr(fname); warn("main: error on write"); } else - warn("main: short write, 0x%x bytes instead -of 0x%x\n", + warn("main: short write, 0x%x bytes instead" + "of 0x%x\n", (unsigned)written, maxfilelen); exit(98); } diff --git a/lustre/tests/leak_finder.pl b/lustre/tests/leak_finder.pl index b8d234b..745f113 100644 --- a/lustre/tests/leak_finder.pl +++ b/lustre/tests/leak_finder.pl @@ -8,17 +8,21 @@ STDERR->autoflush(1); my ($line, $memory); my $debug_line = 0; +my $total = 0; +my $max = 0; + while ($line = <>) { $debug_line++; my ($file, $func, $lno, $name, $size, $addr, $type); - if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) { + if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): (k|v|slab-)(.*) '(.*)': (\d+) at (.*) \(tot (.*)\).*$/) { $file = $1; $lno = $2; $func = $3; - $type = $5; - $name = $6; - $size = $7; - $addr = $8; + $type = $6; + $name = $7; + $size = 
$8; + $addr = $9; + $tot = $10; # we can't dump the log after portals has exited, so skip "leaks" # from memory freed in the portals module unloading. @@ -31,13 +35,24 @@ while ($line = <>) { next; } - if ($type eq 'malloced') { + if (index($type, 'alloced') >= 0) { + if (defined($memory->{$addr})) { + print STDERR "*** Two allocs with the same address ($size bytes at $addr, $file:$func:$lno)\n"; + print STDERR " first malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, second at $file:$func:$lno\n"; + next; + } + $memory->{$addr}->{name} = $name; $memory->{$addr}->{size} = $size; $memory->{$addr}->{file} = $file; $memory->{$addr}->{func} = $func; $memory->{$addr}->{lno} = $lno; $memory->{$addr}->{debug_line} = $debug_line; + + $total += $size; + if ($total > $max) { + $max = $total; + } } else { if (!defined($memory->{$addr})) { print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n"; @@ -52,6 +67,11 @@ while ($line = <>) { } delete $memory->{$addr}; + $total -= $size; + } + if ($total != int($tot)) { + print "kernel total $tot != my total $total\n"; + $total = $tot; } } @@ -66,4 +86,4 @@ foreach $key (@sorted) { print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key ($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n"; } -print "Done.\n"; +print "maximum used: $max, amount leaked: $total\n"; diff --git a/lustre/tests/lkcdmap b/lustre/tests/lkcdmap index 20c8c20..dbfd7f0 100755 --- a/lustre/tests/lkcdmap +++ b/lustre/tests/lkcdmap @@ -4,10 +4,10 @@ LCMD=$TMP/lkcd-cmds-`hostname` echo "Storing LKCD module info in $LCMD" cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do MOD="../$M" - MAP=`echo $MOD | sed -e 's/\.o$/.map/'` - MODNAME=`basename $MOD | sed -e 's/\.o$//'` + MODNAME="`basename $MOD .o`" + MAP="$TMP/$MODNAME.map" nm $MOD > $MAP echo namelist -a $PWD/$MOD | tee -a $LCMD - echo symtab -a $PWD/$MAP $MODNAME | 
tee -a $LCMD + echo symtab -a $MAP $MODNAME | tee -a $LCMD done diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh index 5afade1..3e3e03b 100644 --- a/lustre/tests/llecho.sh +++ b/lustre/tests/llecho.sh @@ -1,6 +1,8 @@ #!/bin/sh -LCONF=${LCONF:-../utils/lconf} +PATH=`dirname $0`/../utils:$PATH + +LCONF=${LCONF:-lconf} NAME=${NAME:-echo} config=$NAME.xml @@ -17,5 +19,5 @@ $LCONF $lustre_opt --reformat --gdb $OPTS $config || exit 4 cat < #include #include +#include +#include int main(int argc, char **argv) { @@ -34,7 +36,7 @@ int main(int argc, char **argv) fprintf(stderr, "creating special file %s\n", dname1); rc = mknod(dname1, 0777|S_IFIFO, 0); if (rc == -1) { - fprintf(stderr, "creating %s fails: %s\n", + fprintf(stderr, "creating %s fails: %s\n", dname1, strerror(errno)); exit(1); } @@ -47,7 +49,7 @@ int main(int argc, char **argv) dname1, strerror(errno)); exit(1); } - + // doesn't matter if the two dirs are the same?? fddev2 = open(dname2, O_RDONLY | O_NONBLOCK); if (fddev2 == -1) { @@ -55,40 +57,38 @@ int main(int argc, char **argv) dname2, strerror(errno)); exit(1); } - + // delete the special file fprintf (stderr, "unlinking %s\n", dname1); rc = unlink(dname1); if (rc) { - fprintf(stderr, "unlink %s error: %s\n", + fprintf(stderr, "unlink %s error: %s\n", dname1, strerror(errno)); exit(1); } - if (access(dname2, F_OK) == 0){ + if (access(dname2, F_OK) == 0) { fprintf(stderr, "%s still exists\n", dname2); exit(1); } - if (access(dname1, F_OK) == 0){ + if (access(dname1, F_OK) == 0) { fprintf(stderr, "%s still exists\n", dname1); exit(1); } // fchmod one special file rc = fchmod (fddev1, 0777); - if(rc == -1) - { - fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", + if (rc == -1) { + fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", dname1, strerror(errno)); exit(1); } - + // fstat two files to check if they are the same rc = fstat(fddev1, &st1); - if(rc == -1) - { - fprintf(stderr, "fstat unlinked special file %s fails: 
%s\n", + if (rc == -1) { + fprintf(stderr, "fstat unlinked special file %s fails: %s\n", dname1, strerror(errno)); exit(1); } @@ -103,7 +103,7 @@ int main(int argc, char **argv) if (st1.st_mode != st2.st_mode) { // can we do this? fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2); exit(1); - } + } fprintf(stderr, "Ok, everything goes well.\n"); return 0; diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c index 7d8cc6b..7b97309 100644 --- a/lustre/tests/openfile.c +++ b/lustre/tests/openfile.c @@ -18,8 +18,8 @@ #include typedef struct flag_mapping { - char string[20]; - int flag; + const char *string; + const int flag; } FLAG_MAPPING; FLAG_MAPPING flag_table[] = { @@ -67,13 +67,13 @@ int main(int argc, char** argv) case 'f': { char *tmp; - cloned_flags = (char *)malloc(strlen(optarg)); + cloned_flags = (char *)malloc(strlen(optarg)+1); if (cloned_flags == NULL) { fprintf(stderr, "Insufficient memory.\n"); exit(-1); } - strncpy(cloned_flags, optarg, strlen(optarg)); + strncpy(cloned_flags, optarg, strlen(optarg)+1); for (tmp = strtok(optarg, ":|"); tmp; tmp = strtok(NULL, ":|")) { int i = 0; diff --git a/lustre/tests/openunlink.c b/lustre/tests/openunlink.c index e7671c8..96632a9 100644 --- a/lustre/tests/openunlink.c +++ b/lustre/tests/openunlink.c @@ -3,16 +3,18 @@ #include #include #include +#include #include #include -#define T1 "write before unlink\n" -#define T2 "write after unlink\n" +#define T1 "write data before unlink\n" +#define T2 "write data after unlink\n" char buf[128]; int main(int argc, char **argv) { - char *fname, *fname2; + char *fname, *fname2; + struct stat st; int fd, rc; if (argc < 2 || argc > 3) { @@ -20,11 +22,11 @@ int main(int argc, char **argv) exit(1); } - fname = argv[1]; - if (argc == 3) - fname2 = argv[2]; - else - fname2 = argv[1]; + fname = argv[1]; + if (argc == 3) + fname2 = argv[2]; + else + fname2 = argv[1]; fprintf(stderr, "opening\n"); fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644); @@ 
-36,50 +38,67 @@ int main(int argc, char **argv) fprintf(stderr, "writing\n"); rc = write(fd, T1, strlen(T1) + 1); if (rc != strlen(T1) + 1) { - fprintf(stderr, "write (normal) %s\n", strerror(errno)); + fprintf(stderr, "write (normal) %s (rc %d)\n", + strerror(errno), rc); + exit(1); + } + + if (argc == 3) { + fprintf(stderr, "closing %s\n", fname); + rc = close(fd); + if (rc) { + fprintf(stderr, "close (normal) %s\n", strerror(errno)); + exit(1); + } + + fprintf(stderr, "opening %s\n", fname2); + fd = open(fname2, O_RDWR); + if (fd == -1) { + fprintf(stderr, "open (unlink) %s\n", strerror(errno)); + exit(1); + } + + fprintf (stderr, "unlinking %s\n", fname2); + rc = unlink(fname2); + if (rc) { + fprintf(stderr, "unlink %s\n", strerror(errno)); + exit(1); + } + + if (access(fname2, F_OK) == 0) { + fprintf(stderr, "%s still exists\n", fname2); + exit(1); + } + } else { + fprintf(stderr, "resetting fd offset\n"); + rc = lseek(fd, 0, SEEK_SET); + if (rc) { + fprintf(stderr, "seek %s\n", strerror(errno)); + exit(1); + } + + printf("unlink %s and press enter\n", fname); + getc(stdin); + } + + if (access(fname, F_OK) == 0) { + fprintf(stderr, "%s still exists\n", fname); exit(1); } - if (argc == 3) { - fprintf(stderr, "closing %s\n", fname); - rc = close(fd); - if (rc) { - fprintf(stderr, "close (normal) %s\n", strerror(errno)); - exit(1); - } - - fprintf(stderr, "opening %s\n", fname2); - fd = open(fname2, O_RDWR); - if (fd == -1) { - fprintf(stderr, "open (unlink) %s\n", strerror(errno)); - exit(1); - } - - fprintf (stderr, "unlinking %s\n", fname2); - rc = unlink(fname2); - if (rc) { - fprintf(stderr, "unlink %s\n", strerror(errno)); - exit(1); - } - - if (access(fname2, F_OK) == 0) { - fprintf(stderr, "%s still exists\n", fname2); - exit(1); - } - } else { - printf("unlink %s and press enter\n", fname); - getc(stdin); - } - - if (access(fname, F_OK) == 0) { - fprintf(stderr, "%s still exists\n", fname); - exit(1); - } + fprintf(stderr, "fstating\n"); + rc = 
fstat(fd, &st); + if (rc) { + fprintf(stderr, "fstat (unlink) %s\n", strerror(errno)); + exit(1); + } + if (st.st_nlink != 0) + fprintf(stderr, "st_nlink = %d\n", (int)st.st_nlink); fprintf(stderr, "reading\n"); rc = read(fd, buf, strlen(T1) + 1); if (rc != strlen(T1) + 1) { - fprintf(stderr, "read (unlink) %s rc %d\n", + fprintf(stderr, "read (unlink) %s (rc %d)\n", strerror(errno), rc); exit(1); } @@ -92,7 +111,7 @@ int main(int argc, char **argv) fprintf(stderr, "truncating\n"); rc = ftruncate(fd, 0); - if (rc ) { + if (rc) { fprintf(stderr, "truncate (unlink) %s\n", strerror(errno)); exit(1); } @@ -124,8 +143,8 @@ int main(int argc, char **argv) fprintf(stderr, "reading again\n"); rc = read(fd, buf, strlen(T2) + 1); if (rc != strlen(T2) + 1) { - fprintf(stderr, "read (after unlink rewrite) %s\n", - strerror(errno)); + fprintf(stderr, "read (after unlink rewrite) %s (rc %d)\n", + strerror(errno), rc); exit(1); } @@ -135,7 +154,7 @@ int main(int argc, char **argv) exit(1); } - fprintf(stderr, "closing again\n"); + fprintf(stderr, "closing\n"); rc = close(fd); if (rc) { fprintf(stderr, "close (unlink) %s\n", strerror(errno)); diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index c8f85ee..fefd2d6 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -22,9 +22,10 @@ CLIENT=${CLIENT:-mdev8} NETWORKTYPE=${NETWORKTYPE:-tcp} MOUNTPT=${MOUNTPT:-/mnt/lustre} CONFIG=${CONFIG:-recovery-cleanup.xml} -MDSDEV=${MDSDEV:-/tmp/mds} -OSTDEV=${OSTDEV:-/tmp/ost} +MDSDEV=${MDSDEV:-/tmp/mds-`hostname`} MDSSIZE=${MDSSIZE:-100000} +FSTYPE=${FSTYPE:-ext3} +OSTDEV=${OSTDEV:-/tmp/ost-`hostname`} OSTSIZE=${OSTSIZE:-100000} do_mds() { @@ -51,10 +52,10 @@ make_config() { lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \ --nettype $NETWORKTYPE || exit 4 done - lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \ - --size $MDSSIZE || exit 5 - lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 
--dev $OSTDEV \ - --size $OSTSIZE || exit 6 + lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --fstype $FSTYPE \ + --dev $MDSDEV --size $MDSSIZE || exit 5 + lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --fstype $FSTYPE \ + --dev $OSTDEV --size $OSTSIZE || exit 6 lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \ --ost ost1 || exit 7 } diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index ebf0a0c..bc6a9c1 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -25,9 +25,9 @@ CLIENT=${CLIENT:-mdev8} NETWORKTYPE=${NETWORKTYPE:-tcp} MOUNTPT=${MOUNTPT:-/mnt/lustre} CONFIG=${CONFIG:-recovery-small.xml} -MDSDEV=${MDSDEV:-/tmp/mds} -OSTDEV=${OSTDEV:-/tmp/ost} +MDSDEV=${MDSDEV:-/tmp/mds-`hostname`} MDSSIZE=${MDSSIZE:-100000} +OSTDEV=${OSTDEV:-/tmp/ost-`hostname`} OSTSIZE=${OSTSIZE:-100000} UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh} FSTYPE=${FSTYPE:-ext3} diff --git a/lustre/tests/rename.pl b/lustre/tests/rename.pl index 3ba9368..4ea020f 100644 --- a/lustre/tests/rename.pl +++ b/lustre/tests/rename.pl @@ -1,78 +1,202 @@ -#!/usr/bin/perl +#!/usr/bin/perl -w use strict; +$|++; + +$ENV{PATH}="/bin:/usr/bin"; +$ENV{ENV}=""; +$ENV{BASH_ENV}=""; + use diagnostics; use Getopt::Long; +use POSIX ":sys_wait_h"; -sub usage () { - print "Usage: $0 \n"; - print "example: $0 --count=2 /mnt/lustre 50\n"; - print " will test in /mnt/lustre1 and /mnt/lustre2\n"; - print " $0 --count=0 /mnt/lustre 50\n"; - print " will test in /mnt/lustre only\n"; - exit; -} -my ($j, $k, $d, $f1, $f2, $path, $silent); -my $count = 0; -my $create = 10; +use vars qw( + $MAX_THREADS + ); + +# Don't try to run more than this many threads concurrently. +$MAX_THREADS = 16; + +# Initialize variables +my $silent = 0; +my $create_files = 1; # should we create files or not? +my $use_mcreate = 1; # should we use mcreate or open? 
+my $num_dirs = 3; # number of directories to create +my $num_files = 6; # number of files to create +my $iterations = 1; +my $num_threads = 1; +my $mountpt; +my $num_mounts = -1; GetOptions("silent!"=> \$silent, - "count=i" => \$count, - "create=i" => \$create); + "use_mcreate=i" => \$use_mcreate, + "create_files=i" => \$create_files, + "use_mcreate=i" => \$use_mcreate, + "num_files=i" => \$num_files, + "num_dirs=i" => \$num_dirs, + "mountpt=s" => \$mountpt, + "num_mounts=i" => \$num_mounts, + "iterations=i" => \$iterations, + "num_threads=i" => \$num_threads, + ) || die &usage; -my $mtpt = shift || usage(); -my $i = shift || usage(); -my $total = $i; -my $files = 6; -my $dirs = 3; -my $mcreate = 0; # should we use mcreate or open? +# Check for mandatory args. +if (!$mountpt || + !$num_mounts) { + die &usage; +} -my $which = ""; -if ($count > 0) { - $which = int(rand() * $count) + 1; +if ($num_threads > $MAX_THREADS) { + print "\nMAX_THREADS is currently set to $MAX_THREADS.\n\n"; + print "You will have to change this in the source\n"; + print "if you really want to run with $num_threads threads.\n\n"; + exit 1; } -$k = $dirs; -if ($create == 0) { - $k = 0; +# Initialize rand() function. +srand (time ^ $$ ^ unpack "%L*", `ps axww | gzip`); + +######################################################################### +### MAIN + +my $which = ""; +if ($num_mounts > 0) { + $which = int(rand() * $num_mounts) + 1; } -while ($k--) { - $path = "$mtpt$which/$k"; - my $rc = mkdir $path, 0755; - print "mkdir $path failed: $!\n" if !$rc; - $j = $files; - while ($j--) { - `./mcreate $path/$j`; + +# Create files and directories (if necessary) +if ($create_files) { + for (my $i=1; $i<=$num_threads;$i++) { + for (my $j=0; $j<$num_dirs;$j++) { + my $path = "${mountpt}${which}/${i}.${j}"; + mkdir $path, 0755 || die "Can't mkdir $path: $!\n"; + for (my $k=0; $k<$num_files; $k++) { + my $filepath = "${path}/${k}"; + &create_file($filepath); + if (! 
-e $filepath) { + die "Error creating $filepath\n"; + } + } + } } } -while ($i--) { - my $which = ""; - if ($count > 0) { - $which = int(rand() * $count) + 1; - } - $d = int(rand() * $dirs); - $f1 = int(rand() * $files); - $f2 = int(rand() * $files); - print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent; - my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2"; - print "[$$] done: $rc\n" if !$silent; - if (($total - $i) % 100 == 0) { - print STDERR "[" . $$ . "]" . ($total - $i) . " operations\n"; +for (my $i=1; $i<=$num_threads; $i++) { + my $status = &fork_and_rename($i); + last if ($status != 0); +} + +# Wait for all our threads to finish. +# Wait for all our threads to finish. +my $child = 0; +do { + $child = waitpid(-1, WNOHANG); +} until $child > 0; +sleep 1; + +# Unlink files and directories (if necessary) +if ($create_files) { + for (my $i=1; $i<=$num_threads;$i++) { + for (my $j=0; $j<$num_dirs;$j++) { + my $path = "${mountpt}${which}/${i}.${j}"; + for (my $k=0; $k<=$num_files; $k++) { + my $filepath = "${path}/${k}"; + unlink("$filepath") if (-e $filepath); + } + my $rc = rmdir $path; + print "rmdir $path failed: $!\n" if !$rc; + } } } -$k = $dirs; -if ($create == 0) { - $k = 0; +exit 0; + +######################################################################### +### SUBROUTINES + +sub usage () { + print "\nUsage: $0 [--silent] [--create_files=n] [--use_mcreate=n] [--num_dirs=n] [--num_files=n] [--iterations=n] [--num_threads=n] --num_mounts=n --mountpt=/path/to/lustre/mount\n\n"; + print "\t--silent\tminimal output\n"; + print "\t--create_files=n\create files at start, default=1 (yes)\n"; + print "\t--use_mcreate=n\tuse mcreate to create files, default=1 (yes)\n"; + print "\t--num_dirs=n\tnumber of directories to create per iteration, default=3\n"; + print "\t--num_files=n\tnumber of files to create per directory, default=6\n"; + print "\t--iterations=n\tnumber of iterations to perform, default=1\n"; + print 
"\t--num_threads=n\tnumber of thread to run, default=1\n"; + print "\t--mountpt\tlocation of lustre mount\n"; + print "\t--num_mounts=n\tnumber of lustre mounts to test across, default=-1 (single mount point without numeric suffix)\n\n"; + print "example: $0 --mountpt=/mnt/lustre --num_mounts=2 --iterations=50\n"; + print " will perform 50 interations in /mnt/lustre1 and /mnt/lustre2\n"; + print " $0 --mountpt=/mnt/lustre --num_mounts=-1 --iterations=50\n"; + print " will perform 50 iterations in /mnt/lustre only\n\n"; + exit; } -while ($k--) { - $path = "$mtpt$which/$k"; - $j = $files; - while ($j--) { - unlink "$path/$j"; + + +######################################################################### +sub create_file ($) { + my ($path) = @_;; + + if ($use_mcreate) { + my $tmp = `./mcreate $path`; + if ($tmp =~ /.*error: (.*)\n/) { + die "Error mcreating $path: $!\n"; + } + } else { + open(FH, ">$path") || die "Error opening $path: $!\n"; + close(FH) || die; } - my $rc = rmdir $path; - print "rmdir $path failed: $!\n" if !$rc; + return 0; } -print "Done.\n"; +######################################################################### +sub fork_and_rename ($) { + my ($thread_num) = @_; + + FORK: { + if (my $pid = fork) { + # parent here + # child process pid is available in $pid + return 0; + } elsif (defined $pid) { # $pid is zero here if defined + + my $current_iteration=1; + while ($current_iteration <= $iterations) { + for (my $i=0; $i<$num_files; $i++) { + my $which = ""; + if ($num_mounts > 0) { + $which = int(rand() * $num_mounts) + 1; + } + + my $d = int(rand() * $num_dirs); + my $f1 = int(rand() * $num_files); + my $f2 = int(rand() * $num_files); + my $path_f1 = "${mountpt}${which}/${thread_num}.${d}/${f1}"; + my $path_f2 = "${mountpt}${which}/${thread_num}.${d}/${f2}"; + + print "Thread $thread_num: [$$] $path_f1 $path_f2 ...\n" if !$silent; + my $rc = rename $path_f1, $path_f2; + print "Thread $thread_num: [$$] done: $rc\n" if !$silent; + } + if 
(($current_iteration) % 100 == 0) { + print STDERR "Thread $thread_num: " . $current_iteration . " operations [" . $$ . "]\n"; + + } + $current_iteration++; + } + + print "Thread $thread_num: Done.\n"; + + exit 0; + + } elsif ($! =~ /No more process/) { + # EAGAIN, supposedly recoverable fork error + sleep 5; + redo FORK; + } else { + # weird fork error + die "Can't fork: $!\n"; + } + } + +} diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c index 20981e8..1e859aa 100644 --- a/lustre/tests/runas.c +++ b/lustre/tests/runas.c @@ -7,40 +7,39 @@ #include #include #include +#include #include #define DEBUG 0 -void Usage_and_abort(void) +static const char usage[] = +"Usage: %s -u user_id [-g grp_id ] [ -G ] command\n" +" -u user_id switch to UID user_id\n" +" -g grp_id switch to GID grp_id\n" +" -G clear supplementary groups\n"; + +void Usage_and_abort(const char *name) { - fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]" - " command_to_be_run \n"); - exit(-1); + fprintf(stderr, usage, name); + exit(-1); } -// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run -// return: the return value of "command_to_be_run" -// NOTE: returning -1 might be the return code of this program itself or -// the "command_to_be_run" - -// ROOT runs "runas" for free -// Other users run "runas" requires chmod 6755 "command_to_be_run" - int main(int argc, char **argv) { - char **my_argv; + char **my_argv, *name = argv[0]; int status; int c,i; int gid_is_set = 0; int uid_is_set = 0; + int clear_supp_groups = 0; uid_t user_id; gid_t grp_id; if (argc == 1) - Usage_and_abort(); + Usage_and_abort(name); // get UID and GID - while ((c = getopt (argc, argv, "+u:g:h")) != -1) { + while ((c = getopt (argc, argv, "+u:g:hG")) != -1) { switch (c) { case 'u': user_id = (uid_t)atoi(optarg); @@ -54,23 +53,23 @@ int main(int argc, char **argv) gid_is_set = 1; break; - case 'h': - Usage_and_abort(); + case 'G': + clear_supp_groups = 1; break; default: - //fprintf(stderr, "Bad 
parameters.\n"); - //Usage_and_abort (); + case 'h': + Usage_and_abort(name); break; } } if (!uid_is_set) - Usage_and_abort(); + Usage_and_abort(name); if (optind == argc) { - fprintf(stderr, "Bad parameters.\n"); - Usage_and_abort(); + fputs("Must specify command to run.\n", stderr); + Usage_and_abort(name); } // assemble the command @@ -99,6 +98,14 @@ int main(int argc, char **argv) exit(-1); } + if (clear_supp_groups) { + status = setgroups(0, NULL); + if (status == -1) { + perror("clearing supplementary groups"); + exit(-1); + } + } + // set UID status = setreuid(user_id, user_id ); if(status == -1) { @@ -107,8 +114,8 @@ int main(int argc, char **argv) exit(-1); } - - fprintf(stderr, "running as USER(%d), Grp (%d): ", user_id, grp_id ); + fprintf(stderr, "running as UID %d, GID %d%s:", user_id, grp_id, + clear_supp_groups ? ", cleared groups" : ""); for (i = 0; i < argc - optind; i++) fprintf(stderr, " [%s]", my_argv[i]); diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench index cb417d2..821ac46 100755 --- a/lustre/tests/rundbench +++ b/lustre/tests/rundbench @@ -1,6 +1,6 @@ #!/bin/sh - -DIR=${DIR:-/mnt/lustre/`hostname`} +MNT=${MNT:-/mnt/lustre} +DIR=${DIR:-$MNT/`hostname`} #[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug mkdir -p $DIR TGT=$DIR/client.txt diff --git a/lustre/tests/runobdstat b/lustre/tests/runobdstat index 886ce8f2..ad60d6d 100644 --- a/lustre/tests/runobdstat +++ b/lustre/tests/runobdstat @@ -1,7 +1,7 @@ #!/bin/sh PATH=`dirname $0`/../utils:$PATH -obdstat filter 1 | while read LINE; do +llobdstat.pl $1 1 | while read LINE; do echo "`date +s`: $LINE" - [ "$1" ] && echo "`date +s`: $LINE" >> $1 + [ "$2" ] && echo "`date +s`: $LINE" >> $2 done diff --git a/lustre/tests/runregression-brw.sh b/lustre/tests/runregression-brw.sh index 4d86248..395ceb5 100644 --- a/lustre/tests/runregression-brw.sh +++ b/lustre/tests/runregression-brw.sh @@ -1,6 +1,6 @@ #!/bin/sh SRCDIR="`dirname $0`/" -export 
PATH=/sbin:/usr/sbin:$SRCDIR:$PATH +export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH LOOPS=${LOOPS:-1} COUNT=${COUNT:-1000000} diff --git a/lustre/tests/runtests b/lustre/tests/runtests index e59f5f4..6a8aac8 100755 --- a/lustre/tests/runtests +++ b/lustre/tests/runtests @@ -35,41 +35,42 @@ while [ "$1" ]; do shift done -OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" -if [ -z "$OSCMT" ]; then +MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" +if [ -z "$MOUNT" ]; then sh llmount.sh - OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" - [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1 + MOUNT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" + [ -z "$MOUNT" ] && fail "no lustre filesystem mounted" 1 I_MOUNTED="yes" fi -OSCTMP=`echo $OSCMT | tr "/" "."` +OSCTMP=`echo $MOUNT | tr "/" "."` USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` USED=`expr $USED + 16` # Some space for the status file # let's start slowly here... -log "touching $OSCMT" -touch $OSCMT || fail "can't touch $OSCMT" 2 -HOSTS=$OSCMT/hosts.$$ - -# this will cause the following cp to trigger bug #620096 -log "create an empty file $HOSTS" -mcreate $HOSTS - -log "copying /etc/hosts to $HOSTS" -cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3 -log "comparing /etc/hosts and $HOSTS" -diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4 -log "renaming $HOSTS to $HOSTS.ren" -mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5 -log "copying /etc/hosts to $HOSTS again" -cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6 -log "truncating $HOSTS" -> $HOSTS || fail "can't truncate $HOSTS" 8 -log "removing $HOSTS" -rm $HOSTS || fail "can't remove $HOSTS" 9 - -DST=$OSCMT/runtest.$$ +log "touching $MOUNT" +touch $MOUNT || fail "can't touch $MOUNT" 2 +HOSTS=$MOUNT/hosts.$$ + +if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then + # this will cause the following cp to trigger bug #620096 + log "create an empty file 
$HOSTS" + mcreate $HOSTS + log "copying /etc/hosts to $HOSTS" + cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3 + log "comparing /etc/hosts and $HOSTS" + diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4 + log "renaming $HOSTS to $HOSTS.ren" + mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5 + log "copying /etc/hosts to $HOSTS again" + cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6 + log "truncating $HOSTS" + > $HOSTS || fail "can't truncate $HOSTS" 8 + log "removing $HOSTS" + rm $HOSTS || fail "can't remove $HOSTS" 9 +fi + +DST=$MOUNT/runtest.$$ # let's start slowly here... log "creating $DST" mkdir $DST || fail "can't mkdir $DST" 10 @@ -102,27 +103,29 @@ done sh llmountcleanup.sh || exit 19 sh llrmount.sh || exit 20 -log "renaming $HOSTS.ren to $HOSTS" -mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32 -log "truncating $HOSTS" -> $HOSTS || fail "can't truncate $HOSTS" 34 -log "removing $HOSTS" -rm $HOSTS || fail "can't remove $HOSTS again" 36 log "removing $DST" rm -r $V $DST || fail "can't remove $DST" 37 +if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then + log "renaming $HOSTS.ren to $HOSTS" + mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32 + log "truncating $HOSTS" + > $HOSTS || fail "can't truncate $HOSTS" 34 + log "removing $HOSTS" + rm $HOSTS || fail "can't remove $HOSTS again" 36 +fi + # mkdirmany test (bug 589) -log "running mkdirmany $OSCMT/base$$ 100" -$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed" +log "running mkdirmany $MOUNT/base$$ 100" +$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed" log "removing mkdirmany directories" -rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed" +rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed" log "done" NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` -if [ $NOWUSED -gt $USED ]; then +if [ `expr $NOWUSED - $USED` -gt 1024 ]; then echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 
1>&2 - echo "This is normal on BA OSTs, because of subdirectories." 1>&2 fi if [ "$I_MOUNTED" = "yes" ]; then diff --git a/lustre/tests/runvmstat b/lustre/tests/runvmstat index b04d84c..f414ccc 100755 --- a/lustre/tests/runvmstat +++ b/lustre/tests/runvmstat @@ -1,6 +1,6 @@ #!/bin/sh vmstat 1 | while read LINE ; do LINE="`date +%s`: $LINE" - echo $LINE - [ "$1" ] && echo $LINE >> $1 + echo "$LINE" + [ "$1" ] && echo "$LINE" >> $1 done diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 46d0072..09eb8e9 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,17 +7,19 @@ set -e ONLY=${ONLY:-"$*"} -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"} # bugs 1365 and 1360 respectively +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"35 32q 37 39"} # bugs 1360, 1504 SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH -CHECKSTAT=${CHECKSTAT:-"./checkstat -v"} +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} CREATETEST=${CREATETEST:-createtest} LFIND=${LFIND:-lfind} LSTRIPE=${LSTRIPE:-lstripe} LCTL=${LCTL:-lctl} MCREATE=${MCREATE:-mcreate} +OPENFILE=${OPENFILE:-openfile} +OPENUNLINK=${OPENUNLINK:-openunlink} TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} @@ -29,22 +31,20 @@ else RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} fi -MOUNT=${MOUNT:-/mnt/lustre} -DIR=${DIR:-$MOUNT} -export NAME=$NAME +export NAME=${NAME:-local} SAVE_PWD=$PWD clean() { - echo -n "cln.." - sh llmountcleanup.sh > /dev/null || exit 20 + echo -n "cln.." + sh llmountcleanup.sh > /dev/null || exit 20 } - CLEAN=${CLEAN:-clean} + start() { - echo -n "mnt.." - sh llrmount.sh > /dev/null || exit 10 - echo "done" + echo -n "mnt.." + sh llrmount.sh > /dev/null || exit 10 + echo "done" } START=${START:-start} @@ -54,7 +54,7 @@ log() { } run_one() { - if ! mount | grep -q $MOUNT; then + if ! mount | grep -q $DIR; then $START fi log "== test $1: $2" @@ -87,23 +87,33 @@ run_test() { } error() { - echo FAIL - exit 1 + echo "FAIL: $@" + exit 1 } pass() { - echo PASS + echo PASS } -if ! 
mount | grep $MOUNT; then +MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`" +if [ -z "$MOUNT" ]; then sh llmount.sh + MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`" + [ -z "$MOUNT" ] && error "NAME=$NAME not mounted" I_MOUNTED=yes fi +[ `echo $MOUNT | wc -w` -gt 1 ] && error "NAME=$NAME mounted more than once" + +DIR=${DIR:-$MOUNT} +[ -z "`echo $DIR | grep $MOUNT`" ] && echo "$DIR not in $MOUNT" && exit 99 + +rm -rf $DIR/[Rdfs][1-9]* + echo preparing for tests involving mounts -EXT2_DEV=/tmp/SANITY.LOOP -dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null -mke2fs -F $EXT2_DEV > /dev/null +EXT2_DEV=${EXT2_DEV:-/tmp/SANITY.LOOP} +touch $EXT2_DEV +mke2fs -F $EXT2_DEV 1000 > /dev/null test_0() { touch $DIR/f @@ -178,12 +188,49 @@ test_5() { } run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============" -test_6() { - touch $DIR/f6 - chmod 0666 $DIR/f6 - $CHECKSTAT -t file -p 0666 $DIR/f6 || error +test_6a() { + touch $DIR/f6a + chmod 0666 $DIR/f6a || error + $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error +} +run_test 6a "touch .../f6a; chmod .../f6a ======================" + +test_6b() { + [ $RUNAS_ID -eq $UID ] && echo "skipping test 6b" && return + $RUNAS chmod 0444 $DIR/f6a && error + $CHECKSTAT -t file -p 0666 -u \#$UID $DIR/f6a || error +} +run_test 6b "$RUNAS chmod .../f6a (should return error) ==" + +test_6c() { + [ $RUNAS_ID -eq $UID ] && echo "skipping test 6c" && return + touch $DIR/f6c + chown $RUNAS_ID $DIR/f6c || error + $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error +} +run_test 6c "touch .../f6c; chown .../f6c ======================" + +test_6d() { + [ $RUNAS_ID -eq $UID ] && echo "skipping test 6d" && return + $RUNAS chown $UID $DIR/f6c && error + $CHECKSTAT -t file -u \#$RUNAS_ID $DIR/f6c || error } -run_test 6 "touch .../f6; chmod .../f6 =========================" +run_test 6d "$RUNAS chown .../f6c (should return error) ==" + +test_6e() { + [ $RUNAS_ID -eq $UID ] && echo "skipping test 6e" 
&& return + touch $DIR/f6e + chgrp $RUNAS_ID $DIR/f6e || error + $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error +} +run_test 6e "touch .../f6e; chgrp .../f6e ======================" + +test_6f() { + [ $RUNAS_ID -eq $UID ] && echo "skipping test 6f" && return + $RUNAS chgrp $UID $DIR/f6e && error + $CHECKSTAT -t file -u \#$UID -g \#$RUNAS_ID $DIR/f6e || error +} +run_test 6f "$RUNAS chgrp .../f6e (should return error) ==" test_7a() { mkdir $DIR/d7 @@ -357,7 +404,7 @@ test_23() { run_test 23 "O_CREAT|O_EXCL in subdir ==========================" test_24a() { - echo '============ rename sanity =================================' + echo '== rename sanity ==============================================' echo '-- same directory rename' mkdir $DIR/R1 touch $DIR/R1/f @@ -440,7 +487,7 @@ test_24i() { $CHECKSTAT -t dir $DIR/R9/a || error $CHECKSTAT -a file $DIR/R9/a/f || error } -run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a =====" +run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a" test_24j() { mkdir $DIR/R10 @@ -452,7 +499,7 @@ test_24j() { run_test 24j "source does not exist ============================" test_25a() { - echo '== symlink sanity =======================================' + echo '== symlink sanity =============================================' mkdir $DIR/d25 ln -s d25 $DIR/s25 touch $DIR/s25/foo || error @@ -473,7 +520,8 @@ test_26a() { run_test 26a "multiple component symlink =======================" test_26b() { - ln -s d26/d26-2/foo $DIR/s26-2 + mkdir -p $DIR/d26b/d26-2 + ln -s d26b/d26-2/foo $DIR/s26-2 touch $DIR/s26-2 || error } run_test 26b "multiple component symlink at end of lookup ======" @@ -500,12 +548,12 @@ test_26e() { run_test 26e "unlink multiple component recursive symlink ======" test_27a() { - echo '== stripe sanity ========================================' + echo '== stripe sanity ==============================================' mkdir $DIR/d27 $LSTRIPE $DIR/d27/f0 8192 0 1 $CHECKSTAT 
-t file $DIR/d27/f0 pass - log "test_27b: write to one stripe file =========================" + log "== test_27b: write to one stripe file =========================" cp /etc/hosts $DIR/d27/f0 } run_test 27a "one stripe file ==================================" @@ -513,7 +561,7 @@ run_test 27a "one stripe file ==================================" test_27c() { $LSTRIPE $DIR/d27/f01 8192 0 2 pass - log "test_27d: write to two stripe file file f01 ================" + log "== test_27d: write to two stripe file file f01 ================" dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 } run_test 27c "create two stripe file f01 =======================" @@ -537,14 +585,15 @@ run_test 27e "lstripe existing file (should return error) ======" test_27f() { $LSTRIPE $DIR/d27/fbad 100 1 2 || true dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 + $LFIND $DIR/d27/fbad } run_test 27f "lstripe with bad stripe size (should return error on LOV)" test_27g() { $MCREATE $DIR/d27/fnone || error pass - log "test 27.9: lfind ============================================" - $LFIND $DIR/d27 + log "== test 27h: lfind ============================================" + $LFIND $DIR/d27/fnone | grep -q "Has no stripe info" || error } run_test 27g "mcreate file without objects to test lfind =======" @@ -586,7 +635,7 @@ test_30() { run_test 30 "run binary from Lustre (execve) ===================" test_31() { - ./openunlink $DIR/f31 $DIR/f31 || error + $OPENUNLINK $DIR/f31 $DIR/f31 || error } run_test 31 "open-unlink file ==================================" @@ -627,7 +676,7 @@ test_32d() { ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error umount $DIR/d32d/ext2-mountpoint || error } -run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ==========" +run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir =========" test_32e() { [ -e $DIR/d32e ] && rm -fr $DIR/d32e @@ -638,7 +687,7 @@ test_32e() { $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error $CHECKSTAT -t link $DIR/d32e/symlink01 || error } 
-run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir =====" +run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir ====" test_32f() { [ -e $DIR/d32f ] && rm -fr $DIR/d32f @@ -649,7 +698,7 @@ test_32f() { ls $DIR/d32f/tmp/symlink11 || error ls $DIR/d32f/symlink01 || error } -run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir =====" +run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir ====" test_32g() { [ -e $DIR/d32g ] && rm -fr $DIR/d32g @@ -687,7 +736,7 @@ test_32i() { $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error umount $DIR/d32i/ext2-mountpoint || error } -run_test 32i "stat d32i/ext2-mountpoint/../test_file ============" +run_test 32i "stat d32i/ext2-mountpoint/../test_file ===========" test_32j() { [ -e $DIR/d32j ] && rm -fr $DIR/d32j @@ -697,10 +746,10 @@ test_32j() { cat $DIR/d32j/ext2-mountpoint/../test_file || error umount $DIR/d32j/ext2-mountpoint || error } -run_test 32j "open d32j/ext2-mountpoint/../test_file ============" +run_test 32j "open d32j/ext2-mountpoint/../test_file ===========" test_32k() { - [ -e $DIR/d32k ] && rm -fr $DIR/d32k + rm -fr $DIR/d32k mkdir -p $DIR/d32k/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint mkdir -p $DIR/d32k/d2 @@ -708,10 +757,10 @@ test_32k() { $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error umount $DIR/d32k/ext2-mountpoint || error } -run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file =========" +run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file ========" test_32l() { - [ -e $DIR/d32l ] && rm -fr $DIR/d32l + rm -fr $DIR/d32l mkdir -p $DIR/d32l/ext2-mountpoint mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error mkdir -p $DIR/d32l/d2 @@ -719,10 +768,10 @@ test_32l() { cat $DIR/d32l/ext2-mountpoint/../d2/test_file || error umount $DIR/d32l/ext2-mountpoint || error } -run_test 32l "open d32l/ext2-mountpoint/../d2/test_file =========" +run_test 32l "open d32l/ext2-mountpoint/../d2/test_file 
========" test_32m() { - [ -e $DIR/d32m ] && rm -fr $DIR/d32m + rm -fr $DIR/d32m mkdir -p $DIR/d32m/tmp TMP_DIR=$DIR/d32m/tmp ln -s $DIR $TMP_DIR/symlink11 @@ -730,10 +779,10 @@ test_32m() { $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error $CHECKSTAT -t link $DIR/d32m/symlink01 || error } -run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root =======" +run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root ======" test_32n() { - [ -e $DIR/d32n ] && rm -fr $DIR/d32n + rm -fr $DIR/d32n mkdir -p $DIR/d32n/tmp TMP_DIR=$DIR/d32n/tmp ln -s $DIR $TMP_DIR/symlink11 @@ -741,11 +790,11 @@ test_32n() { ls -l $DIR/d32n/tmp/symlink11 || error ls -l $DIR/d32n/symlink01 || error } -run_test 32n "open d32n/symlink->tmp/symlink->lustre-root =======" +run_test 32n "open d32n/symlink->tmp/symlink->lustre-root ======" test_32o() { - [ -e $DIR/d32o ] && rm -fr $DIR/d32o - [ -e $DIR/test_file ] && rm -fr $DIR/test_file + rm -fr $DIR/d32o + rm -f $DIR/test_file touch $DIR/test_file mkdir -p $DIR/d32o/tmp TMP_DIR=$DIR/d32o/tmp @@ -759,8 +808,8 @@ test_32o() { run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file" test_32p() { - [ -e $DIR/d32p ] && rm -fr $DIR/d32p - [ -e $DIR/test_file ] && rm -fr $DIR/test_file + rm -fr $DIR/d32p + rm -f $DIR/test_file touch $DIR/test_file mkdir -p $DIR/d32p/tmp TMP_DIR=$DIR/d32p/tmp @@ -771,109 +820,220 @@ test_32p() { } run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file" +test_32q() { + [ -e $DIR/d32q ] && rm -fr $DIR/d32q + mkdir -p $DIR/d32q + mount -t ext2 -o loop $EXT2_DEV $DIR/d32q + ls $DIR/d32q || error + umount $DIR/d32q || error +} +run_test 32q "ls a mounted file system =========================" + # chmod 444 /mnt/lustre/somefile # open(/mnt/lustre/somefile, O_RDWR) # Should return -1 test_33() { - [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file + rm -f $DIR/test_33_file touch $DIR/test_33_file chmod 444 $DIR/test_33_file chown $RUNAS_ID $DIR/test_33_file - $RUNAS openfile -f O_RDWR 
$DIR/test_33_file && error || true + $RUNAS $OPENFILE -f O_RDWR $DIR/test_33_file && error || true } run_test 33 "write file with mode 444 (should return error) ====" -test_34() { - $MCREATE $DIR/f - $TRUNCATE $DIR/f 100 - rm $DIR/f +TEST_34_SIZE=${TEST_34_SIZE:-2000000000000} +test_34a() { + rm -f $DIR/test_34_file + $MCREATE $DIR/test_34_file || error + $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error + $TRUNCATE $DIR/test_34_file $TEST_34_SIZE || error + $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error +} +run_test 34a "truncate file that has not been opened ===========" + +test_34b() { + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error + $OPENFILE -f O_RDONLY $DIR/test_34_file + $LFIND $DIR/test_34_file | grep -q "Has no stripe information" || error + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error } -run_test 34 "truncate file that has not been opened ============" +run_test 34b "O_RDONLY opening file doesn't create objects =====" + +test_34c() { + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error + $OPENFILE -f O_RDWR $DIR/test_34_file + $LFIND $DIR/test_34_file | grep -q "Has no stripe information" && error + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error +} +run_test 34c "O_RDWR opening file-with-size works ==============" + +test_34d() { + dd if=/dev/zero of=$DIR/test_34_file conv=notrunc bs=4k count=1 || error + $CHECKSTAT -s $TEST_34_SIZE $DIR/test_34_file || error + rm $DIR/test_34_file +} +run_test 34d "write to sparse file =============================" + +test_34e() { + rm -f $DIR/test_34_file + $MCREATE $DIR/test_34_file || error + $TRUNCATE $DIR/test_34_file 1000 || error + $CHECKSTAT -s 1000 $DIR/test_34_file || error + $OPENFILE -f O_RDWR $DIR/test_34_file + $CHECKSTAT -s 1000 $DIR/test_34_file || error +} +run_test 34e "create objects, some with size and some without ==" test_35() { - [ -e $DIR/test_35_file ] && rm -fr 
$DIR/test_35_file cp /bin/sh $DIR/test_35_file chmod 444 $DIR/test_35_file chown $RUNAS_ID $DIR/test_35_file - $DIR/test_35_file && error - return 0 + $DIR/test_35_file && error || true + rm $DIR/test_35_file } run_test 35 "exec file with mode 444 (should return error) =====" test_36a() { - log 36 "cvs operations ====================================" - mkdir -p $DIR/cvsroot - chown $RUNAS_ID $DIR/cvsroot - $RUNAS cvs -d $DIR/cvsroot init + sleep 1 # we need a rest, or UMLs clock becomes skewed + rm -f $DIR/test_36_file + utime $DIR/test_36_file || error } -run_test 36a "cvs init =========================================" +run_test 36a "MDS utime check (mknod, utime) ===================" test_36b() { - # on the LLNL clusters, runas will still pick up root's $TMP settings, - # which will not be writable for the runas user, and then you get a CVS - # error message with a corrupt path string (CVS bug) and panic. - # We're not using much space, so just stick it in /tmp, which is - # safe. - OLDTMPDIR=$TMPDIR - OLDTMP=$TMP - TMPDIR=/tmp - TMP=/tmp - - cd /etc/init.d - $RUNAS cvs -d $DIR/cvsroot import -m "nomesg" reposname vtag rtag - - TMPDIR=$OLDTMPDIR - TMP=$OLDTMP + sleep 1 + echo "" > $DIR/test_36_file + utime $DIR/test_36_file || error } -run_test 36b "cvs import =======================================" +run_test 36b "OST utime check (open, utime) ====================" test_36c() { - cd $DIR - mkdir -p $DIR/reposname - chown $RUNAS_ID $DIR/reposname - $RUNAS cvs -d $DIR/cvsroot co reposname + sleep 1 + rm -f $DIR/d36/test_36_file + mkdir $DIR/d36 + chown $RUNAS_ID $DIR/d36 + $RUNAS utime $DIR/d36/test_36_file || error } -run_test 36c "cvs checkout =====================================" +run_test 36c "non-root MDS utime check (mknod, utime) ==========" test_36d() { - cd $DIR/reposname - $RUNAS touch foo36 - $RUNAS cvs add -m 'addmsg' foo36 + sleep 1 + echo "" > $DIR/d36/test_36_file + $RUNAS utime $DIR/d36/test_36_file || error } -run_test 36d "cvs add 
==========================================" +run_test 36d "non-root OST utime check (open, utime) ===========" test_36e() { - cd $DIR/reposname - $RUNAS cvs update -} -run_test 36e "cvs update =======================================" - -# XXX change this: use a non root user -test_36f() { - cd $DIR/reposname - $RUNAS cvs commit -m 'nomsg' foo36 + sleep 1 + [ $RUNAS_ID -eq $UID ] && return + touch $DIR/d36/test_36_file2 + $RUNAS utime $DIR/d36/test_36_file2 && error || true } -run_test 36f "cvs commit =======================================" +run_test 36e "utime on non-owned file (should return error) ====" test_37() { mkdir -p $DIR/dextra echo f > $DIR/dextra/fbugfile - mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra - ls $DIR/dextra |grep "\" && error - umount /$EXT2_DEV - rm -f DIR/dextra/fbugfile + mount -t ext2 -o loop $EXT2_DEV $DIR/dextra + ls $DIR/dextra | grep "\" && error + umount $DIR/dextra || error + rm -f $DIR/dextra/fbugfile || error } -run_test 37 "ls a mounted file system to check the old contents =====" +run_test 37 "ls a mounted file system to check old content =====" # open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501) test_38() { - o_directory $DIR/test38 + o_directory $DIR/test38 } run_test 38 "open a regular file with O_DIRECTORY ==============" - + +test_39() { + touch $DIR/test_39_file + touch $DIR/test_39_file2 +# ls -l $DIR/test_39_file $DIR/test_39_file2 +# ls -lu $DIR/test_39_file $DIR/test_39_file2 +# ls -lc $DIR/test_39_file $DIR/test_39_file2 + sleep 2 + $OPENFILE -f O_CREAT:O_TRUNC:O_WRONLY $DIR/test_39_file2 +# ls -l $DIR/test_39_file $DIR/test_39_file2 +# ls -lu $DIR/test_39_file $DIR/test_39_file2 +# ls -lc $DIR/test_39_file $DIR/test_39_file2 + [ $DIR/test_39_file2 -nt $DIR/test_39_file ] || error +} +run_test 39 "mtime changed on create ===========================" + +test_40() { + dd if=/dev/zero of=$DIR/f40 bs=4096 count=1 + $RUNAS $OPENFILE -f O_WRONLY:O_TRUNC $DIR/f40 && error + $CHECKSTAT -t file -s 4096 
$DIR/f40 || error +} +run_test 40 "failed open(O_TRUNC) doesn't truncate =============" + +test_41() { + # bug 1553 + small_write $DIR/f41 18 +} +run_test 41 "test small file write + fstat =====================" + +# on the LLNL clusters, runas will still pick up root's $TMP settings, +# which will not be writable for the runas user, and then you get a CVS +# error message with a corrupt path string (CVS bug) and panic. +# We're not using much space, so just stick it in /tmp, which is safe. +OLDTMPDIR=$TMPDIR +OLDTMP=$TMP +TMPDIR=/tmp +TMP=/tmp +OLDHOME=$HOME +[ $RUNAS_ID -ne $UID ] && HOME=/tmp + +test_99a() { + echo 99 "cvs operations ====================================" + mkdir -p $DIR/d99cvsroot + chown $RUNAS_ID $DIR/d99cvsroot + $RUNAS cvs -d $DIR/d99cvsroot init || error +} +run_test 99a "cvs init =========================================" + +test_99b() { + cd /etc/init.d + $RUNAS cvs -d $DIR/d99cvsroot import -m "nomesg" d99reposname vtag rtag +} +run_test 99b "cvs import =======================================" + +test_99c() { + cd $DIR + mkdir -p $DIR/d99reposname + chown $RUNAS_ID $DIR/d99reposname + $RUNAS cvs -d $DIR/d99cvsroot co d99reposname +} +run_test 99c "cvs checkout =====================================" + +test_99d() { + cd $DIR/d99reposname + $RUNAS touch foo99 + $RUNAS cvs add -m 'addmsg' foo99 +} +run_test 99d "cvs add ==========================================" + +test_99e() { + cd $DIR/d99reposname + $RUNAS cvs update +} +run_test 99e "cvs update =======================================" + +test_99f() { + cd $DIR/d99reposname + $RUNAS cvs commit -m 'nomsg' foo99 +} +run_test 99f "cvs commit =======================================" + +TMPDIR=$OLDTMPDIR +TMP=$OLDTMP +HOME=$OLDHOME log "cleanup: ======================================================" -rm -r $DIR/[Rdfs][1-9]* +rm -rf $DIR/[Rdfs][1-9]* if [ "$I_MOUNTED" = "yes" ]; then sh llmountcleanup.sh || error fi diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index 
8145e63..1895c8a 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -2,130 +2,207 @@ set -e -PATH=$PATH:. +ONLY=${ONLY:-"$*"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"8"} # bug 1557 + +SRCDIR=`dirname $0` +PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -MOUNT1=${MOUNT1:-/mnt/lustre1} -MOUNT2=${MOUNT2:-/mnt/lustre2} +CREATETEST=${CREATETEST:-createtest} +LFIND=${LFIND:-lfind} +LSTRIPE=${LSTRIPE:-lstripe} +LCTL=${LCTL:-lctl} +MCREATE=${MCREATE:-mcreate} +OPENFILE=${OPENFILE:-openfile} +OPENUNLINK=${OPENUNLINK:-openunlink} +TOEXCL=${TOEXCL:-toexcl} +TRUNCATE=${TRUNCATE:-truncate} + +if [ $UID -ne 0 ]; then + RUNAS_ID="$UID" + RUNAS="" +else + RUNAS_ID=${RUNAS_ID:-500} + RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} +fi + export NAME=${NAME:-mount2} +SAVE_PWD=$PWD + clean() { - echo -n "cln.." - sh llmountcleanup.sh > /dev/null + echo -n "cln.." + sh llmountcleanup.sh > /dev/null || exit 20 } - CLEAN=${CLEAN:-clean} + start() { - echo -n "mnt.." - sh llrmount.sh > /dev/null - echo -n "done" + echo -n "mnt.." + sh llrmount.sh > /dev/null || exit 10 + echo "done" } START=${START:-start} -error () { - echo FAIL - exit 1 -} - -pass() { - echo PASS -} - -mkdir -p $MOUNT2 -mount | grep $MOUNT1 || sh llmount.sh - -echo -n "test 1: check create on 2 mtpt's..." -touch $MOUNT1/f1 -[ -f $MOUNT2/f1 ] || error -pass - -echo "test 2: check attribute updates on 2 mtpt's..." -chmod 777 $MOUNT2/f1 -$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error -pass - -echo "test 2b: check cached attribute updates on 2 mtpt's..." -touch $MOUNT1/f2b -ls -l $MOUNT2/f2b -chmod 777 $MOUNT2/f2b -$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error -pass - -echo "test 2c: check cached attribute updates on 2 mtpt's..." -touch $MOUNT1/f2c -ls -l $MOUNT2/f2c -chmod 777 $MOUNT1/f2c -$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error -pass - -echo "test 3: check after remount attribute updates on 2 mtpt's..." 
-chmod a-x $MOUNT2/f1 -$CLEAN -$START -$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error -pass - -echo "test 4: unlink on one mountpoint removes file on other..." -rm $MOUNT2/f1 -$CHECKSTAT -a $MOUNT1/f1 || error -pass - -echo -n "test 5: symlink on one mtpt, readlink on another..." -( cd $MOUNT1 ; ln -s this/is/good lnk ) - -[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error -pass - -echo -n "test 6: fstat validation on multiple mount points..." -./multifstat $MOUNT1/f6 $MOUNT2/f6 -pass - -if [ -n "$BUG_1365" ]; then -echo -n "test 7: create a file on one mount, truncate it on the other..." -mcreate $MOUNT1/f1 -truncate $MOUNT2/f1 100 -rm $MOUNT1/f1 -pass -else -echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)." -fi +log() { + echo "$*" + lctl mark "$*" || true +} + +run_one() { + if ! mount | grep -q $DIR1; then + $START + fi + log "== test $1: $2" + test_$1 || error + pass + cd $SAVE_PWD + $CLEAN +} + +run_test() { + for O in $ONLY; do + if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then + echo "" + run_one $1 "$2" + return $? + else + echo -n "." + fi + done + for X in $EXCEPT $ALWAYS_EXCEPT; do + if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then + echo "skipping excluded test $1" + return 0 + fi + done + if [ -z "$ONLY" ]; then + run_one $1 "$2" + return $? 
+ fi +} + +error () { + echo "FAIL: $@" + exit 1 +} + +pass() { + echo PASS +} + +MOUNT1=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| head -1` +MOUNT2=`mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| tail -1` +[ -z "$MOUNT1" ] && error "NAME=$NAME not mounted once" +[ "$MOUNT1" = "$MOUNT2" ] && error "NAME=$NAME not mounted twice" +[ `mount| awk '/^'$NAME' .* lustre_lite / { print $3 }'| wc -l` -ne 2 ] && \ + error "NAME=$NAME mounted more than twice" + +DIR1=${DIR1:-$MOUNT1} +DIR2=${DIR2:-$MOUNT2} +[ -z "`echo $DIR1 | grep $MOUNT1`" ] && echo "$DIR1 not in $MOUNT1" && exit 96 +[ -z "`echo $DIR2 | grep $MOUNT2`" ] && echo "$DIR2 not in $MOUNT2" && exit 95 + +rm -f $DIR1/[df][0-9]* $DIR1/lnk + +test_1a() { + touch $DIR1/f1 + [ -f $DIR2/f1 ] || error +} +run_test 1a "check create on 2 mtpt's ==========================" + +test_1b() { + chmod 777 $DIR2/f1 + $CHECKSTAT -t file -p 0777 $DIR1/f1 || error + chmod a-x $DIR2/f1 +} +run_test 1b "check attribute updates on 2 mtpt's ===============" + +test_1c() { + $CHECKSTAT -t file -p 0666 $DIR1/f1 || error +} +run_test 1c "check after remount attribute updates on 2 mtpt's =" + +test_1d() { + rm $DIR2/f1 + $CHECKSTAT -a $DIR1/f1 || error +} +run_test 1d "unlink on one mountpoint removes file on other ====" + +test_2a() { + touch $DIR1/f2a + ls -l $DIR2/f2a + chmod 777 $DIR2/f2a + $CHECKSTAT -t file -p 0777 $DIR1/f2a || error +} +run_test 2a "check cached attribute updates on 2 mtpt's ========" + +test_2b() { + touch $DIR1/f2b + ls -l $DIR2/f2b + chmod 777 $DIR1/f2b + $CHECKSTAT -t file -p 0777 $DIR2/f2b || error +} +run_test 2b "check cached attribute updates on 2 mtpt's ========" + +test_3() { + ( cd $DIR1 ; ln -s this/is/good lnk ) + [ "this/is/good" = "`perl -e 'print readlink("'$DIR2/lnk'");'`" ] || \ + error +} +run_test 3 "symlink on one mtpt, readlink on another ===========" + +test_4() { + ./multifstat $DIR1/f6 $DIR2/f6 +} +run_test 4 "fstat validation on multiple mount points ==========" + +test_5() 
{ + mcreate $DIR1/f5 + truncate $DIR2/f5 100 + rm $DIR1/f5 +} +run_test 5 "create a file on one mount, truncate it on the other" + +test_6() { + ./openunlink $DIR1/f6 $DIR2/f6 || error +} +run_test 6 "remove of open file on other node ==================" + +test_7() { + ./opendirunlink $DIR1/d7 $DIR2/d7 || error +} +run_test 7 "remove of open directory on other node =============" + +test_8() { + ./opendevunlink $DIR1/dev8 $DIR2/dev8 || error +} +run_test 8 "remove of open special file on other node ==========" + +test_9() { + MTPT=1 + > $DIR2/f9 + for C in a b c d e f g h i j k l; do + DIR=`eval echo \\$DIR$MTPT` + echo -n $C >> $DIR/f9 + [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 + done + [ "`cat $DIR1/f9`" = "abcdefghijkl" ] || error +} +run_test 9 "append of file with sub-page size on multiple mounts" + +test_10() { + MTPT=1 + OFFSET=0 + > $DIR2/f10 + for C in a b c d e f g h i j k l; do + DIR=`eval echo \\$DIR$MTPT` + echo -n $C | dd of=$DIR/f10 bs=1 seek=$OFFSET count=1 + [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 + OFFSET=`expr $OFFSET + 1` + done + [ "`cat $DIR1/f10`" = "abcdefghijkl" ] || error +} +run_test 10 "write of file with sub-page size on multiple mounts " -echo "test 9: remove of open file on other node..." -./openunlink $MOUNT1/f9 $MOUNT2/f9 || error -pass - -echo "test 9b: remove of open directory on other node..." -./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error -pass - -#echo "test 9c: remove of open special file on other node..." -#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error -#pass - -echo -n "test 10: append of file with sub-page size on multiple mounts..." -MTPT=1 -> $MOUNT2/f10 -for C in a b c d e f g h i j k l; do - MOUNT=`eval echo \\$MOUNT$MTPT` - echo -n $C >> $MOUNT/f10 - [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 -done -[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error - -echo -n "test 11: write of file with sub-page size on multiple mounts..." 
-MTPT=1 -OFFSET=0 -> $MOUNT2/f11 -for C in a b c d e f g h i j k l; do - MOUNT=`eval echo \\$MOUNT$MTPT` - echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1 - [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 - OFFSET=`expr $OFFSET + 1` -done -[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error - -rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk - -$CLEAN - -exit +rm -f $DIR1/f[0-9]* $DIR1/lnk diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index 2b3adc3..f7a9241 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -6,7 +6,7 @@ config=${1:-uml.xml} LMC=${LMC:-lmc} TMP=${TMP:-/tmp} -MDSDEV=${MDSDEV:-$TMP/mds1} +MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-50000} OSTDEVBASE=$TMP/ost @@ -19,6 +19,7 @@ STRIPECNT=${STRIPECNT:-1} FSTYPE=${FSTYPE:-ext3} NETTYPE=${NETTYPE:-tcp} +NIDTYPE=${NIDTYPE:-$NODETYPE} # NOTE - You can't have different MDS/OST nodes and also have clients on the # MDS/OST nodes without using --endlevel and --startlevel during lconf. @@ -50,6 +51,10 @@ CLIENTS=${CLIENTS:-"uml3"} rm -f $config +h2localhost () { + echo localhost +} + h2tcp () { case $1 in client) echo '\*' ;; @@ -68,7 +73,7 @@ h2elan () { echo -n "adding NET for:" for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do echo -n " $NODE" - ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1 + ${LMC} -m $config --add net --node $NODE --nid `h2$NIDTYPE $NODE` --nettype $NETTYPE || exit 1 done # configure mds server @@ -82,7 +87,7 @@ echo -n "adding OST on:" for NODE in $OSTNODES; do eval OSTDEV=\$OSTDEV$COUNT echo -n " $NODE" - OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT} + OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT-`hostname`} ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21 COUNT=`expr $COUNT + 1` done diff --git a/lustre/tests/utime.c b/lustre/tests/utime.c index c6a5d7d..9fe9f26 100644 --- a/lustre/tests/utime.c +++ b/lustre/tests/utime.c @@ -30,7 +30,14 @@ int 
main(int argc, char *argv[]) if (argc != 2) usage(argv[0]); - before_mknod = time(0); + /* Adjust the before time back one second, because the kernel's + * CURRENT_TIME (lockless clock reading, used to set inode times) + * may drift against the do_gettimeofday() time (TSC-corrected and + * locked clock reading, used to return timestamps to user space). + * This means that the mknod time could be a second older than the + * before time, even for a local filesystem such as ext3. + */ + before_mknod = time(0) - 1; rc = mknod(filename, 0700, S_IFREG); after_mknod = time(0); if (rc && errno != EEXIST) { @@ -52,13 +59,15 @@ int main(int argc, char *argv[]) return 4; } - printf("%s: good mknod times %lu <= %lu <= %lu\n", - prog, before_mknod, st.st_mtime, after_mknod); + printf("%s: good mknod times %lu%s <= %lu <= %lu\n", + prog, before_mknod, before_mknod == st.st_mtime ? "*":"", + st.st_mtime, after_mknod); sleep(5); } - before_utime = time(0); + /* See above */ + before_utime = time(0) - 1; rc = utime(filename, NULL); after_utime = time(0); if (rc) { @@ -80,8 +89,9 @@ int main(int argc, char *argv[]) return 7; } - printf("%s: good utime times %lu <= %lu <= %lu\n", - prog, before_utime, st.st_mtime, after_utime); + printf("%s: good utime times %lu%s <= %lu <= %lu\n", + prog, before_utime, before_utime == st.st_mtime ? 
"*" : "", + st.st_mtime, after_utime); return 0; } diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore index 06a1588..20f4185 100644 --- a/lustre/utils/.cvsignore +++ b/lustre/utils/.cvsignore @@ -15,4 +15,6 @@ obdstat obdio obdbarrier lload -wirecheck \ No newline at end of file +wirecheck +.*.cmd +.*.d diff --git a/lustre/utils/Lustre/__init__.py b/lustre/utils/Lustre/__init__.py index c1b93e6..7a21df3 100644 --- a/lustre/utils/Lustre/__init__.py +++ b/lustre/utils/Lustre/__init__.py @@ -4,4 +4,4 @@ from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP from error import LconfError, OptionError from cmdline import Options -CONFIG_VERSION="2003060501" +CONFIG_VERSION="2003070801" diff --git a/lustre/utils/lactive b/lustre/utils/lactive index a5e8580..04841eb 100644 --- a/lustre/utils/lactive +++ b/lustre/utils/lactive @@ -31,6 +31,7 @@ import sys, getopt, types import string, os import ldap +from stat import S_IROTH, S_IRGRP PYMOD_DIR = "/usr/lib/lustre/python" def development_mode(): @@ -43,13 +44,14 @@ if not development_mode(): sys.path.append(PYMOD_DIR) import Lustre +PARAM = Lustre.Options.PARAM lactive_options = [ - ('ldapurl',"LDAP server URL", Lustre.Options.PARAM, - "ldap://localhost"), - ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM), - ('group', "The group of devices to update", Lustre.Options.PARAM), - ('active', "The active node name", Lustre.Options.PARAM), + ('ldapurl',"LDAP server URL", PARAM, "ldap://localhost"), + ('config', "Cluster config name used for LDAP query", PARAM), + ('group', "The group of devices to update", PARAM), + ('active', "The active node name", PARAM), + ('pwfile', "File containing password", PARAM), ] def fatal(*args): @@ -57,7 +59,6 @@ def fatal(*args): print "! 
" + msg sys.exit(1) - cl = Lustre.Options("lactive","", lactive_options) config, args = cl.parse(sys.argv[1:]) @@ -66,10 +67,32 @@ if not (config.group or config.active): if not config.config: fatal("Missing config") - + +if config.pwfile: + try: + pwperm = os.stat(config.pwfile)[0] + pwreadable = pwperm & (S_IRGRP | S_IROTH) + if pwreadable: + if pwreadable == (S_IRGRP | S_IROTH): + readable_by = "group and others" + elif pwreadable == S_IRGRP: + readable_by = "group" + else: + readable_by = "others" + print "WARNING: Password file %s is readable by %s" % ( + config.pwfile, readable_by) + + pwfile = open(config.pwfile, "r") + pw = string.strip(pwfile.readline()) + pwfile.close() + except Exception, e: + fatal("Can't read secret from pwfile %s: %s" % (config.pwfile, e)) +else: + print "no pwfile specified, binding anonymously" + pw = "" + base = "config=%s,fs=lustre" % (config.config,) -db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret", - url = config.ldapurl) +db = Lustre.LustreDB_LDAP('', {}, base=base, pw = pw, url = config.ldapurl) active_node = db.lookup_name(config.active) if not active_node: diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 15e5a2c..92ec8e2 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -1,7 +1,8 @@ #!/usr/bin/env python # -# Copyright (C) 2002 Cluster File Systems, Inc. -# Author: Robert Read +# Copyright (C) 2002-2003 Cluster File Systems, Inc. +# Authors: Robert Read +# Mike Shaver # This file is part of Lustre, http://www.lustre.org. 
# # Lustre is free software; you can redistribute it and/or @@ -26,7 +27,7 @@ import sys, getopt, types import string, os, stat, popen2, socket, time, random, fcntl, select -import re, exceptions, signal +import re, exceptions, signal, traceback import xml.dom.minidom if sys.version[0] == '1': @@ -57,7 +58,7 @@ MAX_LOOP_DEVICES = 256 PORTALS_DIR = 'portals' -# Please keep these uptodate with the values in portals/kp30.h +# Please keep these in sync with the values in portals/kp30.h ptldebug_names = { "trace" : (1 << 0), "inode" : (1 << 1), @@ -107,6 +108,8 @@ subsystem_names = { "ptlrouter" : (20 << 24), "cobd" : (21 << 24), "ptlbd" : (22 << 24), + "log" : (23 << 24), + "mgmt" : (24 << 24), } @@ -423,8 +426,11 @@ class LCTLInterface: add_route %s %s %s quit """ % (net, gw, lo, hi) - self.run(cmds) - + try: + self.run(cmds) + except CommandError, e: + log ("ignore: ") + e.dump() def del_route(self, net, gw, lo, hi): cmds = """ @@ -443,7 +449,11 @@ class LCTLInterface: quit """ % (net, uuid, tgt, net, gw, tgt) - self.run(cmds) + try: + self.run(cmds) + except CommandError, e: + log ("ignore: ") + e.dump() # add a route to a range def del_route_host(self, net, uuid, gw, tgt): @@ -795,7 +805,6 @@ def get_local_address(net_type, wildcard): local=string.rstrip(local[0]) return local - # XXX: instead of device_list, ask for $name and see what we get def is_prepared(name): @@ -1020,8 +1029,8 @@ class Network(Module): self_nid = self.nid if gw_nid < self_nid: try: - lctl.disconnect(router.net_type, router.nid, router.port, - router.uuid) + lctl.disconnect(gw.net_type, gw.nid, gw.port, + gw.uuid) except CommandError, e: print "disconnectAll failed: ", self.name e.dump() @@ -1087,6 +1096,27 @@ class RouteTable(Module): e.dump() cleanup_error(e.rc) +class Management(Module): + def __init__(self, db): + Module.__init__(self, 'MGMT', db) + self.add_lustre_module('obdclass', 'obdclass') + self.add_lustre_module('ptlrpc', 'ptlrpc') + self.add_lustre_module('ldlm', 'ldlm') + 
self.add_lustre_module('mgmt', 'mgmt_svc') + + def prepare(self): + if is_prepared(self.name): + return + self.info() + lctl.newdev(attach="mgmt %s %s" % (self.name, self.uuid)) + + def safe_to_clean(self): + return 1 + + def cleanup(self): + if is_prepared(self.name): + Module.cleanup(self) + class LDLM(Module): def __init__(self,db): Module.__init__(self, 'LDLM', db) @@ -1109,7 +1139,7 @@ class LDLM(Module): Module.cleanup(self) class LOV(Module): - def __init__(self, db, uuid): + def __init__(self, db, uuid, fs_name): Module.__init__(self, 'LOV', db) self.add_lustre_module('mdc', 'mdc') self.add_lustre_module('lov', 'lov') @@ -1123,11 +1153,12 @@ class LOV(Module): self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist)) self.osclist = [] self.client_uuid = generate_client_uuid(self.name) + self.fs_name = fs_name self.mdc_name = '' - self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid) + self.mdc = get_mdc(db, self.client_uuid, fs_name, self.mds_uuid) for obd_uuid in self.devlist: obd = self.db.lookup(obd_uuid) - osc = get_osc(obd, self.client_uuid, self.name) + osc = get_osc(obd, self.client_uuid, fs_name) if osc: self.osclist.append(osc) else: @@ -1142,7 +1173,7 @@ class LOV(Module): # isn't implemented here yet. 
osc.prepare(ignore_connect_failure=0) except CommandError, e: - print "Error preparing OSC %s (inactive)\n" % osc.uuid + print "Error preparing OSC %s\n" % osc.uuid raise e self.mdc.prepare() self.mdc_name = self.mdc.name @@ -1156,7 +1187,7 @@ class LOV(Module): Module.cleanup(self) for osc in self.osclist: osc.cleanup() - mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid) + mdc = get_mdc(self.db, self.client_uuid, self.fs_name, self.mds_uuid) mdc.cleanup() def load_module(self): @@ -1172,12 +1203,12 @@ class LOV(Module): break class LOVConfig(Module): - def __init__(self,db): + def __init__(self, db): Module.__init__(self, 'LOVConfig', db) self.lov_uuid = self.db.get_first_ref('lov') l = self.db.lookup(self.lov_uuid) - self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID") + self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID", '') def prepare(self): lov = self.lov @@ -1410,9 +1441,20 @@ class OSD(Module): if not self.osdtype == 'obdecho': clean_loop(self.devpath) +def mgmt_uuid_for_fs(mtpt_name): + if not mtpt_name: + return '' + mtpt_db = toplevel.lookup_name(mtpt_name) + fs_uuid = mtpt_db.get_first_ref('filesystem') + fs = toplevel.lookup(fs_uuid) + if not fs: + return '' + return fs.get_first_ref('mgmt') + # Generic client module, used by OSC and MDC class Client(Module): - def __init__(self, tgtdb, uuid, module, owner): + def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, + module_dir=None): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.db = tgtdb @@ -1427,11 +1469,22 @@ class Client(Module): self.module = module self.module_name = string.upper(module) - self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), - self.target_name, owner) + if not self_name: + self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), + self.target_name, fs_name) + else: + self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - self.add_lustre_module(module, module) + mgmt_uuid = 
mgmt_uuid_for_fs(fs_name) + if mgmt_uuid: + self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) + else: + self.mgmt_name = '' + self.fs_name = fs_name + if not module_dir: + module_dir = module + self.add_lustre_module(module_dir, module) def lookup_server(self, srv_uuid): """ Lookup a server's network information """ @@ -1461,7 +1514,8 @@ class Client(Module): raise e if srv: lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid), - setup ="%s %s" %(self.target_uuid, srv.uuid)) + setup ="%s %s %s" % (self.target_uuid, srv.uuid, + self.mgmt_name)) def cleanup(self): if is_prepared(self.name): @@ -1473,7 +1527,7 @@ class Client(Module): else: srv, r = find_route(self.get_servers()) if srv: - lctl.del_route_host(r[0], srv.uuid, r[1], r[2]) + lctl.del_route_host(r[0], srv.uuid, r[1], r[3]) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() @@ -1481,13 +1535,22 @@ class Client(Module): class MDC(Client): - def __init__(self, db, uuid, owner): - Client.__init__(self, db, uuid, 'mdc', owner) + def __init__(self, db, uuid, fs_name): + Client.__init__(self, db, uuid, 'mdc', fs_name) + class OSC(Client): - def __init__(self, db, uuid, owner): - Client.__init__(self, db, uuid, 'osc', owner) + def __init__(self, db, uuid, fs_name): + Client.__init__(self, db, uuid, 'osc', fs_name) +def mgmtcli_name_for_uuid(uuid): + return 'MGMTCLI_%s' % uuid + +class ManagementClient(Client): + def __init__(self, db, uuid): + Client.__init__(self, db, uuid, 'mgmt_cli', '', + self_name = mgmtcli_name_for_uuid(db.getUUID()), + module_dir = 'mgmt') class COBD(Module): def __init__(self, db): @@ -1509,12 +1572,12 @@ class COBD(Module): # virtual interface for OSC and LOV class VOSC(Module): - def __init__(self, db, uuid, owner): + def __init__(self, db, uuid, fs_name): Module.__init__(self, 'VOSC', db) if db.get_class() == 'lov': - self.osc = LOV(db, uuid) + self.osc = LOV(db, uuid, fs_name) else: - self.osc = get_osc(db, uuid, owner) + self.osc = 
get_osc(db, uuid, fs_name) def get_uuid(self): return self.osc.uuid def get_name(self): @@ -1560,10 +1623,12 @@ class ECHO_CLIENT(Module): def load_module(self): self.osc.load_module() Module.load_module(self) + def cleanup_module(self): Module.cleanup_module(self) self.osc.cleanup_module() + def generate_client_uuid(name): client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), name, @@ -1571,6 +1636,7 @@ def generate_client_uuid(name): int(random.random() * 1048576)) return client_uuid[:36] + class Mountpoint(Module): def __init__(self,db): Module.__init__(self, 'MTPT', db) @@ -1579,6 +1645,7 @@ class Mountpoint(Module): fs = self.db.lookup(self.fs_uuid) self.mds_uuid = fs.get_first_ref('mds') self.obd_uuid = fs.get_first_ref('obd') + self.mgmt_uuid = fs.get_first_ref('mgmt') obd = self.db.lookup(self.obd_uuid) client_uuid = generate_client_uuid(self.name) self.vosc = VOSC(obd, client_uuid, self.name) @@ -1586,12 +1653,18 @@ class Mountpoint(Module): self.add_lustre_module('mdc', 'mdc') self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) self.add_lustre_module('llite', 'llite') - + if self.mgmt_uuid: + self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid), + client_uuid) + else: + self.mgmtcli = None def prepare(self): if fs_is_mounted(self.path): log(self.path, "already mounted.") return + if self.mgmtcli: + self.mgmtcli.prepare() self.vosc.prepare() if self.vosc.need_mdc(): self.mdc.prepare() @@ -1632,13 +1705,20 @@ class Mountpoint(Module): self.vosc.cleanup() if self.vosc.need_mdc(): self.mdc.cleanup() + if self.mgmtcli: + self.mgmtcli.cleanup() def load_module(self): + if self.mgmtcli: + self.mgmtcli.load_module() self.vosc.load_module() Module.load_module(self) + def cleanup_module(self): Module.cleanup_module(self) self.vosc.cleanup_module() + if self.mgmtcli: + self.mgmtcli.cleanup_module() # ============================================================ @@ -1670,6 +1750,8 @@ def getServiceLevel(self): ret = 6 elif type in 
('ldlm',): ret = 20 + elif type in ('mgmt',): + ret = 25 elif type in ('osd', 'cobd'): ret = 30 elif type in ('mdsdev',): @@ -1707,15 +1789,15 @@ def getServices(self): # # OSC is no longer in the xml, so we have to fake it. # this is getting ugly and begging for another refactoring -def get_osc(ost_db, uuid, owner): - osc = OSC(ost_db, uuid, owner) +def get_osc(ost_db, uuid, fs_name): + osc = OSC(ost_db, uuid, fs_name) return osc -def get_mdc(db, uuid, owner, mds_uuid): +def get_mdc(db, uuid, fs_name, mds_uuid): mds_db = db.lookup(mds_uuid); if not mds_db: panic("no mds:", mds_uuid) - mdc = MDC(mds_db, uuid, owner) + mdc = MDC(mds_db, uuid, fs_name) return mdc ############################################################ @@ -1842,6 +1924,8 @@ def newService(db): n = Mountpoint(db) elif type == 'echoclient': n = ECHO_CLIENT(db) + elif type == 'mgmt': + n = Management(db) else: panic ("unknown service type:", type) return n @@ -2060,7 +2144,7 @@ def sys_set_ptldebug(): def sys_set_subsystem(): if config.subsystem != None: try: - val = eval(config.ptldebug, ptldebug_names) + val = eval(config.subsystem, subsystem_names) val = "0x%x" % (val,) sysctl('portals/subsystem_debug', val) except NameError, e: @@ -2191,7 +2275,7 @@ lconf_options = [ ] def main(): - global lctl, config + global lctl, config, toplevel # in the upcall this is set to SIG_IGN signal.signal(signal.SIGCHLD, signal.SIG_DFL) @@ -2241,9 +2325,12 @@ def main(): dn = "config=%s,fs=lustre" % (config.config) db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) else: - cl.usage() + print 'Missing config file or ldap URL.' 
+ print 'see lconf --help for command summary' sys.exit(1) + toplevel = db + ver = db.get_version() if not ver: panic("No version found in config data, please recreate.") @@ -2277,6 +2364,8 @@ if __name__ == "__main__": main() except Lustre.LconfError, e: print e +# traceback.print_exc(file=sys.stdout) + sys.exit(1) except CommandError, e: e.dump() sys.exit(e.rc) diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 80cdcf2..a4681ec 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -205,6 +205,9 @@ command_t cmdlist[] = { {"debug_kernel", jt_dbg_debug_kernel, 0, "get debug buffer and dump to a file" "usage: debug_kernel [file] [raw]"}, + {"dk", jt_dbg_debug_kernel, 0, + "get debug buffer and dump to a file" + "usage: dk [file] [raw]"}, {"debug_file", jt_dbg_debug_file, 0, "read debug buffer from input and dump to output" "usage: debug_file [output] [raw]"}, diff --git a/lustre/utils/lfind.c b/lustre/utils/lfind.c index 847dd4f..45f837c 100644 --- a/lustre/utils/lfind.c +++ b/lustre/utils/lfind.c @@ -21,38 +21,38 @@ #define MAX_LOV_UUID_COUNT 1000 #define OBD_NOT_FOUND (-1) -char * cmd; -struct option longOpts[] = { +char *cmd; +struct option longOpts[] = { {"help", 0, 0, 'h'}, {"obd", 1, 0, 'o'}, {"query", 0, 0, 'q'}, {"verbose", 0, 0, 'v'}, {0, 0, 0, 0} - }; -int query; -int verbose; -char * shortOpts = "ho:qv"; -char * usageMsg = "[ --obd | --query ] ..."; - -int max_ost_count = MAX_LOV_UUID_COUNT; -struct obd_uuid * obduuid; -char * buf; -int buflen; -struct obd_uuid * uuids; + }; +int query; +int verbose; +char shortOpts[] = "ho:qv"; +char usageMsg[] = "[ --obd | --query ] ..."; + +int max_ost_count = MAX_LOV_UUID_COUNT; +struct obd_uuid *obduuid; +char *buf; +int buflen; +struct obd_uuid *uuids; struct obd_ioctl_data data; -struct lov_desc desc; -int uuidslen; -int cfglen; +struct lov_desc desc; +int uuidslen; +int cfglen; struct lov_mds_md *lmm; -int lmmlen; +int lmmlen; +int printed_UUIDs; void init(); void usage(FILE *stream); void 
errMsg(char *fmt, ...); -void processPath(const char *path); +void processPath(char *path); -int -main (int argc, char **argv) { +int main (int argc, char **argv) { int c; cmd = basename(argv[0]); @@ -61,8 +61,8 @@ main (int argc, char **argv) { switch (c) { case 'o': if (obduuid) { - errMsg("obd '%s' already specified: '%s'.", - obduuid, optarg); + printf("obd '%s' already specified: '%s'\n", + obduuid->uuid, optarg); exit(1); } @@ -81,7 +81,7 @@ main (int argc, char **argv) { usage(stderr); exit(1); default: - errMsg("Internal error. Valid '%s' unrecognized.", + printf("Internal error. Valid '%s' unrecognized\n", argv[optind - 1]); usage(stderr); exit(1); @@ -105,8 +105,7 @@ main (int argc, char **argv) { exit (0); } -void -init() +void init() { int datalen, desclen; @@ -141,8 +140,7 @@ init() } if ((buf = malloc(buflen)) == NULL) { - errMsg("Unable to allocate %d bytes of memory for ioctl's.", - buflen); + errMsg("Unable to allocate %d bytes of memory for ioctl's"); exit(1); } @@ -150,112 +148,120 @@ init() uuids = (struct obd_uuid *)buf; } -void -usage(FILE *stream) +void usage(FILE *stream) { fprintf(stream, "usage: %s %s\n", cmd, usageMsg); } -void -errMsg(char *fmt, ...) +void errMsg(char *fmt, ...) 
{ va_list args; + int tmp_errno = errno; fprintf(stderr, "%s: ", cmd); va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); - fprintf(stderr, "\n"); + fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno); } -void -processPath(const char *path) +void processPath(char *path) { int fd; int rc; int i; - int obdindex; + int obdindex = OBD_NOT_FOUND; int obdcount; struct obd_uuid *uuidp; + char *fname, *dirname; - if (query || verbose && !obduuid) { + if ((query || verbose) && !obduuid) { printf("%s\n", path); } - if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) { - errMsg("open \"%.20s\" failed.", path); - perror("open"); + fname = strrchr(path, '/'); + if (fname != NULL && fname[1] != '\0') { + *fname = '\0'; + fname++; + dirname = path; + } else if (fname != NULL && fname[1] == '\0') { + printf("need getdents support\n"); return; + } else { + dirname = "."; + fname = path; } - memset(&data, 0, sizeof(data)); - data.ioc_inllen1 = sizeof(desc); - data.ioc_inlbuf1 = (char *)&desc; - data.ioc_inllen2 = uuidslen; - data.ioc_inlbuf2 = (char *)uuids; + if ((fd = open(dirname, O_RDONLY)) < 0) { + errMsg("open \"%.20s\" failed", dirname); + return; + } - memset(&desc, 0, sizeof(desc)); - desc.ld_tgt_count = max_ost_count; + if (!printed_UUIDs) { + memset(&data, 0, sizeof(data)); + data.ioc_inllen1 = sizeof(desc); + data.ioc_inlbuf1 = (char *)&desc; + data.ioc_inllen2 = uuidslen; + data.ioc_inlbuf2 = (char *)uuids; - if (obd_ioctl_pack(&data, &buf, buflen)) { - errMsg("internal buffering error."); - exit(1); - } + memset(&desc, 0, sizeof(desc)); + desc.ld_tgt_count = max_ost_count; + + if (obd_ioctl_pack(&data, &buf, buflen)) { + errMsg("internal buffering error"); + exit(1); + } - rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf); - if (rc) { - if (errno == ENOTTY) { - if (!obduuid) { - printf("Not a regular file or not Lustre file.\n\n"); + rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf); + if (rc) { + if (errno == ENOTTY) { + if (!obduuid) { + 
errMsg("error getting LOV config"); + } + return; } - return; + errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %s"); + exit(1); } - errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno); - perror("ioctl"); - exit(1); - } - if (obd_ioctl_unpack(&data, buf, buflen)) { - errMsg("Invalid reply from ioctl."); - exit(1); - } + if (obd_ioctl_unpack(&data, buf, buflen)) { + errMsg("Invalid reply from ioctl"); + exit(1); + } - obdcount = desc.ld_tgt_count; - if (obdcount == 0) - return; + obdcount = desc.ld_tgt_count; + if (obdcount == 0) + return; - obdindex = OBD_NOT_FOUND; + obdindex = OBD_NOT_FOUND; - if (obduuid) { - for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) { - if (strncmp((const char *)obduuid, (const char *)uuidp, - sizeof(*uuidp)) == 0) { - obdindex = i; + if (obduuid) { + for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) { + if (strncmp((char *)obduuid, (char *)uuidp, + sizeof(*uuidp)) == 0) { + obdindex = i; + } } - } - if (obdindex == OBD_NOT_FOUND) - return; - } else if (query || verbose) { - printf("OBDS:\n"); - for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) - printf("%4d: %s\n", i, (char *)uuidp); + if (obdindex == OBD_NOT_FOUND) + return; + } else if (query || verbose) { + printf("OBDS:\n"); + for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) + printf("%4d: %s\n", i, (char *)uuidp); + } + printed_UUIDs = 1; } - memset((void *)buf, 0, buflen); - lmm->lmm_magic = LOV_MAGIC; - lmm->lmm_ost_count = max_ost_count; - - rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm); + strcpy((char *)lmm, fname); + rc = ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lmm); if (rc) { if (errno == ENODATA) { - if(!obduuid) { - printf("Has no stripe information.\n\n"); - } + if (!obduuid) + printf("Has no stripe information.\n"); } else { - errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. 
%d", errno); - perror("ioctl"); + errMsg("IOC_MDC_GETSTRIPE ioctl failed"); } return; } diff --git a/lustre/utils/lmc b/lustre/utils/lmc index eaaed71..1a1bbc9 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -19,9 +19,9 @@ # """ -lmc - lustre configurtion data manager +lmc - lustre configuration data manager - See lustre book for documentation for lmc. + See Lustre book (http://www.lustre.org/docs/lustre.pdf) for documentation on lmc. """ @@ -98,6 +98,10 @@ Object creation command summary: --path /mnt/point --mds mds_name --ost ost_name OR --lov lov_name + +--add mgmt - Management/monitoring service + --node node_name + --mgmt mgmt_service_name """ PARAM = Lustre.Options.PARAM @@ -123,10 +127,10 @@ lmc_options = [ # network ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM), ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM), - ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"), - ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT), - ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0), - ('irq_affinity', "Optional arguement.", PARAM, 0), + ('tcpbuf', "Optional argument to specify the TCP buffer size.", PARAM, "0"), + ('port', "Optional argument to specify the TCP port number.", PARAM, DEFAULT_PORT), + ('nid_exchange', "Optional argument to indicate if nid exchange should be done.", PARAM, 0), + ('irq_affinity', "Optional argument.", PARAM, 0), ('hostaddr', "", PARAM,""), ('cluster_id', "Specify the cluster ID", PARAM, "0"), @@ -143,12 +147,12 @@ lmc_options = [ ('mds', "Specify MDS name.", PARAM), ('ost', "Specify the OST name.", PARAM,""), ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"), - ('failover', ""), + ('failover', "Enable failover support on OSTs or MDS?"), ('group', "", PARAM), ('dev', "Path of the device on local system.", PARAM,""), ('size', "Specify the size of the 
device if needed.", PARAM,"0"), ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"), - ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"), + ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"), ('ostuuid', "", PARAM,""), ('nspath', "Local mount point of server namespace.", PARAM,""), ('format', ""), @@ -167,6 +171,8 @@ lmc_options = [ # cobd ('real_obd', "", PARAM), ('cache_obd', "", PARAM), + + ('mgmt', "Specify management/monitoring service name.", PARAM, ""), ] def error(*args): @@ -393,16 +399,25 @@ class GenConfig: mdd.appendChild(self.ref("target", mds_uuid)) return mdd + def mgmt(self, mgmt_name, mgmt_uuid, node_uuid): + mgmt = self.newService("mgmt", mgmt_name, mgmt_uuid) + mgmt.appendChild(self.ref("node", node_uuid)) + # Placeholder until mgmt-service failover. + mgmt.appendChild(self.ref("active", mgmt_uuid)) + return mgmt + def mountpoint(self, name, uuid, fs_uuid, path): mtpt = self.newService("mountpoint", name, uuid) mtpt.appendChild(self.ref("filesystem", fs_uuid)) self.addElement(mtpt, "path", path) return mtpt - def filesystem(self, name, uuid, mds_uuid, obd_uuid): + def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid): fs = self.newService("filesystem", name, uuid) fs.appendChild(self.ref("mds", mds_uuid)) fs.appendChild(self.ref("obd", obd_uuid)) + if mgmt_uuid: + fs.appendChild(self.ref("mgmt", mgmt_uuid)) return fs def echo_client(self, name, uuid, osc_uuid): @@ -660,6 +675,23 @@ def add_mds(gen, lustre, options): lustre.appendChild(mdd) +def add_mgmt(gen, lustre, options): + node_name = get_option(options, 'node') + node_uuid = name2uuid(lustre, node_name) + mgmt_name = get_option(options, 'mgmt') + if not mgmt_name: + mgmt_name = new_name('MGMT_' + node_name) + mgmt_uuid = name2uuid(lustre, mgmt_name, fatal=0) + if not mgmt_uuid: + mgmt_uuid = new_uuid(mgmt_name) + mgmt = gen.mgmt(mgmt_name, mgmt_uuid, node_uuid) + lustre.appendChild(mgmt) 
+ else: + mgmt = lookup(lustre, mgmt_uuid) + + node = findByName(lustre, node_name, "node") + node_add_profile(gen, node, 'mgmt', mgmt_uuid) + def add_ost(gen, lustre, options): node_name = get_option(options, 'node') lovname = get_option(options, 'lov') @@ -793,23 +825,27 @@ def add_lov(gen, lustre, options): lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) lustre.appendChild(lovconfig) -def new_filesystem(gen, lustre, mds_uuid, obd_uuid): +def new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid): fs_name = new_name("FS_fsname") fs_uuid = new_uuid(fs_name) mds = lookup(lustre, mds_uuid) mds.appendChild(gen.ref("filesystem", fs_uuid)) - fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid) + fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid, mgmt_uuid) lustre.appendChild(fs) return fs_uuid -def get_fs_uuid(gen, lustre, mds_name, obd_name): +def get_fs_uuid(gen, lustre, mds_name, obd_name, mgmt_name): mds_uuid = name2uuid(lustre, mds_name, tag='mds') obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0) if not obd_uuid: obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1) + if mgmt_name: + mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1) + else: + mgmt_uuid = '' fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid) if not fs_uuid: - fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid) + fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid, mgmt_uuid) return fs_uuid def add_mtpt(gen, lustre, options): @@ -825,7 +861,8 @@ def add_mtpt(gen, lustre, options): lov_name = get_option(options, 'ost') if lov_name == '': error("--add mtpt requires either --filesystem or --mds with an --lov lov_name or --ost ost_name") - fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name) + mgmt_name = get_option(options, 'mgmt') + fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name, mgmt_name) else: fs_uuid = name2uuid(lustre, fs_name, tag='filesystem') @@ -910,6 +947,8 @@ def add(devtype, gen, lustre, options): 
add_echo_client(gen, lustre, options) elif devtype == 'cobd': add_cobd(gen, lustre, options) + elif devtype == 'mgmt': + add_mgmt(gen, lustre, options) else: error("unknown device type:", devtype) diff --git a/lustre/utils/obdbarrier.c b/lustre/utils/obdbarrier.c index 4373071..8774cef 100644 --- a/lustre/utils/obdbarrier.c +++ b/lustre/utils/obdbarrier.c @@ -214,9 +214,9 @@ main (int argc, char **argv) } } - free (b); + free(b); - obdio_disconnect (conn); + obdio_disconnect(conn, 0); return (rc == 0 ? 0 : 1); } diff --git a/lustre/utils/obdio.c b/lustre/utils/obdio.c index 8264761..24b9e2d 100644 --- a/lustre/utils/obdio.c +++ b/lustre/utils/obdio.c @@ -294,10 +294,10 @@ main (int argc, char **argv) if (conn == NULL) return (1); - rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked, - oid, base_offset, size); + rc = obdio_test_fixed_extent(conn, myhid, mypid, reps, locked, + oid, base_offset, size); - obdio_disconnect (conn); + obdio_disconnect(conn, 0); return (rc == 0 ? 0 : 1); } diff --git a/lustre/utils/obdiolib.c b/lustre/utils/obdiolib.c index c871818..04dae88 100644 --- a/lustre/utils/obdiolib.c +++ b/lustre/utils/obdiolib.c @@ -116,7 +116,7 @@ obdio_connect (int device) } void -obdio_disconnect (struct obdio_conn *conn) +obdio_disconnect (struct obdio_conn *conn, int flags) { close (conn->oc_fd); /* obdclass will automatically close on last ref */ diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h index 3811b41..b2ec6b6 100644 --- a/lustre/utils/obdiolib.h +++ b/lustre/utils/obdiolib.h @@ -48,22 +48,24 @@ struct obdio_barrier { }; extern struct obdio_conn * obdio_connect (int device); -extern void obdio_disconnect (struct obdio_conn *conn); -extern int obdio_open (struct obdio_conn *conn, uint64_t oid, +extern void obdio_disconnect(struct obdio_conn *conn, int flags); +extern int obdio_open(struct obdio_conn *conn, uint64_t oid, + struct lustre_handle *fh); +extern int obdio_close(struct obdio_conn *conn, uint64_t oid, struct 
lustre_handle *fh); -extern int obdio_close (struct obdio_conn *conn, uint64_t oid, - struct lustre_handle *fh); -extern int obdio_pread (struct obdio_conn *conn, uint64_t oid, +extern int obdio_pread(struct obdio_conn *conn, uint64_t oid, + char *buffer, uint32_t count, uint64_t offset); +extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid, char *buffer, uint32_t count, uint64_t offset); -extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset); -extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid, - int mode, uint64_t offset, uint32_t count, - struct lustre_handle *lh); -extern int obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh); -extern void *obdio_alloc_aligned_buffer (void **spacep, int size); -extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ; -extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b); -extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b); +extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid, + int mode, uint64_t offset, uint32_t count, + struct lustre_handle *lh); +extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh); +extern void *obdio_alloc_aligned_buffer(void **spacep, int size); +extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id, + int npeers); +extern int obdio_setup_barrier(struct obdio_conn *conn, + struct obdio_barrier *b); +extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b); #endif diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 5b6a589..86ae507 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -518,12 +518,12 @@ main (int argc, char **argv) CHECK_VALUE (REINT_OPEN); CHECK_VALUE (REINT_MAX); - CHECK_VALUE (IT_INTENT_EXEC); - CHECK_VALUE (IT_OPEN_LOOKUP); - CHECK_VALUE (IT_OPEN_NEG); - CHECK_VALUE (IT_OPEN_POS); - CHECK_VALUE 
(IT_OPEN_CREATE); - CHECK_VALUE (IT_OPEN_OPEN); + CHECK_VALUE (DISP_IT_EXECD); + CHECK_VALUE (DISP_LOOKUP_EXECD); + CHECK_VALUE (DISP_LOOKUP_NEG); + CHECK_VALUE (DISP_LOOKUP_POS); + CHECK_VALUE (DISP_OPEN_CREATE); + CHECK_VALUE (DISP_OPEN_OPEN); CHECK_VALUE (MDS_STATUS_CONN); CHECK_VALUE (MDS_STATUS_LOV);