From 619df1b4b8db28c3889cc9e3ef850122a5e8ce9e Mon Sep 17 00:00:00 2001 From: adilger Date: Fri, 5 Mar 2004 18:04:40 +0000 Subject: [PATCH] Merge documentation, liblustre, obdecho, smfs changes that landed on HEAD after b1_2 was branched to avoid merge problems later. Things not yet landed from this period: - bug 2308 changes to portals (including kiov changes all over the place) - GMNAL updates from llp2 (mixed in with 2308) Those will be landed separately. --- lustre/configure.in | 4 + lustre/include/liblustre.h | 141 +++-------- lustre/include/linux/lustre_debug.h | 4 +- lustre/include/linux/lustre_user.h | 7 + lustre/include/linux/obd.h | 1 - lustre/include/linux/obd_echo.h | 7 + .../patches/linux-2.4.20-tmpfs-xattr.patch | 65 ++++- lustre/liblustre/dir.c | 2 +- lustre/liblustre/file.c | 7 +- lustre/liblustre/llite_lib.h | 20 +- lustre/liblustre/namei.c | 16 +- lustre/liblustre/rw.c | 265 +++++++++++++++++---- lustre/liblustre/super.c | 57 ++--- lustre/liblustre/tests/Makefile.am | 17 +- lustre/liblustre/tests/sanity.c | 7 +- lustre/liblustre/tests/test_common.c | 12 + lustre/liblustre/tests/test_common.h | 1 + lustre/obdclass/debug.c | 8 +- lustre/obdecho/echo.c | 131 ++++++---- lustre/obdecho/echo_client.c | 114 +++++---- lustre/ptlrpc/pinger.c | 1 - lustre/smfs/dir.c | 36 ++- lustre/smfs/file.c | 41 ++-- lustre/smfs/inode.c | 68 ++++-- lustre/smfs/symlink.c | 24 +- lustre/tests/.RC_CURRENT.tag | 2 +- lustre/tests/test_brw.c | 16 +- lustre/utils/lfs.c | 3 +- lustre/utils/liblustreapi.c | 43 ++++ lustre/utils/obd.c | 11 +- 30 files changed, 732 insertions(+), 399 deletions(-) diff --git a/lustre/configure.in b/lustre/configure.in index aee06ac..4638e92 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -45,6 +45,10 @@ else fi AC_SUBST(SYSIO) +#build mpi-tests +AC_ARG_ENABLE(mpitests, [ --enable-mpitests build liblustre mpi tests]) +AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) + # snap compilation AC_ARG_ENABLE(snapfs, [ 
--enable-snapfs build snapfs]) AM_CONDITIONAL(SNAPFS, test x$enable_snapfs = xyes) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index 0b6da9f..064ac80 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -24,6 +24,7 @@ #ifndef LIBLUSTRE_H__ #define LIBLUSTRE_H__ +#include #include #ifndef __CYGWIN__ #include @@ -120,7 +121,6 @@ static inline void *kmalloc(int size, int prot) typedef struct { void *cwd; - }mm_segment_t; typedef int (read_proc_t)(char *page, char **start, off_t off, @@ -130,60 +130,12 @@ struct file; /* forward ref */ typedef int (write_proc_t)(struct file *file, const char *buffer, unsigned long count, void *data); -/* byteorder */ -#define __swab16(x) \ -({ \ - __u16 __x = (x); \ - ((__u16)( \ - (((__u16)(__x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \ -}) - -#define __swab32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ -}) - -#define __swab64(x) \ -({ \ - __u64 __x = (x); \ - ((__u64)( \ - (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ - (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ -}) - -#define __swab16s(x) __swab16(*(x)) -#define __swab32s(x) __swab32(*(x)) -#define __swab64s(x) __swab64(*(x)) - -#define __LITTLE_ENDIAN__ -#ifdef __LITTLE_ENDIAN__ -# define le16_to_cpu(x) ((__u16)(x)) -# define cpu_to_le16(x) ((__u16)(x)) -# define 
le32_to_cpu(x) ((__u32)(x)) -# define cpu_to_le32(x) ((__u32)(x)) -# define le64_to_cpu(x) ((__u64)(x)) -# define cpu_to_le64(x) ((__u64)(x)) -#else -# define le16_to_cpu(x) __swab16(x) -# define cpu_to_le16(x) __swab16(x) -# define le32_to_cpu(x) __swab32(x) -# define cpu_to_le32(x) __swab32(x) -# define le64_to_cpu(x) __swab64(x) -# define cpu_to_le64(x) __swab64(x) -# error "do more check here!!!" -#endif +# define le16_to_cpu(x) __le16_to_cpu(x) +# define cpu_to_le16(x) __cpu_to_le16(x) +# define le32_to_cpu(x) __le32_to_cpu(x) +# define cpu_to_le32(x) __cpu_to_le32(x) +# define le64_to_cpu(x) __le64_to_cpu(x) +# define cpu_to_le64(x) __cpu_to_le64(x) #define NIPQUAD(addr) \ ((unsigned char *)&addr)[0], \ @@ -191,17 +143,17 @@ typedef int (write_proc_t)(struct file *file, const char *buffer, ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[3] -#if defined(__LITTLE_ENDIAN__) +#if defined(__LITTLE_ENDIAN) #define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ ((unsigned char *)&addr)[2], \ ((unsigned char *)&addr)[1], \ ((unsigned char *)&addr)[0] -#elif defined(__BIG_ENDIAN__) +#elif defined(__BIG_ENDIAN) #define HIPQUAD NIPQUAD #else -#error "Please fix asm/byteorder.h" -#endif /* __LITTLE_ENDIAN__ */ +#error "Undefined byteorder??" 
+#endif /* __LITTLE_ENDIAN */ /* bits ops */ static __inline__ int set_bit(int nr,long * addr) @@ -284,12 +236,12 @@ static inline int request_module(char *name) return (-EINVAL); } -#define __MOD_INC_USE_COUNT(m) do {int a = 1; a++; } while (0) -#define __MOD_DEC_USE_COUNT(m) do {int a = 1; a++; } while (0) -#define MOD_INC_USE_COUNT do {int a = 1; a++; } while (0) -#define MOD_DEC_USE_COUNT do {int a = 1; a++; } while (0) -#define try_module_get __MOD_INC_USE_COUNT -#define module_put __MOD_DEC_USE_COUNT +#define __MOD_INC_USE_COUNT(m) do {} while (0) +#define __MOD_DEC_USE_COUNT(m) do {} while (0) +#define MOD_INC_USE_COUNT do {} while (0) +#define MOD_DEC_USE_COUNT do {} while (0) +#define try_module_get __MOD_INC_USE_COUNT +#define module_put __MOD_DEC_USE_COUNT /* module initialization */ extern int init_obdclass(void); @@ -318,22 +270,10 @@ static inline void local_irq_save(unsigned long flag) {return;} static inline void local_irq_restore(unsigned long flag) {return;} static inline int spin_is_locked(spinlock_t *l) {return 1;} -static inline void spin_lock_bh(spinlock_t *l) -{ - return; -} -static inline void spin_unlock_bh(spinlock_t *l) -{ - return; -} -static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) -{ - return; -} -static inline void spin_lock_irqsave(spinlock_t *a, unsigned long b) -{ - return; -} +static inline void spin_lock_bh(spinlock_t *l) {} +static inline void spin_unlock_bh(spinlock_t *l) {} +static inline void spin_lock_irqsave(spinlock_t *a, unsigned long b) {} +static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {} #define min(x,y) ((x)<(y) ? (x) : (y)) #define max(x,y) ((x)>(y) ? 
(x) : (y)) @@ -435,7 +375,7 @@ struct page { }; #define kmap(page) (page)->addr -#define kunmap(a) do { int foo = 1; foo++; } while (0) +#define kunmap(a) do {} while (0) static inline struct page *alloc_pages(int mask, unsigned long order) { @@ -571,8 +511,6 @@ struct vfsmount { void *pwd; }; -#define cpu_to_le32(x) ((__u32)(x)) - /* semaphores */ struct rw_semaphore { int count; @@ -583,13 +521,13 @@ struct semaphore { int count; }; -#define down(a) do {(a)->count++;} while (0) -#define up(a) do {(a)->count--;} while (0) -#define down_read(a) do {(a)->count++;} while (0) -#define up_read(a) do {(a)->count--;} while (0) -#define down_write(a) do {(a)->count++;} while (0) -#define up_write(a) do {(a)->count--;} while (0) -#define sema_init(a,b) do { (a)->count = b; } while (0) +#define down(a) do {} while (0) +#define up(a) do {} while (0) +#define down_read(a) do {} while (0) +#define up_read(a) do {} while (0) +#define down_write(a) do {} while (0) +#define up_write(a) do {} while (0) +#define sema_init(a,b) do {} while (0) #define init_rwsem(a) do {} while (0) #define DECLARE_MUTEX(name) \ struct semaphore name = { 1 } @@ -658,32 +596,23 @@ extern struct task_struct *current; #define in_interrupt() (0) -#define schedule() do { int a; a++; } while (0) +#define schedule() do {} while (0) static inline int schedule_timeout(signed long t) { return 0; } -#define lock_kernel() do { int a; a++; } while (0) -#define daemonize(l) do { int a; a++; } while (0) -#define sigfillset(l) do { int a; a++; } while (0) -#define recalc_sigpending(l) do { int a; a++; } while (0) +#define lock_kernel() do {} while (0) +#define daemonize(l) do {} while (0) +#define sigfillset(l) do {} while (0) +#define recalc_sigpending(l) do {} while (0) #define kernel_thread(l,m,n) LBUG() #define USERMODEHELPER(path, argv, envp) (0) - -static inline int call_usermodehelper(char *prog, char **argv, char **evnp, int unknown) -{ - return 0; -} - - #define SIGNAL_MASK_ASSERT() - #define KERN_INFO - 
struct timer_list { struct list_head tl_list; void (*function)(unsigned long unused); @@ -717,7 +646,6 @@ static inline void del_timer(struct timer_list *l) #define time_after(a, b) \ ({ \ - printf("Error: inapproiate call time_after()\n"); \ 1; \ }) @@ -791,4 +719,3 @@ int liblustre_wait_event(int timeout); #endif - diff --git a/lustre/include/linux/lustre_debug.h b/lustre/include/linux/lustre_debug.h index 756d32e..669c0e8 100644 --- a/lustre/include/linux/lustre_debug.h +++ b/lustre/include/linux/lustre_debug.h @@ -48,6 +48,6 @@ int dump_rniobuf(struct niobuf_remote *rnb); int dump_ioo(struct obd_ioobj *nb); int dump_req(struct ptlrpc_request *req); int dump_obdo(struct obdo *oa); -int page_debug_setup(void *addr, int len, __u64 off, __u64 id); -int page_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); +int block_debug_setup(void *addr, int len, __u64 off, __u64 id); +int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); #endif diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h index 2eba485..9866acf 100644 --- a/lustre/include/linux/lustre_user.h +++ b/lustre/include/linux/lustre_user.h @@ -25,6 +25,9 @@ #define _LUSTRE_USER_H #include +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) + #define LL_IOC_GETFLAGS _IOR ('f', 151, long) #define LL_IOC_SETFLAGS _IOW ('f', 152, long) #define LL_IOC_CLRFLAGS _IOW ('f', 153, long) @@ -65,4 +68,8 @@ struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ } __attribute__((packed)); +extern int op_create_file(char *name, long stripe_size, int stripe_offset, + int stripe_count); +extern int get_file_stripe(char *path, struct lov_user_md *lum); + #endif /* _LUSTRE_USER_H */ diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index ca545a9..24ee1c2 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h 
@@ -18,7 +18,6 @@ #define IOC_MDC_TYPE 'i' #define IOC_MDC_MIN_NR 20 #define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) #define IOC_MDC_MAX_NR 50 #ifdef __KERNEL__ diff --git a/lustre/include/linux/obd_echo.h b/lustre/include/linux/obd_echo.h index 5ff5e6c..d885579 100644 --- a/lustre/include/linux/obd_echo.h +++ b/lustre/include/linux/obd_echo.h @@ -13,6 +13,13 @@ #define OBD_ECHO_DEVICENAME "obdecho" #define OBD_ECHO_CLIENT_DEVICENAME "echo_client" +/* The persistent object (i.e. actually stores stuff!) */ +#define ECHO_PERSISTENT_OBJID 1ULL +#define ECHO_PERSISTENT_SIZE ((__u64)(1<<20)) + +/* block size to use for data verification */ +#define OBD_ECHO_BLOCK_SIZE (4<<10) + struct ec_object { struct list_head eco_obj_chain; struct obd_device *eco_device; diff --git a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch b/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch index 2341ec0..a807ac7 100644 --- a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch +++ b/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch @@ -11,9 +11,20 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/fs/Config.in linux-2.4.20/fs/Confi define_bool CONFIG_RAMFS y tristate 'ISO 9660 CDROM file system support' CONFIG_ISO9660_FS +diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/mm.h linux-2.4.20/include/linux/mm.h +--- linux-2.4.20.orig/include/linux/mm.h 2002-08-03 03:39:45.000000000 +0300 ++++ linux-2.4.20/include/linux/mm.h 2004-02-10 11:43:10.000000000 +0200 +@@ -468,6 +468,7 @@ extern void clear_page_tables(struct mm_ + extern int fail_writepage(struct page *); + struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); + struct file *shmem_file_setup(char * name, loff_t size); ++int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr); + extern void shmem_lock(struct file * file, int lock); + 
extern int shmem_zero_setup(struct vm_area_struct *); + diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4.20/include/linux/shmem_fs.h --- linux-2.4.20.orig/include/linux/shmem_fs.h 2001-12-21 19:42:03.000000000 +0200 -+++ linux-2.4.20/include/linux/shmem_fs.h 2004-02-08 21:40:34.000000000 +0200 ++++ linux-2.4.20/include/linux/shmem_fs.h 2004-02-10 18:39:17.000000000 +0200 @@ -3,6 +3,8 @@ /* inode in-kernel data */ @@ -34,7 +45,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4 struct inode *inode; }; -@@ -39,6 +45,15 @@ struct shmem_sb_info { +@@ -39,6 +45,32 @@ struct shmem_sb_info { spinlock_t stat_lock; }; @@ -45,6 +56,23 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4 + void *entity; + struct list_head list; +}; ++ ++extern struct shmem_xattr * ++shmem_xattr_find(struct inode *inode, const char *name); ++ ++extern ssize_t ++shmem_xattr_set(struct inode *inode, const char *name, ++ const void *value, u16 valuelen, int flags); ++ ++extern ssize_t ++shmem_xattr_get(struct inode *inode, const char *name, ++ void *value, size_t valuelen); ++ ++extern int ++shmem_xattr_delete(struct inode *inode, struct shmem_xattr *xattr); ++ ++extern int ++shmem_xattr_remove(struct inode *inode, const char *name); +#endif + #define SHMEM_I(inode) (&inode->u.shmem_i) @@ -52,7 +80,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4 #endif diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c --- linux-2.4.20.orig/mm/shmem.c 2002-11-29 01:53:15.000000000 +0200 -+++ linux-2.4.20/mm/shmem.c 2004-02-09 11:41:45.000000000 +0200 ++++ linux-2.4.20/mm/shmem.c 2004-02-10 18:44:05.000000000 +0200 @@ -27,6 +27,8 @@ #include #include @@ -131,7 +159,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c +/* assigns @name and @value to passed @xattr. 
*/ +static int +shmem_xattr_assign(struct shmem_xattr *xattr, -+ const char *name, void *value) ++ const char *name, const void *value) +{ + if (name) { + if (xattr->namelen != strlen(name)) @@ -183,9 +211,9 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c +} + +/* allocates new xattr and fills it with passed value, name, etc. */ -+static ssize_t ++ssize_t +shmem_xattr_set(struct inode *inode, const char *name, -+ void *value, u16 valuelen, int flags) ++ const void *value, u16 valuelen, int flags) +{ + ssize_t error; + struct shmem_xattr *xattr; @@ -222,7 +250,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c +} + +/* fills passed @value by attribute value found by @name. */ -+static ssize_t ++ssize_t +shmem_xattr_get(struct inode *inode, const char *name, + void *value, size_t valuelen) +{ @@ -245,7 +273,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c +} + +/* deletes passed @xattr from inode xattr list and frees it. */ -+static int ++int +shmem_xattr_delete(struct inode *inode, struct shmem_xattr *xattr) +{ + struct shmem_inode_info *info; @@ -262,7 +290,7 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c +} + +/* removes attribute found by passed @name. 
*/ -+static int ++int +shmem_xattr_remove(struct inode *inode, const char *name) +{ + struct shmem_xattr *xattr; @@ -432,6 +460,15 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c clear_inode(inode); } +@@ -634,7 +985,7 @@ wait_retry: + goto repeat; + } + +-static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr) ++int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr) + { + struct shmem_inode_info *info = SHMEM_I(inode); + int error; @@ -727,6 +1078,11 @@ struct inode *shmem_get_inode(struct sup info->inode = inode; spin_lock_init (&info->lock); @@ -494,3 +531,13 @@ diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c #endif }; +@@ -1557,3 +1927,9 @@ int shmem_zero_setup(struct vm_area_stru + } + + EXPORT_SYMBOL(shmem_file_setup); ++EXPORT_SYMBOL(shmem_getpage); ++EXPORT_SYMBOL(shmem_xattr_find); ++EXPORT_SYMBOL(shmem_xattr_set); ++EXPORT_SYMBOL(shmem_xattr_get); ++EXPORT_SYMBOL(shmem_xattr_delete); ++EXPORT_SYMBOL(shmem_xattr_remove); diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index cceb1e0..c236d73 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -75,7 +75,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) } rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, - &res_id, LDLM_PLAIN, NULL, 0, LCK_PR, &lockh); + &res_id, LDLM_PLAIN, NULL, LCK_PR, &lockh); if (!rc) { llu_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0); diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index de3f35e..9a5162d 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -305,7 +305,8 @@ int llu_file_release(struct inode *inode) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, lli->lli_st_generation); - /* XXX don't do anything for '/'. but how to find the root pnode? */ + if (llu_is_root_inode(inode)) + RETURN(0); /* still opened by others? 
*/ if (--lli->lli_open_count) @@ -327,8 +328,8 @@ int llu_iop_close(struct inode *inode) int rc; rc = llu_file_release(inode); - if (!llu_i2info(inode)->lli_open_count) - llu_i2info(inode)->lli_stale_flag = 1; + /* if open count == 0 && stale_flag is set, should we + * remove the inode immediately? */ return rc; } diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 5908ef5..123bb5c 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -295,21 +295,15 @@ llu_file_write(struct inode *inode, const struct iovec *iovec, size_t iovlen, loff_t pos); struct llu_sysio_callback_args* llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos); -int llu_extent_lock_no_validate(struct ll_file_data *fd, - struct inode *inode, - struct lov_stripe_md *lsm, - int mode, - struct ldlm_extent *extent, - struct lustre_handle *lockh, - int ast_flags); + size_t iovlen, loff_t pos); +int llu_glimpse_size(struct inode *inode, struct ost_lvb *lvb); int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, - int mode, struct ldlm_extent *extent, - struct lustre_handle *lockh); + struct lov_stripe_md *lsm, int mode, + ldlm_policy_data_t *policy, struct lustre_handle *lockh, + int ast_flags); int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockh); + struct lov_stripe_md *lsm, int mode, + struct lustre_handle *lockh); /* namei.c */ int llu_iop_lookup(struct pnode *pnode, diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index 1c00634..1f23df9 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -290,10 +290,8 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) ptlrpc_req_finished(req); if (rc == 0) { LASSERT(pb->pb_ino); - if (S_ISDIR(llu_i2info(pb->pb_ino)->lli_st_mode)) - llu_invalidate_inode_pages(pb->pb_ino); - 
llu_i2info(pb->pb_ino)->lli_stale_flag = 1; - unhook_stale_inode(pnode); + I_RELE(pb->pb_ino); + pb->pb_ino = NULL; } else { llu_lookup_finish_locks(it, pnode); llu_i2info(pb->pb_ino)->lli_stale_flag = 0; @@ -342,9 +340,8 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, /* If this is a stat, get the authoritative file size */ if (it->it_op == IT_GETATTR && S_ISREG(lli->lli_st_mode) && lli->lli_smd != NULL) { - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - struct lustre_handle lockh = {0}; struct lov_stripe_md *lsm = lli->lli_smd; + struct ost_lvb lvb; ldlm_error_t rc; LASSERT(lsm->lsm_object_id != 0); @@ -352,13 +349,12 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, /* bug 2334: drop MDS lock before acquiring OST lock */ ll_intent_drop_lock(it); - rc = llu_extent_lock(NULL, inode, lsm, LCK_PR, &extent, - &lockh); - if (rc != ELDLM_OK) { + rc = llu_glimpse_size(inode, &lvb); + if (rc) { I_RELE(inode); RETURN(-EIO); } - llu_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh); + lli->lli_st_size = lvb.lvb_size; } } else { ENTRY; diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index 8e5f59f..6fe7431 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -40,6 +40,33 @@ #include "llite_lib.h" +static int llu_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct obd_export *exp = llu_i2obdexp(inode); + struct { + char name[16]; + struct ldlm_lock *lock; + struct lov_stripe_md *lsm; + } key = { .name = "lock_to_stripe", .lock = lock, .lsm = lsm }; + __u32 stripe, vallen = sizeof(stripe); + int rc; + ENTRY; + + if (lsm->lsm_stripe_count == 1) + RETURN(0); + + /* get our offset in the lov */ + rc = obd_get_info(exp, sizeof(key), &key, &vallen, &stripe); + if (rc != 0) { + CERROR("obd_get_info: rc = %d\n", rc); + LBUG(); + } + LASSERT(stripe < lsm->lsm_stripe_count); + 
RETURN(stripe); +} + static int llu_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, void *data, int flag) @@ -64,23 +91,27 @@ static int llu_extent_lock_callback(struct ldlm_lock *lock, case LDLM_CB_CANCELING: { struct inode *inode = llu_inode_from_lock(lock); struct llu_inode_info *lli; + struct lov_stripe_md *lsm; + __u32 stripe; + __u64 kms; if (!inode) RETURN(0); lli= llu_i2info(inode); - if (!lli) { - I_RELE(inode); - RETURN(0); - } - if (!lli->lli_smd) { - I_RELE(inode); - RETURN(0); - } - -/* - ll_pgcache_remove_extent(inode, lli->lli_smd, lock); - iput(inode); -*/ + if (!lli) + goto iput; + if (!lli->lli_smd) + goto iput; + lsm = lli->lli_smd; + + stripe = llu_lock_to_stripe_offset(inode, lock); + kms = ldlm_extent_shift_kms(lock, + lsm->lsm_oinfo[stripe].loi_kms); + if (lsm->lsm_oinfo[stripe].loi_kms != kms) + LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64, + lsm->lsm_oinfo[stripe].loi_kms, kms); + lsm->lsm_oinfo[stripe].loi_kms = kms; +iput: I_RELE(inode); break; } @@ -91,6 +122,135 @@ static int llu_extent_lock_callback(struct ldlm_lock *lock, RETURN(0); } +static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp) +{ + struct ptlrpc_request *req = reqp; + struct inode *inode = llu_inode_from_lock(lock); + struct obd_export *exp; + struct llu_inode_info *lli; + struct ost_lvb *lvb; + struct { + int stripe_number; + __u64 size; + struct lov_stripe_md *lsm; + } data; + __u32 vallen = sizeof(data); + int rc, size = sizeof(*lvb); + ENTRY; + + if (inode == NULL) + RETURN(0); + lli = llu_i2info(inode); + if (lli == NULL) + goto iput; + if (lli->lli_smd == NULL) + goto iput; + exp = llu_i2obdexp(inode); + + /* First, find out which stripe index this lock corresponds to. 
*/ + if (lli->lli_smd->lsm_stripe_count > 1) + data.stripe_number = llu_lock_to_stripe_offset(inode, lock); + else + data.stripe_number = 0; + + data.size = lli->lli_st_size; + data.lsm = lli->lli_smd; + + rc = obd_get_info(exp, strlen("size_to_stripe"), "size_to_stripe", + &vallen, &data); + if (rc != 0) { + CERROR("obd_get_info: rc = %d\n", rc); + LBUG(); + } + + LDLM_DEBUG(lock, "i_size: %Lu -> stripe number %d -> size %Lu", + lli->lli_st_size, data.stripe_number, data.size); + + rc = lustre_pack_reply(req, 1, &size, NULL); + if (rc) { + CERROR("lustre_pack_reply: %d\n", rc); + goto iput; + } + + lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb)); + lvb->lvb_size = data.size; + ptlrpc_reply(req); + + iput: + I_RELE(inode); + RETURN(0); +} + +__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); +__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time); + +/* NB: lov_merge_size will prefer locally cached writes if they extend the + * file (because it prefers KMS over RSS when larger) */ +int llu_glimpse_size(struct inode *inode, struct ost_lvb *lvb) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; + struct lustre_handle lockh; + int rc, flags = LDLM_FL_HAS_INTENT; + ENTRY; + + CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", lli->lli_st_ino); + + rc = obd_enqueue(sbi->ll_osc_exp, lli->lli_smd, LDLM_EXTENT, &policy, + LCK_PR, &flags, llu_extent_lock_callback, + ldlm_completion_ast, llu_glimpse_callback, inode, + sizeof(*lvb), lustre_swab_ost_lvb, &lockh); + if (rc > 0) + RETURN(-EIO); + + lvb->lvb_size = lov_merge_size(lli->lli_smd, 0); + //inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime); + + CDEBUG(D_DLMTRACE, "glimpse: size: "LPU64"\n", lvb->lvb_size); + + obd_cancel(sbi->ll_osc_exp, lli->lli_smd, LCK_PR, &lockh); + + RETURN(rc); +} + +int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, + struct 
lov_stripe_md *lsm, int mode, + ldlm_policy_data_t *policy, struct lustre_handle *lockh, + int ast_flags) +{ + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct llu_inode_info *lli = llu_i2info(inode); + int rc; + ENTRY; + + LASSERT(lockh->cookie == 0); + + /* XXX phil: can we do this? won't it screw the file size up? */ + if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || + (sbi->ll_flags & LL_SBI_NOLCK)) + RETURN(0); + + CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", + lli->lli_st_ino, policy->l_extent.start, policy->l_extent.end); + + rc = obd_enqueue(sbi->ll_osc_exp, lsm, LDLM_EXTENT, policy, mode, + &ast_flags, llu_extent_lock_callback, + ldlm_completion_ast, llu_glimpse_callback, inode, + sizeof(struct ost_lvb), lustre_swab_ost_lvb, lockh); + if (rc > 0) + rc = -EIO; + + if (policy->l_extent.start == 0 && + policy->l_extent.end == OBD_OBJECT_EOF) + lli->lli_st_size = lov_merge_size(lsm, 1); + + //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime); + + RETURN(rc); +} + +#if 0 int llu_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, @@ -168,6 +328,7 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, RETURN(0); } +#endif int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, @@ -176,12 +337,12 @@ int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode, struct llu_sb_info *sbi = llu_i2sbi(inode); int rc; ENTRY; -#if 0 + /* XXX phil: can we do this? won't it screw the file size up? 
*/ if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || (sbi->ll_flags & LL_SBI_NOLCK)) RETURN(0); -#endif + rc = obd_cancel(sbi->ll_osc_exp, lsm, mode, lockh); RETURN(rc); @@ -532,6 +693,9 @@ out_cleanup: RETURN(ERR_PTR(rc)); } +void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm, + obd_off size); + struct llu_sysio_callback_args* llu_file_write(struct inode *inode, const struct iovec *iovec, size_t iovlen, loff_t pos) @@ -540,9 +704,10 @@ llu_file_write(struct inode *inode, const struct iovec *iovec, struct ll_file_data *fd = lli->lli_file_data; struct lustre_handle lockh = {0}; struct lov_stripe_md *lsm = lli->lli_smd; + struct obd_export *exp = NULL; + ldlm_policy_data_t policy; struct llu_sysio_callback_args *lsca; struct llu_sysio_cookie *cookie; - struct ldlm_extent extent; ldlm_error_t err; int iovidx; ENTRY; @@ -553,6 +718,10 @@ llu_file_write(struct inode *inode, const struct iovec *iovec, LASSERT(iovlen <= MAX_IOVEC); + exp = llu_i2obdexp(inode); + if (exp == NULL) + RETURN(ERR_PTR(-EINVAL)); + OBD_ALLOC(lsca, sizeof(*lsca)); if (!lsca) RETURN(ERR_PTR(-ENOMEM)); @@ -565,24 +734,15 @@ llu_file_write(struct inode *inode, const struct iovec *iovec, if (count == 0) continue; - /* FIXME libsysio haven't handle O_APPEND */ - extent.start = pos; - extent.end = pos + count - 1; + if (pos + count > lli->lli_maxbytes) + GOTO(err_out, err = -ERANGE); -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - if ((pos & ~PAGE_CACHE_MASK) == 0 && - (count & ~PAGE_CACHE_MASK) == 0) - err = llu_extent_lock_no_validate(fd, inode, lsm, - LCK_PW, &extent, &lockh, 0); - else - err = llu_extent_lock(fd, inode, lsm, LCK_PW, - &extent, &lockh); -#else - /* server will handle partial write, so we don't - * care for file size here */ - err = llu_extent_lock_no_validate(fd, inode, lsm, LCK_PW, - &extent, &lockh, 0); -#endif + /* FIXME libsysio haven't handle O_APPEND?? 
*/ + policy.l_extent.start = pos; + policy.l_extent.end = pos + count - 1; + + err = llu_extent_lock(fd, inode, lsm, LCK_PW, &policy, + &lockh, 0); if (err != ELDLM_OK) GOTO(err_out, err = -ENOLCK); @@ -594,12 +754,10 @@ llu_file_write(struct inode *inode, const struct iovec *iovec, /* save cookie */ lsca->cookies[lsca->ncookies++] = cookie; pos += count; - /* file size grow. XXX should be done here? */ - if (pos > lli->lli_st_size) { + lov_increase_kms(exp, lsm, pos); + /* file size grow */ + if (pos > lli->lli_st_size) lli->lli_st_size = pos; - set_bit(LLI_F_PREFER_EXTENDED_SIZE, - &lli->lli_flags); - } } else { llu_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); GOTO(err_out, err = PTR_ERR(cookie)); @@ -649,15 +807,16 @@ static void llu_update_atime(struct inode *inode) struct llu_sysio_callback_args* llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos) + size_t iovlen, loff_t pos) { struct llu_inode_info *lli = llu_i2info(inode); struct ll_file_data *fd = lli->lli_file_data; struct lov_stripe_md *lsm = lli->lli_smd; struct lustre_handle lockh = { 0 }; - struct ldlm_extent extent; + ldlm_policy_data_t policy; struct llu_sysio_callback_args *lsca; struct llu_sysio_cookie *cookie; + __u64 kms; int iovidx; ldlm_error_t err; @@ -676,15 +835,31 @@ llu_file_read(struct inode *inode, const struct iovec *iovec, if (count == 0) continue; - extent.start = pos; - extent.end = pos + count - 1; + policy.l_extent.start = pos; + policy.l_extent.end = pos + count - 1; - err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh); + err = llu_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0); if (err != ELDLM_OK) GOTO(err_out, err = -ENOLCK); - CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n", - lli->lli_st_ino, count, pos); + kms = lov_merge_size(lsm, 1); + if (policy.l_extent.end > kms) { + /* A glimpse is necessary to determine whether we + * return a short read or some zeroes at the end of + * the buffer */ + struct 
ost_lvb lvb; + if (llu_glimpse_size(inode, &lvb)) { + llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); + GOTO(err_out, err = -ENOLCK); + } + lli->lli_st_size = lvb.lvb_size; + } else { + lli->lli_st_size = kms; + } + + CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld, " + "i_size "LPU64"\n", lli->lli_st_ino, count, pos, + lli->lli_st_size); if (pos >= lli->lli_st_size) { llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 28b9e66..8321956 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -358,13 +358,13 @@ static int llu_have_md_lock(struct inode *inode) /* FIXME use LDLM_FL_TEST_LOCK instead */ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PR, &lockh)) { + NULL, LCK_PR, &lockh)) { ldlm_lock_decref(&lockh, LCK_PR); RETURN(1); } if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PW, &lockh)) { + NULL, LCK_PW, &lockh)) { ldlm_lock_decref(&lockh, LCK_PW); RETURN(1); } @@ -433,22 +433,14 @@ static int llu_inode_revalidate(struct inode *inode) if (!lsm) /* object not yet allocated, don't validate size */ RETURN(0); - /* - * unfortunately stat comes in through revalidate and we don't - * differentiate this use from initial instantiation. we're - * also being wildly conservative and flushing write caches - * so that stat really returns the proper size. 
- */ + /* ll_glimpse_size will prefer locally cached writes if they extend + * the file */ { - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - struct lustre_handle lockh = {0}; + struct ost_lvb lvb; ldlm_error_t err; - err = llu_extent_lock(NULL, inode, lsm, LCK_PR, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(err); - - llu_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh); + err = llu_glimpse_size(inode, &lvb); + lli->lli_st_size = lvb.lvb_size; } RETURN(0); } @@ -702,29 +694,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_SIZE) { - struct ldlm_extent extent = { .start = attr->ia_size, - .end = OBD_OBJECT_EOF }; + ldlm_policy_data_t policy = { .l_extent = {attr->ia_size, + OBD_OBJECT_EOF} }; struct lustre_handle lockh = { 0 }; int err, ast_flags = 0; /* XXX when we fix the AST intents to pass the discard-range * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA * XXX here. */ - - /* Writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * - * We really need to get our PW lock before we change - * inode->i_size. If we don't we can race with other - * i_size updaters on our node, like ll_file_read. We - * can also race with i_size propogation to other - * nodes through dirtying and writeback of final cached - * pages. This last one is especially bad for racing - * o_append users on other nodes. 
*/ - if (extent.start == 0) + if (attr->ia_size == 0) ast_flags = LDLM_AST_DISCARD_DATA; - rc = llu_extent_lock_no_validate(NULL, inode, lsm, LCK_PW, - &extent, &lockh, ast_flags); + + rc = llu_extent_lock(NULL, inode, lsm, LCK_PW, &policy, + &lockh, ast_flags); if (rc != ELDLM_OK) { if (rc > 0) RETURN(-ENOLCK); @@ -1256,12 +1237,15 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) inode = _sysio_i_find(fs, &fileid); if (inode) { - if (llu_i2info(inode)->lli_st_generation == - md->body->generation) { + struct llu_inode_info *lli = llu_i2info(inode); + + if (lli->lli_stale_flag || + lli->lli_st_generation != md->body->generation) + I_RELE(inode); + else { llu_update_inode(inode, md->body, md->lsm); return inode; - } else - I_RELE(inode); + } } inode = llu_new_inode(fs, &fid); @@ -1514,3 +1498,4 @@ static struct inode_ops llu_inode_ops = { inop_gone: llu_iop_gone, }; +#warning "time_after() defined in liblustre.h need to be rewrite in userspace" diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am index a99a4bb..150678a 100644 --- a/lustre/liblustre/tests/Makefile.am +++ b/lustre/liblustre/tests/Makefile.am @@ -15,8 +15,13 @@ LLIB_EXEC= ../liblustre.a -lpthread noinst_LIBRARIES = libtestcommon.a libtestcommon_a_SOURCES = test_common.c -bin_PROGRAMS = echo_test sanity recovery_small replay_single test_lock_cancel \ - replay_ost_single +def_tests = echo_test sanity recovery_small replay_single replay_ost_single + +if MPITESTS +bin_PROGRAMS = $(def_tests) test_lock_cancel +else +bin_PROGRAMS = $(def_tests) +endif echo_test_SOURCES = echo_test.c ../../utils/parser.c ../../utils/obd.c ../../utils/lustre_cfg.c echo_test_LDADD = ../liblsupport.a $(LIBREADLINE) -lpthread @@ -34,12 +39,14 @@ replay_single_SOURCES = replay_single.c replay_single_LDADD := ./libtestcommon.a $(LLIB_EXEC) replay_single_DEPENDENCIES = $(top_srcdir)/liblustre/liblustre.a -test_lock_cancel_SOURCES = test_lock_cancel.c -test_lock_cancel_LDADD := 
$(LLIB_EXEC) -lmpi -llam - replay_ost_single_SOURCES = replay_ost_single.c replay_ost_single_LDADD := ./libtestcommon.a $(LLIB_EXEC) replay_ost_single_DEPENDENCIES = $(top_srcdir)/liblustre/liblustre.a +if MPITESTS +test_lock_cancel_SOURCES = test_lock_cancel.c +test_lock_cancel_LDADD := $(LLIB_EXEC) -lmpi -llam +endif + include $(top_srcdir)/Rules diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 664be40..944ae9c 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -146,7 +146,6 @@ static void pages_io(int xfer, loff_t pos) } } printf("succefully write %d pages(%d per xfer)\n", _npages, xfer); - memset(_buffer, 0, sizeof(_buffer)); /* read */ @@ -324,7 +323,7 @@ void t12() ENTRY("empty directory readdir"); t_mkdir(dir); - fd = t_open(dir); + fd = t_opendir(dir); t_ls(fd, buf, sizeof(buf)); t_close(fd); t_rmdir(dir); @@ -347,7 +346,7 @@ void t13() sprintf(name, "%s%s%05d", dir, prefix, i); t_touch(name); } - fd = t_open(dir); + fd = t_opendir(dir); t_ls(fd, buf, sizeof(buf)); t_close(fd); printf("Cleanup...\n"); @@ -375,7 +374,7 @@ void t14() sprintf(name, "%s%s%05d", dir, prefix, i); t_touch(name); } - fd = t_open(dir); + fd = t_opendir(dir); t_ls(fd, buf, sizeof(buf)); t_close(fd); printf("Cleanup...\n"); diff --git a/lustre/liblustre/tests/test_common.c b/lustre/liblustre/tests/test_common.c index 6f6676e..a87f0fa 100644 --- a/lustre/liblustre/tests/test_common.c +++ b/lustre/liblustre/tests/test_common.c @@ -181,6 +181,18 @@ int t_open(const char *path) return fd; } +int t_opendir(const char *path) +{ + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) { + printf("opendir(%s) error: %s\n", path, strerror(errno)); + EXIT_RET(fd); + } + return fd; +} + void t_close(int fd) { int rc; diff --git a/lustre/liblustre/tests/test_common.h b/lustre/liblustre/tests/test_common.h index 9d537cc..c3687b9 100644 --- a/lustre/liblustre/tests/test_common.h +++ b/lustre/liblustre/tests/test_common.h @@ -21,6 
+21,7 @@ void t_chmod(const char *path, const char *format, ...); void t_rename(const char *oldpath, const char *newpath); int t_open_readonly(const char *path); int t_open(const char *path); +int t_opendir(const char *path); void t_close(int fd); int t_check_stat(const char *name, struct stat *buf); int t_check_stat_fail(const char *name); diff --git a/lustre/obdclass/debug.c b/lustre/obdclass/debug.c index 04d17b9..fa125c8 100644 --- a/lustre/obdclass/debug.c +++ b/lustre/obdclass/debug.c @@ -114,7 +114,7 @@ int dump_req(struct ptlrpc_request *req) */ #define LPDS sizeof(__u64) -int page_debug_setup(void *addr, int len, __u64 off, __u64 id) +int block_debug_setup(void *addr, int len, __u64 off, __u64 id) { LASSERT(addr); @@ -130,7 +130,7 @@ int page_debug_setup(void *addr, int len, __u64 off, __u64 id) return 0; } -int page_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) +int block_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) { __u64 ne_off; int err = 0; @@ -171,5 +171,5 @@ EXPORT_SYMBOL(dump_rniobuf); EXPORT_SYMBOL(dump_ioo); //EXPORT_SYMBOL(dump_req); EXPORT_SYMBOL(dump_obdo); -EXPORT_SYMBOL(page_debug_setup); -EXPORT_SYMBOL(page_debug_check); +EXPORT_SYMBOL(block_debug_setup); +EXPORT_SYMBOL(block_debug_check); diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 9485c1b..093f3ac 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -51,8 +51,8 @@ #define ECHO_INIT_OBJID 0x1000000000000000ULL #define ECHO_HANDLE_MAGIC 0xabcd0123fedc9876ULL -#define ECHO_OBJECT0_NPAGES 16 -static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES]; +#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE/PAGE_SIZE) +static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES]; enum { LPROC_ECHO_READ_BYTES = 1, @@ -201,6 +201,62 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa, return 0; } +static void +echo_page_debug_setup(struct page *page, int rw, obd_id id, + __u64 offset, int len) +{ + int 
page_offset = offset & (PAGE_SIZE - 1); + char *addr = ((char *)kmap(page)) + page_offset; + + if (len % OBD_ECHO_BLOCK_SIZE != 0) + CERROR("Unexpected block size %d\n", len); + + while (len > 0) { + if (rw & OBD_BRW_READ) + block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE, + offset, id); + else + block_debug_setup(addr, OBD_ECHO_BLOCK_SIZE, + 0xecc0ecc0ecc0ecc0ULL, + 0xecc0ecc0ecc0ecc0ULL); + + addr += OBD_ECHO_BLOCK_SIZE; + offset += OBD_ECHO_BLOCK_SIZE; + len -= OBD_ECHO_BLOCK_SIZE; + } + + kunmap(page); +} + +static int +echo_page_debug_check(struct page *page, obd_id id, + __u64 offset, int len) +{ + int page_offset = offset & (PAGE_SIZE - 1); + char *addr = ((char *)kmap(page)) + page_offset; + int rc = 0; + int rc2; + + if (len % OBD_ECHO_BLOCK_SIZE != 0) + CERROR("Unexpected block size %d\n", len); + + while (len > 0) { + rc2 = block_debug_check("echo", addr, OBD_ECHO_BLOCK_SIZE, + offset, id); + + if (rc2 != 0 && rc == 0) + rc = rc2; + + addr += OBD_ECHO_BLOCK_SIZE; + offset += OBD_ECHO_BLOCK_SIZE; + len -= OBD_ECHO_BLOCK_SIZE; + } + + kunmap(page); + + return (rc); +} + /* This allows us to verify that desc_private is passed unmolested */ #define DESC_PRIV 0x10293847 @@ -233,16 +289,15 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, for (i = 0; i < objcount; i++, obj++) { int gfp_mask = (obj->ioo_id & 1) ? 
GFP_HIGHUSER : GFP_KERNEL; - int isobj0 = obj->ioo_id == 0; - int verify = !isobj0; + int ispersistent = obj->ioo_id == ECHO_PERSISTENT_OBJID; int j; for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++, r++) { - if (isobj0 && - (nb->offset >> PAGE_SHIFT) < ECHO_OBJECT0_NPAGES) { - r->page = echo_object0_pages[nb->offset >> - PAGE_SHIFT]; + if (ispersistent && + (nb->offset >> PAGE_SHIFT) < ECHO_PERSISTENT_PAGES) { + r->page = echo_persistent_pages[nb->offset >> + PAGE_SHIFT]; /* Take extra ref so __free_pages() can be called OK */ get_page (r->page); } else { @@ -266,22 +321,12 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n", r->page, r->offset, r->len); - if (cmd & OBD_BRW_READ) { - r->rc = r->len; - if (verify) { - page_debug_setup(kmap (r->page), r->len, - r->offset,obj->ioo_id); - kunmap (r->page); - } + if (cmd & OBD_BRW_READ) r->rc = r->len; - } else { - if (verify) { - page_debug_setup(kmap (r->page), r->len, - 0xecc0ecc0ecc0ecc0ULL, - 0xecc0ecc0ecc0ecc0ULL); - kunmap (r->page); - } - } + + if (!ispersistent) + echo_page_debug_setup(r->page, cmd, obj->ioo_id, + r->offset, r->len); } } if (cmd & OBD_BRW_READ) @@ -305,7 +350,7 @@ preprw_cleanup: CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount); while (r-- > res) { kunmap(r->page); - /* NB if this is an 'object0' page, __free_pages will just + /* NB if this is a persistent page, __free_pages will just * lose the extra ref gained above */ __free_pages(r->page, 0); atomic_dec(&obd->u.echo.eo_prep); @@ -347,7 +392,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, LASSERT(oti == NULL || oti->oti_handle == (void *)DESC_PRIV); for (i = 0; i < objcount; i++, obj++) { - int verify = obj->ioo_id != 0; + int verify = obj->ioo_id != ECHO_PERSISTENT_OBJID; int j; for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) { @@ -366,15 +411,15 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, 
r->page, addr, r->offset); if (verify) { - vrc = page_debug_check("echo", addr, r->len, - r->offset, obj->ioo_id); + vrc = echo_page_debug_check(page, obj->ioo_id, + r->offset, r->len); /* check all the pages always */ if (vrc != 0 && rc == 0) rc = vrc; } kunmap(page); - /* NB see comment above regarding object0 pages */ + /* NB see comment above regarding persistent pages */ __free_pages(page, 0); atomic_dec(&obd->u.echo.eo_prep); } @@ -389,7 +434,7 @@ commitrw_cleanup: while (++r < res + niocount) { struct page *page = r->page; - /* NB see comment above regarding object0 pages */ + /* NB see comment above regarding persistent pages */ __free_pages(page, 0); atomic_dec(&obd->u.echo.eo_prep); } @@ -476,37 +521,37 @@ extern int echo_client_init(void); extern void echo_client_exit(void); static void -echo_object0_pages_fini (void) +echo_persistent_pages_fini (void) { int i; - for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) - if (echo_object0_pages[i] != NULL) { - __free_pages (echo_object0_pages[i], 0); - echo_object0_pages[i] = NULL; + for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) + if (echo_persistent_pages[i] != NULL) { + __free_pages (echo_persistent_pages[i], 0); + echo_persistent_pages[i] = NULL; } } static int -echo_object0_pages_init (void) +echo_persistent_pages_init (void) { struct page *pg; int i; - for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) { - int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ? + for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) { + int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ? 
GFP_KERNEL : GFP_HIGHUSER; pg = alloc_pages (gfp_mask, 0); if (pg == NULL) { - echo_object0_pages_fini (); + echo_persistent_pages_fini (); return (-ENOMEM); } memset (kmap (pg), 0, PAGE_SIZE); kunmap (pg); - echo_object0_pages[i] = pg; + echo_persistent_pages[i] = pg; } return (0); @@ -519,9 +564,11 @@ static int __init obdecho_init(void) printk(KERN_INFO "Lustre: Echo OBD driver; info@clusterfs.com\n"); + LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); + lprocfs_init_vars(echo, &lvars); - rc = echo_object0_pages_init (); + rc = echo_persistent_pages_init (); if (rc != 0) goto failed_0; @@ -536,7 +583,7 @@ static int __init obdecho_init(void) class_unregister_type(OBD_ECHO_DEVICENAME); failed_1: - echo_object0_pages_fini (); + echo_persistent_pages_fini (); failed_0: RETURN(rc); } @@ -545,7 +592,7 @@ static void /*__exit*/ obdecho_exit(void) { echo_client_exit(); class_unregister_type(OBD_ECHO_DEVICENAME); - echo_object0_pages_fini (); + echo_persistent_pages_fini (); } MODULE_AUTHOR("Cluster File Systems, Inc. 
"); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index c302ec9..2dba463 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -274,7 +274,8 @@ echo_get_object (struct ec_object **ecop, struct obd_device *obd, struct ec_object *eco2; int rc; - if ((oa->o_valid & OBD_MD_FLID) == 0) + if ((oa->o_valid & OBD_MD_FLID) == 0 || + oa->o_id == 0) /* disallow use of object id 0 */ { CERROR ("No valid oid\n"); return (-EINVAL); @@ -423,47 +424,66 @@ echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp) *offp = offset * stripe_size + woffset % stripe_size; } -static void echo_page_debug_setup(struct lov_stripe_md *lsm, - struct page *page, int rw, obd_id id, - obd_off offset, obd_off count) +static void +echo_client_page_debug_setup(struct lov_stripe_md *lsm, + struct page *page, int rw, obd_id id, + obd_off offset, obd_off count) { - void *addr; - obd_off stripe_off; - obd_id stripe_id; + char *addr; + obd_off stripe_off; + obd_id stripe_id; + int delta; - if (id == 0) - return; + /* no partial pages on the client */ + LASSERT(count == PAGE_SIZE); addr = kmap(page); - if (rw == OBD_BRW_WRITE) { - stripe_off = offset; - stripe_id = id; - echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id); - } else { - stripe_off = 0xdeadbeef00c0ffeeULL; - stripe_id = 0xdeadbeef00c0ffeeULL; + for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + if (rw == OBD_BRW_WRITE) { + stripe_off = offset + delta; + stripe_id = id; + echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id); + } else { + stripe_off = 0xdeadbeef00c0ffeeULL; + stripe_id = 0xdeadbeef00c0ffeeULL; + } + block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE, + stripe_off, stripe_id); } - page_debug_setup(addr, count, stripe_off, stripe_id); kunmap(page); } -static int echo_page_debug_check(struct lov_stripe_md *lsm, - struct page *page, obd_id id, - obd_off offset, obd_off count) +static int +echo_client_page_debug_check(struct lov_stripe_md 
*lsm, + struct page *page, obd_id id, + obd_off offset, obd_off count) { - obd_off stripe_off = offset; - obd_id stripe_id = id; - void *addr; - int rc; + obd_off stripe_off; + obd_id stripe_id; + char *addr; + int delta; + int rc; + int rc2; - if (id == 0) - return 0; + /* no partial pages on the client */ + LASSERT(count == PAGE_SIZE); addr = kmap(page); - echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id); - rc = page_debug_check("test_brw", addr, count, stripe_off, stripe_id); + + for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + stripe_off = offset + delta; + stripe_id = id; + echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id); + + rc2 = block_debug_check("test_brw", + addr + delta, OBD_ECHO_BLOCK_SIZE, + stripe_off, stripe_id); + if (rc2 != 0) + rc = rc2; + } + kunmap(page); return rc; } @@ -482,10 +502,11 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, int verify = 0; int gfp_mask; - /* oa_id == 0 => speed test (no verification) else... - * oa & 1 => use HIGHMEM - */ - gfp_mask = ((oa->o_id & 1) == 0) ? GFP_KERNEL : GFP_HIGHUSER; + /* oa_id == ECHO_PERSISTENT_OBJID => speed test (no verification). + * oa->o_id & 2 => use HIGHMEM */ + + verify = (oa->o_id) != ECHO_PERSISTENT_OBJID; + gfp_mask = ((oa->o_id & 2) == 0) ?
GFP_KERNEL : GFP_HIGHUSER; LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); @@ -517,15 +538,16 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, pgp->off = off; pgp->flag = 0; - echo_page_debug_setup(lsm, pgp->pg, rw, oa->o_id, off, - pgp->count); + if (verify) + echo_client_page_debug_setup(lsm, pgp->pg, rw, + oa->o_id, off, pgp->count); } rc = obd_brw(rw, ec->ec_exp, oa, lsm, npages, pga, oti); out: - if (rc == 0 && rw == OBD_BRW_READ) - verify = 1; + if (rc != 0 || rw != OBD_BRW_READ) + verify = 0; for (i = 0, pgp = pga; i < npages; i++, pgp++) { if (pgp->pg == NULL) @@ -533,8 +555,8 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, if (verify) { int vrc; - vrc = echo_page_debug_check(lsm, pgp->pg, oa->o_id, - pgp->off, pgp->count); + vrc = echo_client_page_debug_check(lsm, pgp->pg, oa->o_id, + pgp->off, pgp->count); if (vrc != 0 && rc == 0) rc = vrc; } @@ -698,10 +720,11 @@ static void ec_ap_completion(void *data, int cmd, int rc) return; eas = eap->eap_eas; - if (cmd == OBD_BRW_READ) - echo_page_debug_check(eas->eas_lsm, eap->eap_page, - eas->eas_oa.o_id, eap->eap_off, - PAGE_SIZE); + if (cmd == OBD_BRW_READ && + eas->eas_oa.o_id != ECHO_PERSISTENT_OBJID) + echo_client_page_debug_check(eas->eas_lsm, eap->eap_page, + eas->eas_oa.o_id, eap->eap_off, + PAGE_SIZE); spin_lock_irqsave(&eas->eas_lock, flags); if (rc && !eas->eas_rc) @@ -823,9 +846,10 @@ static int echo_client_async_page(struct obd_export *exp, int rw, break; } - if (rw == OBD_BRW_WRITE) - echo_page_debug_setup(lsm, eap->eap_page, rw, oa->o_id, - eap->eap_off, PAGE_SIZE); + if (oa->o_id != ECHO_PERSISTENT_OBJID) + echo_client_page_debug_setup(lsm, eap->eap_page, rw, + oa->o_id, + eap->eap_off, PAGE_SIZE); /* always asserts urgent, which isn't quite right */ rc = obd_queue_async_io(exp, lsm, NULL, eap->eap_cookie, diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 0b47220..ad1d502 100644 --- a/lustre/ptlrpc/pinger.c +++ 
b/lustre/ptlrpc/pinger.c @@ -435,7 +435,6 @@ int ptlrpc_start_pinger(void) pinger_callback = liblustre_register_wait_callback(&pinger_check_rpcs, &pinger_args); #endif - obd_timeout = 10; return 0; } diff --git a/lustre/smfs/dir.c b/lustre/smfs/dir.c index e7507a3..2a99552 100644 --- a/lustre/smfs/dir.c +++ b/lustre/smfs/dir.c @@ -17,7 +17,7 @@ #define NAME_ALLOC_LEN(len) ((len+16) & ~15) -static void prepare_parent_dentry(struct dentry *dentry, struct inode *inode) +void prepare_parent_dentry(struct dentry *dentry, struct inode *inode) { atomic_set(&dentry->d_count, 1); dentry->d_vfs_flags = 0; @@ -162,6 +162,7 @@ static int smfs_link(struct dentry * old_dentry, prepare_parent_dentry(&parent_old, cache_dir); cache_old_dentry = d_alloc(&parent_old, &dentry->d_name); d_add(cache_old_dentry, cache_old_inode); + pre_smfs_inode(inode, cache_old_dentry->d_inode); if (cache_dir->i_op->link) rc = cache_dir->i_op->link(cache_old_dentry, cache_dir, cache_dentry); @@ -170,7 +171,7 @@ static int smfs_link(struct dentry * old_dentry, GOTO(exit, rc); atomic_inc(&inode->i_count); - duplicate_inode(cache_old_dentry->d_inode, inode); + post_smfs_inode(inode, cache_old_dentry->d_inode); d_instantiate(dentry, inode); exit: @@ -205,8 +206,8 @@ static int smfs_unlink(struct inode * dir, rc = cache_dir->i_op->unlink(cache_dir, cache_dentry); - duplicate_inode(cache_dentry->d_inode, dentry->d_inode); - duplicate_inode(cache_dir, dir); + post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + post_smfs_inode(dir, cache_dir); igrab(cache_dentry->d_inode); @@ -272,6 +273,7 @@ static int smfs_mkdir(struct inode * dir, prepare_parent_dentry(&parent, cache_dir); cache_dentry = d_alloc(&parent, &dentry->d_name); + pre_smfs_inode(dir, cache_dir); lock_kernel(); if (cache_dir->i_op->mkdir) rc = cache_dir->i_op->mkdir(cache_dir, cache_dentry, mode); @@ -289,7 +291,7 @@ static int smfs_mkdir(struct inode * dir, rc = post_kml_mkdir(dir, dentry); GOTO(exit, rc); } - duplicate_inode(cache_dir, 
dir); + post_smfs_inode(dir, cache_dir); exit: unlock_kernel(); smfs_trans_commit(handle); @@ -312,15 +314,17 @@ static int smfs_rmdir(struct inode * dir, prepare_parent_dentry(&parent, cache_dir); cache_dentry = d_alloc(&parent, &dentry->d_name); d_add(cache_dentry, cache_inode); - igrab(cache_inode); + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + + if (cache_dir->i_op->rmdir) rc = cache_dir->i_op->rmdir(cache_dir, cache_dentry); - duplicate_inode(cache_dir, dir); - duplicate_inode(cache_dentry->d_inode, dentry->d_inode); - + post_smfs_inode(dir, cache_dir); + post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); d_unalloc(cache_dentry); RETURN(rc); } @@ -341,6 +345,9 @@ static int smfs_mknod(struct inode * dir, struct dentry *dentry, prepare_parent_dentry(&parent, cache_dir); cache_dentry = d_alloc(&parent, &dentry->d_name); + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + if (cache_dir->i_op->mknod) rc = cache_dir->i_op->mknod(cache_dir, cache_dentry, mode, rdev); @@ -351,8 +358,8 @@ static int smfs_mknod(struct inode * dir, struct dentry *dentry, inode = iget(dir->i_sb, cache_inode->i_ino); d_instantiate(dentry, inode); - duplicate_inode(cache_dir, dir); - duplicate_inode(cache_dentry->d_inode, dentry->d_inode); + post_smfs_inode(dir, cache_dir); + post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); exit: d_unalloc(cache_dentry); RETURN(rc); @@ -380,12 +387,15 @@ static int smfs_rename(struct inode * old_dir, struct dentry *old_dentry, prepare_parent_dentry(&parent_new, cache_new_dir); cache_new_dentry = d_alloc(&parent_new, &new_dentry->d_name); + pre_smfs_inode(old_dir, cache_old_dir) ; + pre_smfs_inode(new_dir, cache_new_dir); + if (cache_old_dir->i_op->rename) rc = cache_old_dir->i_op->rename(cache_old_dir, cache_old_dentry, cache_new_dir, cache_new_dentry); - duplicate_inode(cache_old_dir, old_dir); - duplicate_inode(cache_new_dir, new_dir); + 
post_smfs_inode(old_dir, cache_old_dir) ; + post_smfs_inode(new_dir, cache_new_dir); if (cache_new_dentry->d_inode) { igrab(cache_new_dentry->d_inode); } diff --git a/lustre/smfs/file.c b/lustre/smfs/file.c index aad7c32..eaf1901 100644 --- a/lustre/smfs/file.c +++ b/lustre/smfs/file.c @@ -87,11 +87,13 @@ static ssize_t smfs_write (struct file *filp, const char *buf, smfs_prepare_cachefile(inode, filp, cache_inode, &open_file, &open_dentry); + pre_smfs_inode(inode, cache_inode); + if (cache_inode->i_fop->write) rc = cache_inode->i_fop->write(&open_file, buf, count, cache_ppos); *ppos = *cache_ppos; - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(filp, &open_file); RETURN(rc); @@ -118,7 +120,7 @@ int smfs_ioctl(struct inode * inode, struct file * filp, if (cache_inode->i_fop->ioctl) rc = cache_inode->i_fop->ioctl(cache_inode, &open_file, cmd, arg); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(filp, &open_file); RETURN(rc); } @@ -153,11 +155,12 @@ static ssize_t smfs_read (struct file *filp, char *buf, &open_file, &open_dentry); + pre_smfs_inode(inode, cache_inode); if (cache_inode->i_fop->read) rc = cache_inode->i_fop->read(&open_file, buf, count, cache_ppos); *ppos = *cache_ppos; - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(filp, &open_file); RETURN(rc); } @@ -181,10 +184,11 @@ static loff_t smfs_llseek(struct file *file, smfs_prepare_cachefile(dentry->d_inode, file, cache_inode, &open_file, &open_dentry); + pre_smfs_inode(dentry->d_inode, cache_inode); if (cache_inode->i_fop->llseek) rc = cache_inode->i_fop->llseek(&open_file, offset, origin); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); smfs_update_file(file, &open_file); RETURN(rc); @@ -208,10 +212,11 @@ static int smfs_mmap(struct file * file, struct vm_area_struct * vma) if (cache_inode->i_mapping == 
&cache_inode->i_data) inode->i_mapping = cache_inode->i_mapping; + pre_smfs_inode(inode, cache_inode); if (cache_inode->i_fop->mmap) rc = cache_inode->i_fop->mmap(&open_file, vma); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(file, &open_file); RETURN(rc); @@ -231,10 +236,11 @@ static int smfs_open(struct inode * inode, struct file * filp) smfs_prepare_cachefile(inode, filp, cache_inode, &open_file, &open_dentry); + pre_smfs_inode(inode, cache_inode); if (cache_inode->i_fop->open) rc = cache_inode->i_fop->open(cache_inode, &open_file); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(filp, &open_file); RETURN(rc); @@ -254,10 +260,11 @@ static int smfs_release(struct inode * inode, struct file * filp) smfs_prepare_cachefile(inode, filp, cache_inode, &open_file, &open_dentry); + pre_smfs_inode(inode, cache_inode); if (cache_inode->i_fop->release) rc = cache_inode->i_fop->release(cache_inode, &open_file); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(filp, &open_file); RETURN(rc); @@ -279,10 +286,11 @@ int smfs_fsync(struct file * file, smfs_prepare_cachefile(inode, file, cache_inode, &open_file, &open_dentry); + pre_smfs_inode(inode, cache_inode); if (cache_inode->i_fop->fsync) rc = cache_inode->i_fop->fsync(&open_file, &open_dentry, datasync); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); smfs_update_file(file, &open_file); RETURN(rc); @@ -326,7 +334,7 @@ static void smfs_truncate(struct inode * inode) if (cache_inode->i_op->truncate) cache_inode->i_op->truncate(cache_inode); - duplicate_inode(inode, cache_inode); + post_smfs_inode(inode, cache_inode); return; } @@ -344,10 +352,11 @@ int smfs_setattr(struct dentry *dentry, struct iattr *attr) RETURN(-ENOENT); smfs_prepare_cache_dentry(&open_dentry, cache_inode); + pre_smfs_inode(dentry->d_inode, cache_inode); if (cache_inode->i_op->setattr) 
rc = cache_inode->i_op->setattr(&open_dentry, attr); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); RETURN(rc); } @@ -364,12 +373,13 @@ int smfs_setxattr(struct dentry *dentry, const char *name, if (!cache_inode) RETURN(-ENOENT); + pre_smfs_inode(dentry->d_inode, cache_inode); smfs_prepare_cache_dentry(&open_dentry, cache_inode); if (cache_inode->i_op->setattr) rc = cache_inode->i_op->setxattr(&open_dentry, name, value, size, flags); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); RETURN(rc); } @@ -386,11 +396,12 @@ int smfs_getxattr(struct dentry *dentry, const char *name, RETURN(-ENOENT); smfs_prepare_cache_dentry(&open_dentry, cache_inode); + pre_smfs_inode(dentry->d_inode, cache_inode); if (cache_inode->i_op->setattr) rc = cache_inode->i_op->getxattr(&open_dentry, name, buffer, size); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); RETURN(rc); } @@ -406,11 +417,12 @@ ssize_t smfs_listxattr(struct dentry *dentry, char *buffer, size_t size) RETURN(-ENOENT); smfs_prepare_cache_dentry(&open_dentry, cache_inode); + pre_smfs_inode(dentry->d_inode, cache_inode); if (cache_inode->i_op->listxattr) rc = cache_inode->i_op->listxattr(&open_dentry, buffer, size); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); RETURN(rc); } @@ -426,11 +438,12 @@ int smfs_removexattr(struct dentry *dentry, const char *name) RETURN(-ENOENT); smfs_prepare_cache_dentry(&open_dentry, cache_inode); + pre_smfs_inode(dentry->d_inode, cache_inode); if (cache_inode->i_op->removexattr) rc = cache_inode->i_op->removexattr(&open_dentry, name); - duplicate_inode(cache_inode, dentry->d_inode); + post_smfs_inode(dentry->d_inode, cache_inode); RETURN(rc); } diff --git a/lustre/smfs/inode.c b/lustre/smfs/inode.c index 8160d32..117d503 100644 --- a/lustre/smfs/inode.c +++ b/lustre/smfs/inode.c @@ -13,25 +13,43 
@@ #include #include "smfs_internal.h" -void duplicate_inode(struct inode *cache_inode, struct inode *inode) +static void duplicate_inode(struct inode *dst_inode, + struct inode *src_inode) { - - inode->i_mode = cache_inode->i_mode; - inode->i_uid = cache_inode->i_uid; - inode->i_gid = cache_inode->i_gid; - - inode->i_nlink = cache_inode->i_nlink; - inode->i_size = cache_inode->i_size; - inode->i_atime = cache_inode->i_atime; - inode->i_ctime = cache_inode->i_ctime; - inode->i_mtime = cache_inode->i_mtime; - inode->i_blksize = cache_inode->i_blksize; /* This is the optimal IO size - * (for stat), not the fs block - * size */ - inode->i_blocks = cache_inode->i_blocks; - inode->i_version = cache_inode->i_version; - inode->i_state = cache_inode->i_state; + dst_inode->i_mode = src_inode->i_mode; + dst_inode->i_uid = src_inode->i_uid; + dst_inode->i_gid = src_inode->i_gid; + dst_inode->i_nlink = src_inode->i_nlink; + dst_inode->i_size = src_inode->i_size; + dst_inode->i_atime = src_inode->i_atime; + dst_inode->i_ctime = src_inode->i_ctime; + dst_inode->i_mtime = src_inode->i_mtime; + dst_inode->i_blksize = src_inode->i_blksize; + dst_inode->i_blocks = src_inode->i_blocks; + dst_inode->i_version = src_inode->i_version; + dst_inode->i_state = src_inode->i_state; +} + +void post_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) { + duplicate_inode(inode, cache_inode); + /* Clear I_LOCK on the cache_inode here; + * otherwise we will have no later chance + * to do it. + */ + cache_inode->i_state &=~I_LOCK; + } } +void pre_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) { + duplicate_inode(cache_inode, inode); + } +} + static void smfs_read_inode(struct inode *inode) { struct super_block *cache_sb; @@ -50,7 +68,7 @@ static void smfs_read_inode(struct inode *inode) if(cache_sb && cache_sb->s_op->read_inode) cache_sb->s_op->read_inode(cache_inode); - duplicate_inode(cache_inode, inode); +
post_smfs_inode(inode, cache_inode); sm_set_inode_ops(cache_inode, inode); CDEBUG(D_INODE, "read_inode ino %lu icount %d \n", @@ -59,6 +77,7 @@ static void smfs_read_inode(struct inode *inode) iput(cache_inode); return; } + /* Although some filesystem(such as ext3) do not have * clear_inode method, but we need it to free the * cache inode @@ -115,7 +134,7 @@ static void smfs_delete_inode(struct inode *inode) atomic_dec(&cache_inode->i_count); } - duplicate_inode(inode, cache_inode); + pre_smfs_inode(inode, cache_inode); list_del(&cache_inode->i_hash); INIT_LIST_HEAD(&cache_inode->i_hash); @@ -128,7 +147,7 @@ static void smfs_delete_inode(struct inode *inode) if (cache_sb->s_op->delete_inode) cache_sb->s_op->delete_inode(cache_inode); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); I2CI(inode) = NULL; return; @@ -145,11 +164,12 @@ static void smfs_write_inode(struct inode *inode, int wait) if (!cache_inode || !cache_sb) return; - duplicate_inode(inode, cache_inode); + pre_smfs_inode(inode, cache_inode); + if (cache_sb->s_op->write_inode) cache_sb->s_op->write_inode(cache_inode, wait); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); return; } @@ -165,11 +185,11 @@ static void smfs_dirty_inode(struct inode *inode) if (!cache_inode || !cache_sb) return; - duplicate_inode(inode, cache_inode); + pre_smfs_inode(inode, cache_inode); if (cache_sb->s_op->dirty_inode) cache_sb->s_op->dirty_inode(cache_inode); - duplicate_inode(cache_inode, inode); + post_smfs_inode(inode, cache_inode); return; } diff --git a/lustre/smfs/symlink.c b/lustre/smfs/symlink.c index b0b3714..e1ca762 100644 --- a/lustre/smfs/symlink.c +++ b/lustre/smfs/symlink.c @@ -17,13 +17,18 @@ static int smfs_readlink(struct dentry * dentry, char * buffer, int buflen) { struct inode *cache_inode = I2CI(dentry->d_inode); + struct inode *cache_dir = NULL; struct dentry *cache_dentry; + struct dentry parent; int rc = 0; if (!cache_inode) RETURN(-ENOENT); - 
- cache_dentry = d_alloc(NULL, &dentry->d_name); + if (dentry->d_parent && dentry->d_parent->d_inode){ + cache_dir = I2CI(dentry->d_parent->d_inode); + prepare_parent_dentry(&parent, cache_dir); + } + cache_dentry = d_alloc(&parent, &dentry->d_name); d_add(cache_dentry, cache_inode); igrab(cache_inode); @@ -37,21 +42,28 @@ static int smfs_readlink(struct dentry * dentry, char * buffer, int buflen) static int smfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *cache_inode = I2CI(dentry->d_inode); + struct inode *cache_dir = NULL; struct dentry *cache_dentry; + struct dentry parent; int rc = 0; if (!cache_inode) RETURN(-ENOENT); - - cache_dentry = d_alloc(NULL, &dentry->d_name); + + if (dentry->d_parent && dentry->d_parent->d_inode){ + cache_dir = I2CI(dentry->d_parent->d_inode); + prepare_parent_dentry(&parent, cache_dir); + } + + cache_dentry = d_alloc(&parent, &dentry->d_name); + d_add(cache_dentry, cache_inode); igrab(cache_inode); - + if (cache_inode->i_op && cache_inode->i_op->follow_link) rc = cache_inode->i_op->follow_link(cache_dentry, nd); d_unalloc(cache_dentry); return rc; - } struct inode_operations smfs_sym_iops = { readlink: smfs_readlink, diff --git a/lustre/tests/.RC_CURRENT.tag b/lustre/tests/.RC_CURRENT.tag index 2ebb218..efebb89 100644 --- a/lustre/tests/.RC_CURRENT.tag +++ b/lustre/tests/.RC_CURRENT.tag @@ -1 +1 @@ -RC_1_1_9_1 +RC_1_1_9_6 diff --git a/lustre/tests/test_brw.c b/lustre/tests/test_brw.c index f242eba..54126a0 100644 --- a/lustre/tests/test_brw.c +++ b/lustre/tests/test_brw.c @@ -31,7 +31,7 @@ #define WRITE 2 #define LPDS sizeof(__u64) -int page_debug_setup(void *addr, int len, __u64 off, __u64 id) +int block_debug_setup(void *addr, int len, __u64 off, __u64 id) { off = cpu_to_le64(off); id = cpu_to_le64(id); @@ -45,7 +45,7 @@ int page_debug_setup(void *addr, int len, __u64 off, __u64 id) return 0; } -int page_debug_check(char *who, void *addr, int size, __u64 off, __u64 id) +int block_debug_check(char *who, 
void *addr, int size, __u64 off, __u64 id) { __u64 ne_off; int err = 0; @@ -181,14 +181,14 @@ int main(int argc, char **argv) int i; for (i = 0; i < len; i += st.st_blksize) - page_debug_setup(buf + i, st.st_blksize, offset + i, - objid); + block_debug_setup(buf + i, st.st_blksize, + offset + i, objid); rc = write(fd, buf, len); for (i = 0; i < len; i += st.st_blksize) { - if (page_debug_check("write", buf + i, st.st_blksize, - offset + i, objid)) + if (block_debug_check("write", buf + i, st.st_blksize, + offset + i, objid)) return 10; } @@ -216,8 +216,8 @@ int main(int argc, char **argv) } for (i = 0; i < len; i += st.st_blksize) { - if (page_debug_check("read", buf + i, st.st_blksize, - offset + i, objid)) + if (block_debug_check("read", buf + i, st.st_blksize, + offset + i, objid)) return 11; } } diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index a41f7a7..4685e46 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -34,12 +34,11 @@ #include #include +#include #include "parser.h" #include "obdctl.h" -extern int op_create_file(char *name, long stripe_size, int stripe_offset, - int stripe_count); extern int op_find(char *path, struct obd_uuid *obduuid, int recursive, int verbose, int quiet); extern int op_check(int type_num, char **obd_type_p, char *dir); diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 0cf4489..bb5bbe1 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -292,6 +292,49 @@ void lov_dump_user_lmm(struct find_param *param, char *dname, char *fname) } } +int get_file_stripe(char *path, struct lov_user_md *lum) +{ + char *dname, *fname; + int fd, rc = 0; + + fname = strrchr(path, '/'); + + /* It should be a file (or other non-directory) */ + if (fname == NULL) { + dname = (char *)malloc(2); + if (dname == NULL) + return ENOMEM; + strcpy(dname, "."); + fname = path; + } else { + dname = (char *)malloc(fname - path + 1); + if (dname == NULL) + return ENOMEM; + strncpy(dname, path, fname 
- path); + dname[fname - path + 1] = '\0'; + fname++; + } + + if ((fd = open(dname, O_RDONLY)) == -1) { + free(dname); + return errno; + } + + strncpy((char *)lum, fname, sizeof(*lum)); + if (ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lum) == -1) { + close(fd); + free(dname); + return errno; + } + + if (close(fd) == -1) + rc = errno; + + free(dname); + + return rc; +} + static int process_file(DIR *dir, char *dname, char *fname, struct find_param *param) { diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index ad043aa..e4ef0a8 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -1242,6 +1242,7 @@ int jt_obd_test_brw(int argc, char **argv) struct timeval start, next_time; __u64 count, next_count, len, thr_offset = 0, objid = 3; int write = 0, verbose = 1, cmd, i, rc = 0, pages = 1; + int repeat_offset = 0; char *end; if (argc < 2 || argc > 7) { @@ -1267,8 +1268,11 @@ int jt_obd_test_brw(int argc, char **argv) if (argc >= 3) { if (argv[2][0] == 'w' || argv[2][0] == '1') write = 1; - else if (argv[2][0] == 'r' || argv[2][0] == '0') - write = 0; + /* else it's a read */ + + if (argv[2][0] != 0 && + argv[2][1] == 'r') + repeat_offset = 1; } if (argc >= 4) { @@ -1366,7 +1370,8 @@ int jt_obd_test_brw(int argc, char **argv) printf("%s: %s number %dx%d\n", jt_cmdname(argv[0]), write ? "write" : "read", i, pages); - data.ioc_offset += len; + if (!repeat_offset) + data.ioc_offset += len; } if (!rc) { -- 1.8.3.1