Whamcloud - gitweb
Branch: b1_4
authorgreen <green>
Thu, 26 May 2005 21:07:30 +0000 (21:07 +0000)
committergreen <green>
Thu, 26 May 2005 21:07:30 +0000 (21:07 +0000)
Liblustre revival.
Liblustre can be compiled and works now.

33 files changed:
lustre/ChangeLog
lustre/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/include/liblustre.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_quota.h
lustre/include/linux/obd_support.h
lustre/liblustre/Makefile.am
lustre/liblustre/dir.c
lustre/liblustre/file.c
lustre/liblustre/genlib.sh
lustre/liblustre/llite_lib.c
lustre/liblustre/llite_lib.h
lustre/liblustre/lutil.c
lustre/liblustre/namei.c
lustre/liblustre/rw.c
lustre/liblustre/super.c
lustre/liblustre/tests/Makefile.am
lustre/liblustre/tests/echo_test.c
lustre/liblustre/tests/recovery_small.c
lustre/liblustre/tests/replay_single.c
lustre/liblustre/tests/sanity.c
lustre/liblustre/tests/test_common.c
lustre/liblustre/tests/test_common.h
lustre/obdclass/genops.c
lustre/obdclass/llog_swab.c
lustre/osc/autoMakefile.am
lustre/osc/osc_quota.c
lustre/osc/osc_request.c
lustre/ptlrpc/autoMakefile.am
lustre/ptlrpc/import.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/service.c

index af7bb94..409e4cf 100644 (file)
@@ -2,6 +2,11 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.4.3
        * bug fixes
 
+Severity   : enhancement
+Bugzilla   : 2563
+Description: Liblustre support for 1.4.x
+Details    : Liblustre is now back. It compiles and works.
+
 Severity   : minor
 Frequency  : rare (extremely heavy IO load with hundreds of clients)
 Bugzilla   : 6172
index 6ab2f9d..29ba3f7 100644 (file)
@@ -6,12 +6,14 @@
 AUTOMAKE_OPTIONS = foreign
 
 ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \
-       liblustre doc utils tests conf scripts autoconf
+       doc utils tests conf scripts autoconf
 
 SERVER_SUBDIRS := ldiskfs obdfilter ost mds
 
 CLIENT_SUBDIRS := mdc llite
 
+LIBLUSTRE_SUBDIRS := liblustre
+
 SUBDIRS := $(ALWAYS_SUBDIRS)
 
 if SERVER
@@ -22,6 +24,14 @@ if CLIENT
 SUBDIRS += $(CLIENT_SUBDIRS)
 endif
 
+# this needs to be after the client subdirs
+if LIBLUSTRE
+if !CLIENT
+SUBDIRS += $(CLIENT_SUBDIRS)
+endif
+SUBDIRS += $(LIBLUSTRE_SUBDIRS)
+endif
+
 DIST_SUBDIRS := $(ALWAYS_SUBDIRS) $(SERVER_SUBDIRS) $(CLIENT_SUBDIRS)
 
 EXTRA_DIST = BUGS FDL kernel_patches
index bbc9fbd..6156424 100644 (file)
@@ -486,6 +486,7 @@ AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
 AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes)
 AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
 AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
 AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
 AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
 AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
index d618a40..ca328bd 100644 (file)
@@ -76,6 +76,10 @@ typedef unsigned short umode_t;
 
 #endif
 
+#ifndef CURRENT_SECONDS
+# define CURRENT_SECONDS time(0)
+#endif
+
 /* This is because lprocfs_status.h gets included here indirectly.  It would
  * be much better to just avoid lprocfs being included into liblustre entirely
  * but that requires more header surgery than I can handle right now.
@@ -314,14 +318,7 @@ static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {}
 
 /* random */
 
-static inline void get_random_bytes(void *ptr, int size)
-{
-        int *p = (int *)ptr;
-        int i, count = size/sizeof(int);
-
-        for (i = 0; i< count; i++)
-                *p++ = rand();
-}
+void get_random_bytes(void *ptr, int size);
 
 /* memory */
 
@@ -377,6 +374,10 @@ static inline int kmem_cache_destroy(kmem_cache_t *a)
 
 /* struct page decl moved out from here into portals/include/libcfs/user-prim.h */
 
+/* 2.4 defines */
+#define PAGE_LIST_ENTRY list
+#define PAGE_LIST(page) ((page)->list)
+
 #define kmap(page) (page)->addr
 #define kunmap(a) do {} while (0)
 
@@ -566,12 +567,23 @@ struct task_struct {
         int pid;
         int fsuid;
         int fsgid;
+        int max_groups;
+        int ngroups;
+        gid_t *groups;
         __u32 cap_effective;
+        
+        struct fs_struct __fs;
 };
 
 extern struct task_struct *current;
-
-#define in_group_p(a) 0 /* FIXME */
+int in_group_p(gid_t gid);
+/*
+ * Report whether capability bit 'cap' is set in current->cap_effective.
+ *
+ * Returns 1 when the bit is set and 0 otherwise.  A 'cap' that does not
+ * fit in the 32-bit cap_effective mask is reported as not held instead of
+ * being used as a shift count (an out-of-range shift is undefined).
+ */
+static inline int capable(int cap)
+{
+        if (cap < 0 || cap >= 32)
+                return 0;
+
+        /* shift an unsigned 1: "1 << 31" overflows a signed int */
+        return (current->cap_effective & (1U << cap)) != 0;
+}
 
 #define set_current_state(foo) do { current->state = foo; } while (0)
 
@@ -611,6 +623,7 @@ static inline int schedule_timeout(signed long t)
 }
 
 #define lock_kernel() do {} while (0)
+#define unlock_kernel() do {} while (0)
 #define daemonize(l) do {} while (0)
 #define sigfillset(l) do {} while (0)
 #define recalc_sigpending(l) do {} while (0)
@@ -684,6 +697,33 @@ typedef struct { volatile int counter; } atomic_t;
 #define unlikely(exp) (exp)
 #endif
 
+/* FIXME: sys/capability.h ends up including linux/fs.h, which causes
+ * numerous problems on x86-64. As a temporary fix for the broken build
+ * at Cray, we copy the definitions we need from capability.h.
+ * FIXME
+ */
+struct _cap_struct;
+typedef struct _cap_struct *cap_t;
+typedef int cap_value_t;
+typedef enum {
+    CAP_EFFECTIVE=0,
+    CAP_PERMITTED=1,
+    CAP_INHERITABLE=2
+} cap_flag_t;
+typedef enum {
+    CAP_CLEAR=0,
+    CAP_SET=1
+} cap_flag_value_t;
+
+#define CAP_DAC_OVERRIDE        1
+#define CAP_DAC_READ_SEARCH     2
+#define CAP_FOWNER              3
+#define CAP_FSETID              4
+#define CAP_SYS_ADMIN          21
+
+cap_t   cap_get_proc(void);
+int     cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *);
+
 /* log related */
 static inline int llog_init_commit_master(void) { return 0; }
 static inline int llog_cleanup_commit_master(int force) { return 0; }
@@ -728,6 +768,10 @@ void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg);
 void liblustre_deregister_wait_callback(void *notifier);
 int liblustre_wait_event(int timeout);
 
+/* quota */
+#define QUOTA_OK 0
+#define NO_QUOTA 1
+
 #include <linux/obd_support.h>
 #include <linux/lustre_idl.h>
 #include <linux/lustre_lib.h>
index c5c045d..094eafe 100644 (file)
@@ -69,6 +69,11 @@ enum {
 #include <linux/lustre_idl.h>
 #endif /* __KERNEL__ */
 
+#define LLAP_FROM_COOKIE(c)                                                    \
+        (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC),     \
+         (struct ll_async_page *)(c))
+#define LL_MAX_BLKSIZE          (4UL * 1024 * 1024)
+
 #include <lustre/lustre_user.h>
 
 #endif
index 85fa3a2..7633ebe 100644 (file)
@@ -4,7 +4,9 @@
 #ifndef _LUSTRE_QUOTA_H
 #define _LUSTRE_QUOTA_H
 
-#include <linux/version.h>
+#ifdef __KERNEL__
+# include <linux/version.h>
+#endif
 #include <linux/quota.h>
 #include <linux/lustre_idl.h>
 
index f833f72..9cb4ea8 100644 (file)
@@ -256,7 +256,7 @@ do {                                                            \
 } while(0)
 #else
 /* sigh.  an expedient fix until OBD_RACE is fixed up */
-#define OBD_RACE(foo) LBUG()
+#define OBD_RACE(foo) do {} while(0)
 #endif
 
 #define fixme() CDEBUG(D_OTHER, "FIXME\n");
@@ -317,6 +317,20 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
 
 extern atomic_t portal_kmemory;
 
+#if defined(LUSTRE_UTILS) /* this version is for utils only */
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
+do {                                                                          \
+        (ptr) = kmalloc(size, (gfp_mask));                                    \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+        } else {                                                              \
+                memset(ptr, 0, size);                                         \
+                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n",          \
+                       (int)(size), ptr);                                     \
+        }                                                                     \
+} while (0)
+#else /* this version is for the kernel and liblustre */
 #define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
 do {                                                                          \
         (ptr) = kmalloc(size, (gfp_mask));                                    \
@@ -334,17 +348,13 @@ do {                                                                          \
                        (int)(size), ptr, atomic_read(&obd_memory));           \
         }                                                                     \
 } while (0)
+#endif
 
 #ifndef OBD_GFP_MASK
 # define OBD_GFP_MASK GFP_NOFS
 #endif
 
-#ifdef __KERNEL__
 #define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
-#else
-#define OBD_ALLOC(ptr, size) (ptr = malloc(size))
-#endif
-
 #define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_KERNEL)
 
 #ifdef __arch_um__
index a776768..6fac0b2 100644 (file)
@@ -1,10 +1,11 @@
 ## Liblustre excecutables & libraries Makefile
 SUBDIRS = . tests
 
-AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals
+AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 \
+              $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals
 AM_CFLAGS = $(LLCFLAGS)
 
-LIBS = $(LIBEFENCE)
+AM_LIBS = $(LIBEFENCE)
 
 LUSTRE_LIBS = libllite.a \
               $(top_builddir)/lustre/lov/liblov.a \
@@ -15,14 +16,16 @@ LUSTRE_LIBS = libllite.a \
               $(top_builddir)/lustre/obdclass/liblustreclass.a \
               $(top_builddir)/lustre/lvfs/liblvfs.a
 
+if !CRAY_PORTALS
 PTL_LIBS =    $(top_builddir)/portals/utils/libuptlctl.a \
               $(top_builddir)/portals/unals/libtcpnal.a \
               $(top_builddir)/portals/portals/libportals.a
+else
+PTL_LIBS =    $(top_builddir)/portals/utils/libuptlctl.a \
+              $(CRAY_PORTALS_LIBS)/libportals.a
+endif
 
-SYSIO_LIBS =  $(SYSIO)/drivers/native/libsysio_native.a \
-              $(SYSIO)/drivers/sockets/libsysio_sockets.a \
-              $(SYSIO)/src/libsysio.a \
-              $(SYSIO)/dev/stdfd/libsysio_stdfd.a
+SYSIO_LIBS =  $(SYSIO)/lib/libsysio.a
 
 if LIBLUSTRE
 lib_LIBRARIES = liblustre.a
@@ -40,16 +43,18 @@ install-exec-hook: liblustre.so
        done
 else
 install-exec-hook:
-
 endif
 
-libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c llite_lib.h
+libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \
+                    lutil.c lutil.h llite_lib.h
 
 # for make rpms -- need cleanup
 liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \
                     llite_lib.h
 
 liblustre.a : $(LUSTRE_LIBS) $(PTL_LIBS) $(SYSIO_LIBS)
-       $(srcdir)/genlib.sh $(SYSIO) $(AR) $(LINK) || ( rm -f $@; exit 1 )
+       sh $(srcdir)/genlib.sh "$(SYSIO)" "$(CRAY_PORTALS_LIBS)" "$(LIBS)"
 
 EXTRA_DIST = genlib.sh
+
+CLEANFILES := liblsupport.a liblustre.so
index c125b79..ec33ac3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light directory handling
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <sys/fcntl.h>
 #include <sys/queue.h>
 
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
 
 #undef LIST_HEAD
 
+#ifdef HAVE_LINUX_TYPES_H
 #include <linux/types.h>
-#include <linux/dirent.h>
+#elif defined(HAVE_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
+#elif defined(HAVE_UNISTD_H)
+#include <unistd.h>
+#endif
+
+#include <dirent.h>
 
 #include "llite_lib.h"
 
 static int llu_dir_do_readpage(struct inode *inode, struct page *page)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         struct ll_fid mdc_fid;
         __u64 offset;
@@ -61,19 +77,9 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
         struct mdc_op_data data;
         struct obd_device *obddev = class_exp2obd(sbi->ll_mdc_exp);
         struct ldlm_res_id res_id =
-                { .name = {lli->lli_st_ino, (__u64)lli->lli_st_generation} };
+                { .name = {st->st_ino, (__u64)lli->lli_st_generation} };
         ENTRY;
 
-        if ((lli->lli_st_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index) {
-                /* XXX why do we need this exactly, and why do we think that
-                 *     an all-zero directory page is useful?
-                 */
-                CERROR("memsetting dir page %lu to zero (size %lld)\n",
-                       page->index, lli->lli_st_size);
-                memset(page->addr, 0, PAGE_CACHE_SIZE);
-                GOTO(readpage_out, rc);
-        }
-
         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
                              &res_id, LDLM_PLAIN, NULL, LCK_PR, &lockh);
         if (!rc) {
@@ -93,7 +99,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
         }
         ldlm_lock_dump_handle(D_OTHER, &lockh);
 
-        mdc_pack_fid(&mdc_fid, lli->lli_st_ino, lli->lli_st_generation, S_IFDIR);
+        mdc_pack_fid(&mdc_fid, st->st_ino, lli->lli_st_generation, S_IFDIR);
 
         offset = page->index << PAGE_SHIFT;
         rc = mdc_readpage(sbi->ll_mdc_exp, &mdc_fid,
@@ -103,12 +109,13 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
                 LASSERT (body != NULL);         /* checked by mdc_readpage() */
                 LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */
 
-                lli->lli_st_size = body->size;
+                st->st_size = body->size;
+        } else {
+                CERROR("read_dir_page(%ld) error %d\n", page->index, rc);
         }
         ptlrpc_req_finished(request);
         EXIT;
 
- readpage_out:
         ldlm_lock_decref(&lockh, LCK_PR);
         return rc;
 }
@@ -135,6 +142,29 @@ static struct page *llu_dir_read_page(struct inode *ino, int pgidx)
         return page;
 }
 
+/*
+ * File type codes stored in a directory entry's file_type field.  The
+ * enumerators are numbered implicitly from 0, so EXT2_FT_MAX is the
+ * number of valid codes and the size of ext2_filetype_table.
+ */
+enum {
+        EXT2_FT_UNKNOWN,
+        EXT2_FT_REG_FILE,
+        EXT2_FT_DIR,
+        EXT2_FT_CHRDEV,
+        EXT2_FT_BLKDEV,
+        EXT2_FT_FIFO,
+        EXT2_FT_SOCK,
+        EXT2_FT_SYMLINK,
+        EXT2_FT_MAX
+};
+
+/*
+ * Maps an EXT2_FT_* code to the matching dirent DT_* value.  Callers must
+ * check the index against EXT2_FT_MAX before looking it up.
+ */
+static unsigned char ext2_filetype_table[EXT2_FT_MAX] = {
+        [EXT2_FT_UNKNOWN]       = DT_UNKNOWN,
+        [EXT2_FT_REG_FILE]      = DT_REG,
+        [EXT2_FT_DIR]           = DT_DIR,
+        [EXT2_FT_CHRDEV]        = DT_CHR,
+        [EXT2_FT_BLKDEV]        = DT_BLK,
+        [EXT2_FT_FIFO]          = DT_FIFO,
+        [EXT2_FT_SOCK]          = DT_SOCK,
+        [EXT2_FT_SYMLINK]       = DT_LNK,
+};
+
 #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
 #define ROUND_UP64(x)   (((x)+sizeof(__u64)-1) & ~(sizeof(__u64)-1))
 
@@ -165,16 +195,24 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes,
                               _SYSIO_OFF_T *basep)
 {
         struct llu_inode_info *lli = llu_i2info(ino);
+        struct intnl_stat *st = llu_i2stat(ino);
         loff_t pos = *basep, offset;
         int maxpages, pgidx, filled = 0;
         ENTRY;
 
+        if (st->st_size == 0) {
+                CWARN("dir size is 0?\n");
+                RETURN(0);
+        }
+
+        liblustre_wait_event(0);
+
         if (pos == -1)
                 pos = lli->lli_dir_pos;
 
-        maxpages = lli->lli_st_size >> PAGE_CACHE_SHIFT;
-        pgidx = pos >> PAGE_CACHE_SHIFT;
-        offset = pos & ~PAGE_CACHE_MASK;
+        maxpages = (st->st_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+        pgidx = pos >> PAGE_SHIFT;
+        offset = pos & ~PAGE_MASK;
 
         for ( ; pgidx < maxpages ; pgidx++, offset = 0) {
                 struct page *page;
@@ -186,23 +224,24 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes,
                         continue;
 
                 /* size might have been updated by mdc_readpage */
-                maxpages = lli->lli_st_size >> PAGE_CACHE_SHIFT;
+                maxpages = (st->st_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
                 /* fill in buffer */
                 addr = page->addr;
-                limit = addr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1);
+                limit = addr + PAGE_SIZE - EXT2_DIR_REC_LEN(1);
                 de = (struct ext2_dirent *) (addr + offset);
 
                 for ( ; (char*) de <= limit; de = ext2_next_entry(de)) {
                         if (de->inode) {
                                 int over;
-                                unsigned char d_type = 0;
+                                unsigned char d_type = DT_UNKNOWN;
 
-                                /* XXX handle type, etc here */
+                                if (de->file_type < EXT2_FT_MAX)
+                                        d_type = ext2_filetype_table[de->file_type];
 
                                 offset = (char*) de - addr;
                                 over =  filldir(buf, nbytes, de->name, de->name_len,
-                                                (pgidx << PAGE_CACHE_SHIFT) | offset,
+                                                (pgidx << PAGE_SHIFT) | offset,
                                                 le32_to_cpu(de->inode), d_type, &filled);
                                 if (over) {
                                         free_page(page);
@@ -214,7 +253,7 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes,
                 free_page(page);
         }
 done:
-        lli->lli_dir_pos = pgidx << PAGE_CACHE_SHIFT | offset;
+        lli->lli_dir_pos = pgidx << PAGE_SHIFT | offset;
         *basep = lli->lli_dir_pos;
         RETURN(filled);
 }
index 0aa6687..ab9017d 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light file operations
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <assert.h>
 #include <time.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <sys/queue.h>
+#include <fcntl.h>
 
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
 
 #undef LIST_HEAD
 
@@ -66,29 +73,29 @@ void obdo_refresh_inode(struct inode *dst,
                         struct obdo *src,
                         obd_flag valid)
 {
-        struct llu_inode_info *lli = llu_i2info(dst);
+        struct intnl_stat *st = llu_i2stat(dst);
         valid &= src->o_valid;
 
         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
                 CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n",
-                       src->o_valid, LTIME_S(lli->lli_st_mtime), 
-                       LTIME_S(lli->lli_st_ctime),
+                       src->o_valid, LTIME_S(st->st_mtime), 
+                       LTIME_S(st->st_ctime),
                        (long)src->o_mtime, (long)src->o_ctime);
 
-        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(lli->lli_st_atime))
-                LTIME_S(lli->lli_st_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(lli->lli_st_mtime))
-                LTIME_S(lli->lli_st_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime))
-                LTIME_S(lli->lli_st_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE && src->o_size > lli->lli_st_size)
-                lli->lli_st_size = src->o_size;
+        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime))
+                LTIME_S(st->st_atime) = src->o_atime;
+        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(st->st_mtime))
+                LTIME_S(st->st_mtime) = src->o_mtime;
+        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
+                LTIME_S(st->st_ctime) = src->o_ctime;
+        if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size)
+                st->st_size = src->o_size;
         /* optimum IO size */
         if (valid & OBD_MD_FLBLKSZ)
-                lli->lli_st_blksize = src->o_blksize;
+                st->st_blksize = src->o_blksize;
         /* allocation of space */
-        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > lli->lli_st_blocks)
-                lli->lli_st_blocks = src->o_blocks;
+        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > st->st_blocks)
+                st->st_blocks = src->o_blocks;
 }
 
 static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
@@ -105,7 +112,7 @@ static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
         /* already opened? */
         if (lli->lli_open_count++)
                 RETURN(0);
-                
+
         LASSERT(!lli->lli_file_data);
 
         OBD_ALLOC(fd, sizeof(*fd));
@@ -126,6 +133,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
 {
         struct inode *inode = pnode->p_base->pb_ino;
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct ll_file_data *fd;
         struct ptlrpc_request *request;
         struct lookup_intent *it;
@@ -133,11 +141,13 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         int rc = 0;
         ENTRY;
 
+        liblustre_wait_event(0);
+
         /* don't do anything for '/' */
         if (llu_is_root_inode(inode))
                 RETURN(0);
 
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino);
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", st->st_ino);
         LL_GET_INTENT(inode, it);
 
         if (!it->d.lustre.it_disposition) {
@@ -152,7 +162,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         if (rc)
                 LBUG();
 
-        if (!S_ISREG(lli->lli_st_mode))
+        if (!S_ISREG(st->st_mode))
                 GOTO(out_release, rc = 0);
                 
         fd = lli->lli_file_data;
@@ -166,7 +176,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         }
         fd->fd_flags &= ~O_LOV_DELAY_CREATE;
 
-        lli->lli_open_flags = flags;
+        lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC);
 
  out_release:
         request = it->d.lustre.it_data;
@@ -175,6 +185,22 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         it->it_op_release(it);
         OBD_FREE(it, sizeof(*it));
 
+        /* libsysio doesn't do anything for O_TRUNC, so here we
+         * simply simulate it as open(...); truncate(...);
+         */
+        if (rc == 0 && (flags & O_TRUNC) &&
+            S_ISREG(st->st_mode)) {
+                struct iattr attr;
+
+                memset(&attr, 0, sizeof(attr));
+                attr.ia_size = 0;
+                attr.ia_valid |= ATTR_SIZE | ATTR_RAW;
+                rc  = llu_setattr_raw(inode, &attr);
+                if (rc) {
+                        CERROR("error %d truncate in open()\n", rc);
+                }
+        }
+
         RETURN(rc);
 }
 
@@ -251,6 +277,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
 int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct ll_file_data *fd = lli->lli_file_data;
         struct ptlrpc_request *req = NULL;
         struct obd_client_handle *och = &fd->fd_mds_och;
@@ -258,7 +285,7 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode)
         int rc, valid;
         ENTRY;
 
-        obdo.o_id = lli->lli_st_ino;
+        obdo.o_id = st->st_ino;
         obdo.o_valid = OBD_MD_FLID;
         valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE |OBD_MD_FLBLOCKS |
                 OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
@@ -278,12 +305,12 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode)
                 //ll_queue_done_writing(inode);
                 rc = 0;
         } else if (rc) {
-                CERROR("inode %lu close failed: rc %d\n", lli->lli_st_ino, rc);
+                CERROR("inode %llu close failed: rc %d\n", st->st_ino, rc);
         } else {
                 rc = llu_objects_destroy(req, inode);
                 if (rc)
-                        CERROR("inode %lu ll_objects destroy: rc = %d\n",
-                                lli->lli_st_ino, rc);
+                        CERROR("inode %llu ll_objects destroy: rc = %d\n",
+                                st->st_ino, rc);
         }
 
         mdc_clear_open_replay_data(och);
@@ -303,7 +330,7 @@ int llu_file_release(struct inode *inode)
         int rc = 0, rc2;
 
         ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino,
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu\n", llu_i2stat(inode)->st_ino,
                lli->lli_st_generation);
 
         if (llu_is_root_inode(inode))
@@ -324,54 +351,34 @@ int llu_file_release(struct inode *inode)
         RETURN(rc);
 }
 
+/*
+ * Release the file behind 'inode' via llu_file_release().
+ *
+ * libsysio requires this operation to return 0, so a failure from
+ * llu_file_release() is only logged with CERROR() and is not propagated
+ * to the caller.
+ */
 int llu_iop_close(struct inode *inode)
 {
         int rc;
 
+        liblustre_wait_event(0);
+
         rc = llu_file_release(inode);
+        if (rc) {
+                CERROR("file close error %d\n", rc);
+        }
         /* if open count == 0 && stale_flag is set, should we
          * remove the inode immediately? */
-        return rc;
+        return 0;
 }
 
-int llu_iop_ipreadv(struct inode *ino,
-                    struct ioctx *ioctx)
+/*
+ * Validate a file position for 'ino'.
+ *
+ * Calls liblustre_wait_event(0) first, then hands back 'off' unchanged
+ * when 0 <= off <= ll_file_maxbytes(ino), or -EINVAL otherwise.
+ */
+_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off)
 {
         ENTRY;
 
-        if (!ioctx->ioctx_iovlen)
-                RETURN(0);
-        if (ioctx->ioctx_iovlen < 0)
-                RETURN(-EINVAL);
-
-        ioctx->ioctx_private = llu_file_read(ino,
-                                        ioctx->ioctx_iovec,
-                                        ioctx->ioctx_iovlen,
-                                        ioctx->ioctx_offset);
-        if (IS_ERR(ioctx->ioctx_private))
-                return (PTR_ERR(ioctx->ioctx_private));
-
-        RETURN(0);
-}
-
-int llu_iop_ipwritev(struct inode *ino,
-                     struct ioctx *ioctx)
-{
-        ENTRY;
+        liblustre_wait_event(0);
 
-        if (!ioctx->ioctx_iovlen)
-                RETURN(0);
-        if (ioctx->ioctx_iovlen < 0)
+        if (off < 0 || off > ll_file_maxbytes(ino))
                 RETURN(-EINVAL);
 
-        ioctx->ioctx_private = llu_file_write(ino,
-                                         ioctx->ioctx_iovec,
-                                         ioctx->ioctx_iovlen,
-                                         ioctx->ioctx_offset);
-        if (IS_ERR(ioctx->ioctx_private))
-                return (PTR_ERR(ioctx->ioctx_private));
-
-        RETURN(0);
+        RETURN(off);
 }
 
 /* this isn't where truncate starts.   roughly:
@@ -380,15 +387,17 @@ int llu_iop_ipwritev(struct inode *ino,
 static void llu_truncate(struct inode *inode)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct obdo oa = {0};
-        int err;
+        int rc;
         ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino,
-               lli->lli_st_generation);
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p) to %llu\n", st->st_ino,
+               lli->lli_st_generation, inode, st->st_size);
 
         if (!lsm) {
-                CERROR("truncate on inode %lu with no objects\n", lli->lli_st_ino);
+                CDEBUG(D_INODE, "truncate on inode %llu with no objects\n",
+                       st->st_ino);
                 EXIT;
                 return;
         }
@@ -398,14 +407,16 @@ static void llu_truncate(struct inode *inode)
         obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME|
                                     OBD_MD_FLMTIME | OBD_MD_FLCTIME);
 
+        obd_adjust_kms(llu_i2obdexp(inode), lsm, st->st_size, 1);
+
         CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
-               oa.o_id, lli->lli_st_size);
+               oa.o_id, st->st_size);
 
         /* truncate == punch from new size to absolute end of file */
-        err = obd_punch(llu_i2obdexp(inode), &oa, lsm, lli->lli_st_size,
-                        OBD_OBJECT_EOF, NULL);
-        if (err)
-                CERROR("obd_truncate fails (%d) ino %lu\n", err, lli->lli_st_ino);
+        rc = obd_punch(llu_i2obdexp(inode), &oa, lsm, st->st_size,
+                       OBD_OBJECT_EOF, NULL);
+        if (rc)
+                CERROR("obd_truncate fails (%d) ino %llu\n", rc, st->st_ino);
         else
                 obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                           OBD_MD_FLATIME | OBD_MD_FLMTIME |
@@ -413,13 +424,11 @@ static void llu_truncate(struct inode *inode)
 
         EXIT;
         return;
-}
+} /* llu_truncate */
 
 int llu_vmtruncate(struct inode * inode, loff_t offset)
 {
-        struct llu_inode_info *lli = llu_i2info(inode);
-
-        lli->lli_st_size = offset;
+        llu_i2stat(inode)->st_size = offset;
 
         llu_truncate(inode);
 
index 4a3c356..f70116d 100755 (executable)
@@ -4,7 +4,10 @@
 #
 # This script is to generate lib lustre library as a whole. It will leave
 # two files on current directory: liblustre.a and liblustre.so.
-# Integrate them into Makefile.am later
+#
+# The main concern here is the order in which the libraries are linked.
+#
+# FIXME: how can this be done cleanly from the makefile?
 #
 
 AR=/usr/bin/ar
@@ -14,14 +17,16 @@ RANLIB=/usr/bin/ranlib
 CWD=`pwd`
 
 SYSIO=$1
+CRAY_PORTALS_LIBS=$2
+LIBS=$3
+
+if [ ! -f $SYSIO/lib/libsysio.a ]; then
+  echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist"
+  exit 1
+fi
 
-#if [ ! -f $SYSIO/lib/libsysio.a ]; then
-#  echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist"
-#  exit 1
-#fi
-#
 # do cleanup at first
-#rm -f liblustre.so
+rm -f liblustre.so
 
 ALL_OBJS=
 
@@ -35,16 +40,34 @@ build_obj_list() {
 #
 # special treatment for libsysio
 #
-#sysio_tmp=$CWD/sysio_tmp_`date +%s`
-#build_sysio_obj_list() {
-#  _objs=`$AR -t $1`
-#  mkdir -p $sysio_tmp
-#  $AR -x $1
-#  mv $_objs $sysio_tmp
-#  for _lib in $_objs; do
-#    ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib ";
-#  done
-#}
+sysio_tmp=$CWD/sysio_tmp_`date +%s`
+rm -rf $sysio_tmp
+# Unpack the archive $1 into $sysio_tmp and append every extracted member
+# (as $sysio_tmp/<member>) to ALL_OBJS.  Any failing step aborts the script,
+# so objects are never extracted into, or listed from, the wrong directory.
+build_sysio_obj_list() {
+  _objs=`$AR -t "$1"` || exit 1
+  mkdir -p "$sysio_tmp" || exit 1
+  cd "$sysio_tmp" || exit 1
+  $AR -x "$1" || exit 1
+  # $sysio_tmp lives directly under $CWD, so this is the old "cd .."
+  cd "$CWD" || exit 1
+  for _lib in $_objs; do
+    ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib ";
+  done
+}
+
+#
+# special treatment for libportals.a
+#
+cray_tmp=$CWD/cray_tmp_`date +%s`
+rm -rf $cray_tmp
+# Unpack the archive $1 into $cray_tmp and append every extracted member
+# (as $cray_tmp/<member>) to ALL_OBJS.  Any failing step aborts the script,
+# so objects are never extracted into, or listed from, the wrong directory.
+build_cray_portals_obj_list() {
+  _objs=`$AR -t "$1"` || exit 1
+  mkdir -p "$cray_tmp" || exit 1
+  cd "$cray_tmp" || exit 1
+  $AR -x "$1" || exit 1
+  # $cray_tmp lives directly under $CWD, so this is the old "cd .."
+  cd "$CWD" || exit 1
+  for _lib in $_objs; do
+    ALL_OBJS=$ALL_OBJS"$cray_tmp/$_lib ";
+  done
+}
 
 # lustre components libs
 build_obj_list . libllite.a
@@ -58,22 +81,24 @@ build_obj_list ../lvfs liblvfs.a
 
 # portals components libs
 build_obj_list ../../portals/utils libuptlctl.a
-build_obj_list ../../portals/unals libtcpnal.a
-build_obj_list ../../portals/portals libportals.a
+
+if [ "x$CRAY_PORTALS_LIBS" = "x" ]; then
+  build_obj_list ../../portals/unals libtcpnal.a
+  build_obj_list ../../portals/portals libportals.a
+# if libportals is already in our LIBS we don't need to link against it here
+elif $(echo "$LIBS" | grep -v -- "-lportals" >/dev/null) ; then
+  build_cray_portals_obj_list $CRAY_PORTALS_LIBS/libportals.a
+fi
 
 # create static lib lsupport
 rm -f $CWD/liblsupport.a
 $AR -cru $CWD/liblsupport.a $ALL_OBJS
 $RANLIB $CWD/liblsupport.a
 
-# libsysio components libs
-build_obj_list $SYSIO/drivers/native libsysio_native.a
-build_obj_list $SYSIO/drivers/sockets libsysio_sockets.a
-build_obj_list $SYSIO/src libsysio.a
-build_obj_list $SYSIO/dev/stdfd libsysio_stdfd.a
-#
-#build_sysio_obj_list $SYSIO/lib/libsysio.a
-#
+# if libsysio is already in our LIBS we don't need to link against it here
+if $(echo "$LIBS" | grep -v -- "-lsysio" >/dev/null) ; then
+  build_sysio_obj_list $SYSIO/lib/libsysio.a
+fi
 
 # create static lib lustre
 rm -f $CWD/liblustre.a
@@ -83,6 +108,7 @@ $RANLIB $CWD/liblustre.a
 # create shared lib lustre
 rm -f $CWD/liblustre.so
 $LD -shared -o $CWD/liblustre.so -init __liblustre_setup_ -fini __liblustre_cleanup_ \
-       $ALL_OBJS -lpthread
+       $ALL_OBJS -lcap -lpthread
 
-#rm -rf $sysio_tmp
+rm -rf $sysio_tmp
+rm -rf $cray_tmp
index d9f3470..cac2df0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light common routines
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <signal.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <sys/queue.h>
 
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
+
+/* env variables */
+#define ENV_LUSTRE_MNTPNT               "LIBLUSTRE_MOUNT_POINT"
+#define ENV_LUSTRE_MNTTGT               "LIBLUSTRE_MOUNT_TARGET"
+#define ENV_LUSTRE_TIMEOUT              "LIBLUSTRE_TIMEOUT"
+#define ENV_LUSTRE_DUMPFILE             "LIBLUSTRE_DUMPFILE"
+#define ENV_LUSTRE_DEBUG_MASK           "LIBLUSTRE_DEBUG_MASK"
+#define ENV_LUSTRE_DEBUG_SUBSYS         "LIBLUSTRE_DEBUG_SUBSYS"
+#define ENV_LUSTRE_NAL_NAME             "LIBLUSTRE_NAL_NAME"
 
 /* both sys/queue.h (libsysio require it) and portals/lists.h have definition
  * of 'LIST_HEAD'. undef it to suppress warnings
  */
 #undef LIST_HEAD
-
-#include <portals/api-support.h> /* needed for ptpctl.h */
 #include <portals/ptlctl.h>     /* needed for parse_dump */
-#include <procbridge.h>
 
+#include "lutil.h"
 #include "llite_lib.h"
 
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);
-
-ptl_handle_ni_t         tcpnal_ni;
-struct task_struct     *current;
-
-/* portals interfaces */
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-        switch (nal)
-        {
-        case SOCKNAL:
-                return &tcpnal_ni;
-        default:
-                return NULL;
-        }
-}
-
-inline void
-kportal_put_ni (int nal)
-{
-        return;
-}
-
-struct ldlm_namespace;
-struct ldlm_res_id;
-struct obd_import;
-
-void *inter_module_get(char *arg)
-{
-        if (!strcmp(arg, "tcpnal_ni"))
-                return &tcpnal_ni;
-        else if (!strcmp(arg, "ldlm_cli_cancel_unused"))
-                return ldlm_cli_cancel_unused;
-        else if (!strcmp(arg, "ldlm_namespace_cleanup"))
-                return ldlm_namespace_cleanup;
-        else if (!strcmp(arg, "ldlm_replay_locks"))
-                return ldlm_replay_locks;
-        else
-                return NULL;
-}
-
-/* XXX move to proper place */
-char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
-{
-        switch(nal){
-        case TCPNAL:
-                /* userspace NAL */
-        case SOCKNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
-                         (__u32)(nid >> 32), HIPQUAD(nid));
-                break;
-        case QSWNAL:
-        case GMNAL:
-        case IBNAL:
-        case SCIMACNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
-                         (__u32)(nid >> 32), (__u32)nid);
-                break;
-        default:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
-                         nal, (long long)nid);
-                break;
-        }
-        return str;
-}
-
-void init_current(char *comm)
-{ 
-        current = malloc(sizeof(*current));
-        current->fs = malloc(sizeof(*current->fs));
-        current->fs->umask = umask(0777);
-        umask(current->fs->umask);
-        strncpy(current->comm, comm, sizeof(current->comm));
-        current->pid = getpid();
-        current->fsuid = 0;
-        current->fsgid = 0;
-        current->cap_effective = -1;
-        memset(&current->pending, 0, sizeof(current->pending));
-}
-
-/* FIXME */
-void generate_random_uuid(unsigned char uuid_out[16])
+static int lllib_init(void)
 {
-        int *arr = (int*)uuid_out;
-        int i;
-
-        for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++)
-                arr[i] = rand();
-}
-
-ptl_nid_t tcpnal_mynid;
+        liblustre_set_nal_nid();
 
-int init_lib_portals()
-{
-        int rc;
-        ENTRY;
-
-        PtlInit();
-        rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
-        if (rc != 0) {
-                CERROR("TCPNAL: PtlNIInit failed: error %d\n", rc);
-                PtlFini();
-                RETURN (rc);
-        }
-        PtlNIDebug(tcpnal_ni, ~0);
-        RETURN(rc);
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
-        /* handle portals command if we want */
-        return 0;
-}
-
-extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
-
-int lib_ioctl_nalcmd(int dev_id, int opc, void * ptr)
-{
-        struct portal_ioctl_data *ptldata;
-
-        if (opc == IOC_PORTAL_NAL_CMD) {
-                ptldata = (struct portal_ioctl_data *) ptr;
-
-                if (ptldata->ioc_nal_cmd == NAL_CMD_REGISTER_MYNID) {
-                        tcpnal_mynid = ptldata->ioc_nid;
-                        printf("mynid: %u.%u.%u.%u\n",
-                                (unsigned)(tcpnal_mynid>>24) & 0xFF,
-                                (unsigned)(tcpnal_mynid>>16) & 0xFF,
-                                (unsigned)(tcpnal_mynid>>8) & 0xFF,
-                                (unsigned)(tcpnal_mynid) & 0xFF);
-                }
-        }
-
-        return (0);
-}
-
-int lib_ioctl(int dev_id, int opc, void * ptr)
-{
-        int rc;
-
-        if (dev_id == OBD_DEV_ID) {
-                struct obd_ioctl_data *ioc = ptr;
-
-                //XXX hack!!!
-                ioc->ioc_plen1 = ioc->ioc_inllen1;
-                ioc->ioc_pbuf1 = ioc->ioc_bulk;
-                //XXX
-
-                rc = class_handle_ioctl(opc, (unsigned long)ptr);
-
-                printf ("proccssing ioctl cmd: %x, rc %d\n", opc,  rc);
-
-                if (rc)
-                        return rc;
-        }
-        return (0);
-}
-
-int lllib_init(char *dumpfile)
-{
-        if (!g_zconf) {
-                /* this parse only get my nid from config file
-                 * before initialize portals
-                 */
-                if (parse_dump(dumpfile, lib_ioctl_nalcmd))
-                        return -1;
-        } else {
-                /* XXX need setup mynid before tcpnal initialize */
-                tcpnal_mynid = ((uint64_t)getpid() << 32) | time(0);
-                printf("LibLustre: TCPNAL NID: %016llx\n", tcpnal_mynid);
-        }
-
-        init_current("dummy");
-        if (init_obdclass() ||
+        if (liblustre_init_current("dummy") ||
+            init_obdclass() ||
             init_lib_portals() ||
             ptlrpc_init() ||
             mdc_init() ||
@@ -231,20 +71,12 @@ int lllib_init(char *dumpfile)
             osc_init())
                 return -1;
 
-        if (!g_zconf && parse_dump(dumpfile, lib_ioctl))
-                return -1;
-
         return _sysio_fssw_register("llite", &llu_fssw_ops);
 }
-#if 0
-static void llu_check_request()
-{
-        liblustre_wait_event(0);
-}
-#endif
 
-int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
+int liblustre_process_log(struct config_llog_instance *cfg,
+                          char *mdsnid, char *mdsname, char *profile,
+                          int allow_recov)
 {
         struct lustre_cfg_bufs bufs;
         struct lustre_cfg *lcfg;
@@ -258,22 +90,30 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
         struct llog_ctxt *ctxt;
         ptl_nid_t nid = 0;
         int nal, err, rc = 0;
+        char *nal_name;
         ENTRY;
 
         generate_random_uuid(uuid);
         class_uuid_unparse(uuid, &mdc_uuid);
 
-        if (ptl_parse_nid(&nid, g_zconf_mdsnid)) {
-                CERROR("Can't parse NID %s\n", g_zconf_mdsnid);
+        if (ptl_parse_nid(&nid, mdsnid)) {
+                CERROR("Can't parse NID %s\n", mdsnid);
                 RETURN(-EINVAL);
         }
 
-        nal = ptl_name2nal("tcp");
+        nal_name = getenv(ENV_LUSTRE_NAL_NAME);
+        if (!nal_name) {
+#if CRAY_PORTALS
+                nal_name = "cray_qk_nal";
+#else
+                nal_name = "tcp";
+#endif
+        }
+        nal = ptl_name2nal(nal_name);
         if (nal <= 0) {
-                CERROR("Can't parse NAL tcp\n");
+                CERROR("Can't parse NAL %s\n", nal_name);
                 RETURN(-EINVAL);
         }
-
         lustre_cfg_bufs_reset(&bufs, NULL);
         lustre_cfg_bufs_set_string(&bufs, 1, peer);
         lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs);
@@ -294,14 +134,14 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
                 GOTO(out_del_uuid, err);
 
         lustre_cfg_bufs_reset(&bufs, name);
-        lustre_cfg_bufs_set_string(&bufs, 1, g_zconf_mdsname);
+        lustre_cfg_bufs_set_string(&bufs, 1, mdsname);
         lustre_cfg_bufs_set_string(&bufs, 2, peer);
         lcfg = lustre_cfg_new(LCFG_SETUP, &bufs);
         err = class_process_config(lcfg);
         lustre_cfg_free(lcfg);
         if (err < 0)
                 GOTO(out_detach, err);
-        
+
         obd = class_name2obd(name);
         if (obd == NULL)
                 GOTO(out_cleanup, err = -EINVAL);
@@ -314,14 +154,14 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov)
         err = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /*connect_flags*/);
         if (err) {
                 CERROR("cannot connect to %s: rc = %d\n",
-                        g_zconf_mdsname, err);
+                        mdsname, err);
                 GOTO(out_cleanup, err);
         }
-        
+
         exp = class_conn2export(&mdc_conn);
-        
+
         ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
-        rc = class_config_parse_llog(ctxt, g_zconf_profile, cfg);
+        rc = class_config_parse_llog(ctxt, profile, cfg);
         if (rc) {
                 CERROR("class_config_parse_llog failed: rc = %d\n", rc);
         }
@@ -353,13 +193,8 @@ out_del_uuid:
 out:
         if (rc == 0)
                 rc = err;
-        
-        RETURN(rc);
-}
 
-static void sighandler_USR1(int signum)
-{
-        /* do nothing */
+        RETURN(rc);
 }
 
 /* parse host:/mdsname/profile string */
@@ -375,7 +210,7 @@ int ll_parse_mount_target(const char *target, char **mdsnid,
         if ((s = strchr(buf, ':'))) {
                 *mdsnid = buf;
                 *s = '\0';
-                                                                                                                        
+
                 while (*++s == '/')
                         ;
                 *mdsname = s;
@@ -389,119 +224,161 @@ int ll_parse_mount_target(const char *target, char **mdsnid,
         return -1;
 }
 
-/* env variables */
-#define ENV_LUSTRE_MNTPNT               "LIBLUSTRE_MOUNT_POINT"
-#define ENV_LUSTRE_MNTTGT               "LIBLUSTRE_MOUNT_TARGET"
-#define ENV_LUSTRE_TIMEOUT              "LIBLUSTRE_TIMEOUT"
-#define ENV_LUSTRE_DUMPFILE             "LIBLUSTRE_DUMPFILE"
+/*
+ * early liblustre init
+ * called from C startup in catamount apps, before main()
+ *
+ * The following is a skeleton sysio startup sequence,
+ * as implemented in C startup (skipping error handling).
+ * In this framework none of these calls need be made here
+ * or in the apps themselves.  The NAMESPACE_STRING specifying
+ * the initial set of fs ops (creates, mounts, etc.) is passed
+ * as an environment variable.
+ *
+ *      _sysio_init();
+ *      _sysio_incore_init();
+ *      _sysio_native_init();
+ *      _sysio_lustre_init();
+ *      _sysio_boot(NAMESPACE_STRING);
+ *
+ * the name _sysio_lustre_init() follows the naming convention
+ * established in other fs drivers from libsysio:
+ *  _sysio_incore_init(), _sysio_native_init()
+ *
+ * _sysio_lustre_init() must be called before _sysio_boot()
+ * to enable libsysio's processing of namespace init strings containing
+ * lustre filesystem operations
+ */
+int _sysio_lustre_init(void)
+{
+        int err;
+        char *timeout = NULL;
+        char *debug_mask = NULL;
+        char *debug_subsys = NULL;
+#ifndef INIT_SYSIO
+        extern void __liblustre_cleanup_(void);
+#endif
 
-extern int _sysio_native_init();
+#if 0
+        portal_debug = -1;
+        portal_subsystem_debug = -1;
+#endif
 
-extern unsigned int obd_timeout;
+        liblustre_init_random();
+
+        err = lllib_init();
+        if (err) {
+                perror("init llite driver");
+                return err;
+        }
+        timeout = getenv(ENV_LUSTRE_TIMEOUT);
+        if (timeout) {
+                obd_timeout = (unsigned int) strtol(timeout, NULL, 0);
+                printf("LibLustre: set obd timeout as %u seconds\n",
+                        obd_timeout);
+        }
 
-/* global variables */
-int     g_zconf = 0;            /* zeroconf or dumpfile */
-char   *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */
-char   *g_zconf_mdsnid = NULL;  /* mdsnid, for zeroconf */
-char   *g_zconf_profile = NULL; /* profile, for zeroconf */
+        /* debug masks */
+        debug_mask = getenv(ENV_LUSTRE_DEBUG_MASK);
+        if (debug_mask)
+                portal_debug = (unsigned int) strtol(debug_mask, NULL, 0);
 
+        debug_subsys = getenv(ENV_LUSTRE_DEBUG_SUBSYS);
+        if (debug_subsys)
+                portal_subsystem_debug =
+                                (unsigned int) strtol(debug_subsys, NULL, 0);
+
+#ifndef INIT_SYSIO
+        (void)atexit(__liblustre_cleanup_);
+#endif
+        return err;
+}
+
+extern int _sysio_native_init();
+extern unsigned int obd_timeout;
+
+char *lustre_path = NULL;
 
 void __liblustre_setup_(void)
 {
-        char *lustre_path = NULL;
         char *target = NULL;
-        char *timeout = NULL;
-        char *dumpfile = NULL;
         char *root_driver = "native";
         char *lustre_driver = "llite";
         char *root_path = "/";
         unsigned mntflgs = 0;
-
         int err;
 
-        /* consider tha case of starting multiple liblustre instances
-         * at a same time on single node.
-         */
-        srand(time(NULL) + getpid());
-
-        signal(SIGUSR1, sighandler_USR1);
-
         lustre_path = getenv(ENV_LUSTRE_MNTPNT);
         if (!lustre_path) {
                 lustre_path = "/mnt/lustre";
         }
 
+        /* mount target */
         target = getenv(ENV_LUSTRE_MNTTGT);
         if (!target) {
-                dumpfile = getenv(ENV_LUSTRE_DUMPFILE);
-                if (!dumpfile) {
-                        CERROR("Neither mount target, nor dumpfile\n");
-                        exit(1);
-                }
-                g_zconf = 0;
-                printf("LibLustre: mount point %s, dumpfile %s\n",
-                        lustre_path, dumpfile);
-        } else {
-                if (ll_parse_mount_target(target,
-                                          &g_zconf_mdsnid,
-                                          &g_zconf_mdsname,
-                                          &g_zconf_profile)) {
-                        CERROR("mal-formed target %s \n", target);
-                        exit(1);
-                }
-                g_zconf = 1;
-                printf("LibLustre: mount point %s, target %s\n",
-                        lustre_path, target);
+                printf("LibLustre: no mount target specified\n");
+                exit(1);
         }
+        printf("LibLustre: mount point %s, target %s\n",
+                lustre_path, target);
 
-        timeout = getenv(ENV_LUSTRE_TIMEOUT);
-        if (timeout) {
-                obd_timeout = (unsigned int) atoi(timeout);
-                printf("LibLustre: set obd timeout as %u seconds\n",
-                        obd_timeout);
-        }
 
-        if (_sysio_init() != 0) {
+#ifdef INIT_SYSIO
+        /* initialize libsysio & mount rootfs */
+        if (_sysio_init()) {
                 perror("init sysio");
                 exit(1);
         }
-
-        /* cygwin don't need native driver */
-#ifndef __CYGWIN__
         _sysio_native_init();
-#endif
 
         err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
         if (err) {
-                perror(root_driver);
+                fprintf(stderr, "sysio mount failed: %s\n", strerror(errno));
                 exit(1);
         }
 
-#if 1
-        portal_debug = 0;
-        portal_subsystem_debug = 0;
-#endif
-        err = lllib_init(dumpfile);
-        if (err) {
-                perror("init llite driver");
+        if (_sysio_lustre_init())
                 exit(1);
-        }       
+#endif /* INIT_SYSIO */
 
-        err = mount("/", lustre_path, lustre_driver, mntflgs, NULL);
+        err = mount(target, lustre_path, lustre_driver, mntflgs, NULL);
         if (err) {
-                errno = -err;
-                perror(lustre_driver);
+                fprintf(stderr, "Lustre mount failed: %s\n", strerror(errno));
                 exit(1);
         }
-
-#if 0
-        __sysio_hook_sys_enter = llu_check_request;
-        __sysio_hook_sys_leave = NULL;
-#endif
 }
 
 void __liblustre_cleanup_(void)
 {
+#ifndef INIT_SYSIO
+        /* guard against being called multiple times */
+        static int cleaned = 0;
+
+        if (cleaned)
+                return;
+        cleaned++;
+#endif
+
+        /* the user app might chdir to a lustre directory and leave a busy
+         * pnode during the final libsysio cleanup. here we chdir back to
+         * "/", but that can't fix the situation where liblustre is mounted
+         * at "/".
+         */
+        chdir("/");
+#if 0
+        umount(lustre_path);
+#endif
+        /* we can't call umount here, because libsysio will not clean up
+         * open files for us. _sysio_shutdown() cleans up fds first, but
+         * that also closes the sockets we need in order to umount
+         * liblustre. this dilemma led to another hack in
+         * libsysio/src/file_hack.c FIXME
+         */
+#ifdef INIT_SYSIO
         _sysio_shutdown();
+        cleanup_lib_portals();
         PtlFini();
+#else
+        _sysio_shutdown();
+#endif
 }
index 4462311..e254ea0 100644 (file)
@@ -19,6 +19,8 @@
 struct ll_file_data {
         struct obd_client_handle fd_mds_och;
         __u32 fd_flags;
+        struct lustre_handle fd_cwlockh;
+        unsigned long fd_gid;
 };
 
 struct llu_sb_info
@@ -36,7 +38,6 @@ struct llu_sb_info
 };
 
 #define LL_SBI_NOLCK            0x1
-#define LL_SBI_READAHEAD        0x2
 
 #define LLI_F_HAVE_OST_SIZE_LOCK        0
 #define LLI_F_HAVE_MDS_SIZE_LOCK        1
@@ -49,15 +50,13 @@ struct llu_inode_info {
         char                   *lli_symlink_name;
         struct semaphore        lli_open_sem;
         __u64                   lli_maxbytes;
-        unsigned long           lli_flags;
+        unsigned long          lli_flags;
 
         /* for libsysio */
         struct file_identifier  lli_sysio_fid;
 
         struct lookup_intent   *lli_it;
 
-        /* XXX workaround for libsysio unlink */
-        int                     lli_stale_flag;
         /* XXX workaround for libsysio readdir */
         loff_t                  lli_dir_pos;
 
@@ -69,50 +68,9 @@ struct llu_inode_info {
         int                     lli_open_flags;
         int                     lli_open_count;
 
-        /* stat FIXME not 64 bit clean */
-        dev_t                   lli_st_dev;
-        ino_t                   lli_st_ino;
-        mode_t                  lli_st_mode;
-        nlink_t                 lli_st_nlink;
-        uid_t                   lli_st_uid;
-        gid_t                   lli_st_gid;
-        dev_t                   lli_st_rdev;
-        loff_t                  lli_st_size;
-        unsigned int            lli_st_blksize;
-        unsigned long           lli_st_blocks;
-        time_t                  lli_st_atime;
-        time_t                  lli_st_mtime;
-        time_t                  lli_st_ctime;
-
         /* not for stat, change it later */
-        int                    lli_st_flags;
-        unsigned long          lli_st_generation;
-};
-
-#define LLU_SYSIO_COOKIE_SIZE(exp, x) \
-        (sizeof(struct llu_sysio_cookie) + \
-         sizeof(struct ll_async_page) * (x) + \
-         sizeof(struct page) * (x) + \
-         llap_cookie_size * (x))
-
-struct llu_sysio_cookie {
-        struct obd_io_group    *lsc_oig;
-        struct inode           *lsc_inode;
-        int                     lsc_maxpages;
-        int                     lsc_npages;
-        struct ll_async_page   *lsc_llap;
-        struct page            *lsc_pages;
-        void                   *lsc_llap_cookie;
-        __u64                   lsc_rwcount;
-};
-
-/* XXX why uio.h haven't the definition? */
-#define MAX_IOVEC 32
-
-struct llu_sysio_callback_args
-{
-        int ncookies;
-        struct llu_sysio_cookie *cookies[MAX_IOVEC];
+        int                     lli_st_flags;
+        unsigned long           lli_st_generation;
 };
 
 static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs)
@@ -125,6 +83,11 @@ static inline struct llu_inode_info *llu_i2info(struct inode *inode)
         return (struct llu_inode_info*)(inode->i_private);
 }
 
+static inline struct intnl_stat *llu_i2stat(struct inode *inode)
+{
+        return &inode->i_stbuf;
+}
+
 static inline struct llu_sb_info *llu_i2sbi(struct inode *inode)
 {
         return llu_i2info(inode)->lli_sbi;
@@ -153,8 +116,8 @@ do {                                                                           \
                OBD_ALLOC(temp, sizeof(*temp));                                        \
         memcpy(temp, it, sizeof(*temp));                                       \
         llu_i2info(inode)->lli_it = temp;                                      \
-        CDEBUG(D_DENTRY, "alloc intent %p to inode %p(ino %lu)\n",             \
-                        temp, inode, llu_i2info(inode)->lli_st_ino);           \
+        CDEBUG(D_DENTRY, "alloc intent %p to inode %p(ino %llu)\n",            \
+                        temp, inode, llu_i2stat(inode)->st_ino);               \
 } while(0)
 
 
@@ -164,8 +127,8 @@ do {                                                                           \
                                                                                \
         LASSERT(it);                                                           \
         llu_i2info(inode)->lli_it = NULL;                                      \
-        CDEBUG(D_DENTRY, "dettach intent %p from inode %p(ino %lu)\n",         \
-                        it, inode, llu_i2info(inode)->lli_st_ino);             \
+        CDEBUG(D_DENTRY, "dettach intent %p from inode %p(ino %llu)\n",        \
+                        it, inode, llu_i2stat(inode)->st_ino);                 \
 } while(0)
 
 /* interpet return codes from intent lookup */
@@ -186,26 +149,24 @@ struct it_cb_data {
 static inline void ll_i2uctxt(struct ll_uctxt *ctxt, struct inode *i1,
                               struct inode *i2)
 {
-        struct llu_inode_info *lli1 = llu_i2info(i1);
-        struct llu_inode_info *lli2;
+        struct intnl_stat *st = llu_i2stat(i1);
 
         LASSERT(i1);
         LASSERT(ctxt);
 
-        if (in_group_p(lli1->lli_st_gid))
-                ctxt->gid1 = lli1->lli_st_gid;
+        if (in_group_p(st->st_gid))
+                ctxt->gid1 = st->st_gid;
         else
                 ctxt->gid1 = -1;
 
         if (i2) {
-                lli2 = llu_i2info(i2);
-                if (in_group_p(lli2->lli_st_gid))
-                        ctxt->gid2 = lli2->lli_st_gid;
+                st = llu_i2stat(i2);
+                if (in_group_p(st->st_gid))
+                        ctxt->gid2 = st->st_gid;
                 else
                         ctxt->gid2 = -1;
-        } else {
-                ctxt->gid2 = -1;
-        }
+        } else 
+                ctxt->gid2 = 0;
 }
 
 
@@ -215,12 +176,6 @@ typedef int (*intent_finish_cb)(struct ptlrpc_request *,
 int llu_intent_lock(struct inode *parent, struct pnode *pnode,
                     struct lookup_intent *, int flags, intent_finish_cb);
 
-/* FIXME */
-static inline int ll_permission(struct inode *inode, int flag, void * unused)
-{
-        return 0;
-}
-
 static inline __u64 ll_file_maxbytes(struct inode *inode)
 {
         return llu_i2info(inode)->lli_maxbytes;
@@ -232,16 +187,19 @@ struct mount_option_s
         char *osc_uuid;
 };
 
+#define IS_BAD_PTR(ptr)         \
+        ((unsigned long)(ptr) == 0 || (unsigned long)(ptr) > -1000UL)
+
 /* llite_lib.c */
 void generate_random_uuid(unsigned char uuid_out[16]);
-int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov);
+int liblustre_process_log(struct config_llog_instance *cfg,
+                       char *mdsnid,
+                       char *mdsname,
+                       char *profile,
+                       int allow_recov);
 int ll_parse_mount_target(const char *target, char **mdsnid,
                           char **mdsname, char **profile);
 
-extern int     g_zconf;
-extern char   *g_zconf_mdsnid;
-extern char   *g_zconf_mdsname;
-extern char   *g_zconf_profile;
 extern struct mount_option_s mount_option;
 
 /* super.c */
@@ -252,6 +210,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
 int ll_it_open_error(int phase, struct lookup_intent *it);
 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md);
 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm);
+int llu_setattr_raw(struct inode *inode, struct iattr *attr);
 
 extern struct fssw_ops llu_fssw_ops;
 
@@ -266,20 +225,15 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode);
 int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
 int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode);
 int llu_iop_close(struct inode *inode);
-int llu_iop_ipreadv(struct inode *ino, struct ioctx *ioctxp);
-int llu_iop_ipwritev(struct inode *ino, struct ioctx *ioctxp);
+_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off);
 int llu_vmtruncate(struct inode * inode, loff_t offset);
 void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
 int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
 
 /* rw.c */
-int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED);
-struct llu_sysio_callback_args*
-llu_file_write(struct inode *inode, const struct iovec *iovec,
-               size_t iovlen, loff_t pos);
-struct llu_sysio_callback_args*
-llu_file_read(struct inode *inode, const struct iovec *iovec,
-              size_t iovlen, loff_t pos);
+int llu_iop_read(struct inode *ino, struct ioctx *ioctxp);
+int llu_iop_write(struct inode *ino, struct ioctx *ioctxp);
+int llu_iop_iodone(struct ioctx *ioctxp);
 int llu_glimpse_size(struct inode *inode);
 int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
                     struct lov_stripe_md *lsm, int mode,
index 7ad8aa5..bc4f64e 100644 (file)
 #include <signal.h>
 #include <sys/types.h>
 
-#ifndef REDSTORM
 #include <fcntl.h>
+#ifdef HAVE_NETDB_H
 #include <netdb.h>
+#endif
 #include <syscall.h>
 #include <sys/utsname.h>
+#ifdef HAVE_NETINET_IN_H
 #include <netinet/in.h>
+#endif
 #include <sys/socket.h>
+#ifdef HAVE_ARPA_INET_H
 #include <arpa/inet.h>
-#else
-#include <sys/socket.h>
+#endif
+#ifdef HAVE_CATAMOUNT_DATA_H
 #include <catamount/data.h>
 #endif
 
 #include "lutil.h"
 
-#ifdef CRAY_PORTALS
+#if CRAY_PORTALS
 void portals_debug_dumplog(void){};
 #endif
 
-unsigned int portal_subsystem_debug = ~0 - S_NAL;
+unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);
 unsigned int portal_debug = 0;
 
 struct task_struct     *current;
@@ -68,28 +72,28 @@ void *inter_module_get(char *arg)
 char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
 {
         if (nid == PTL_NID_ANY) {
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%s",
-                         "PTL_NID_ANY");
+                snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY");
                 return str;
         }
 
         switch(nal){
-#ifndef CRAY_PORTALS
+#if !CRAY_PORTALS
         case TCPNAL:
                 /* userspace NAL */
+        case IIBNAL:
         case OPENIBNAL:
         case SOCKNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
+                snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u",
                          (__u32)(nid >> 32), HIPQUAD(nid));
                 break;
         case QSWNAL:
         case GMNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
+                snprintf(str, PTL_NALFMT_SIZE, "%u:%u",
                          (__u32)(nid >> 32), (__u32)nid);
                 break;
 #endif
         default:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
+                snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx",
                          nal, (long long)nid);
                 break;
         }
@@ -98,35 +102,22 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
 
 char *portals_id2str(int nal, ptl_process_id_t id, char *str)
 {
-        switch(nal){
-#ifndef CRAY_PORTALS
-        case TCPNAL:
-                /* userspace NAL */
-        case OPENIBNAL:
-        case SOCKNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u",
-                         (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid);
-                break;
-        case QSWNAL:
-        case GMNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u",
-                         (__u32)(id.nid >> 32), (__u32)id.nid, id.pid);
-                break;
-#endif
-        default:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx",
-                         nal, (long long)id.nid, (long)id.pid );
-                break;
-        }
+        int   len;
+        
+        portals_nid2str(nal, id.nid, str);
+        len = strlen(str);
+        snprintf(str + len, PTL_NALFMT_SIZE - len, ",%u", id.pid);
         return str;
 }
 
-#ifndef REDSTORM
 /*
  * random number generator stuff
  */
+#ifdef LIBLUSTRE_USE_URANDOM
 static int _rand_dev_fd = -1;
+#endif
 
+#ifdef HAVE_GETHOSTBYNAME
 static int get_ipv4_addr()
 {
         struct utsname myname;
@@ -148,25 +139,33 @@ static int get_ipv4_addr()
 
         return ip;
 }
+#endif
 
 void liblustre_init_random()
 {
         int seed;
         struct timeval tv;
 
+#ifdef LIBLUSTRE_USE_URANDOM
         _rand_dev_fd = syscall(SYS_open, "/dev/urandom", O_RDONLY);
         if (_rand_dev_fd >= 0) {
-                if (syscall(SYS_read, _rand_dev_fd, &seed, sizeof(int)) ==
-                    sizeof(int)) {
+                if (syscall(SYS_read, _rand_dev_fd,
+                            &seed, sizeof(int)) == sizeof(int)) {
                         srand(seed);
                         return;
                 }
                 syscall(SYS_close, _rand_dev_fd);
                 _rand_dev_fd = -1;
         }
+#endif /* LIBLUSTRE_USE_URANDOM */
 
+#ifdef HAVE_GETHOSTBYNAME
+        seed = get_ipv4_addr();
+#else
+        seed = _my_pnid;
+#endif
         gettimeofday(&tv, NULL);
-        srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(get_ipv4_addr()));
+        srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(seed));
 }
 
 void get_random_bytes(void *buf, int size)
@@ -174,12 +173,14 @@ void get_random_bytes(void *buf, int size)
         char *p = buf;
         LASSERT(size >= 0);
 
+#ifdef LIBLUSTRE_USE_URANDOM
         if (_rand_dev_fd >= 0) {
                 if (syscall(SYS_read, _rand_dev_fd, buf, size) == size)
                         return;
                 syscall(SYS_close, _rand_dev_fd);
                 _rand_dev_fd = -1;
         }
+#endif
 
         while (size--) 
                 *p++ = rand();
@@ -187,6 +188,7 @@ void get_random_bytes(void *buf, int size)
 
 static void init_capability(int *res)
 {
+#ifdef HAVE_LIBCAP
         cap_t syscap;
         cap_flag_value_t capval;
         int i;
@@ -207,10 +209,23 @@ static void init_capability(int *res)
                         }
                 }
         }
+#else
+       /*
+        * Set fake capability flags to ship to the Linux server
+        * from client platforms that have none (e.g. catamount):
+        *  full capability for root,
+        *  no capability for anybody else.
+        */
+#define FAKE_ROOT_CAP 0x1ffffeff
+#define FAKE_USER_CAP 0
+
+       *res = (current->fsuid == 0) ? FAKE_ROOT_CAP: FAKE_USER_CAP;
+#endif
 }
 
 void liblustre_set_nal_nid()
 {
+#ifdef HAVE_GETHOSTBYNAME
         pid_t pid;
         uint32_t ip;
         struct in_addr in;
@@ -226,36 +241,7 @@ void liblustre_set_nal_nid()
         in.s_addr = htonl(ip);
         printf("LibLustre: TCPNAL NID: %016llx (%s:%u)\n", 
                tcpnal_mynid, inet_ntoa(in), pid);
-}
-
-#else /* REDSTORM */
-
-void liblustre_init_random()
-{
-        struct timeval tv;
-        UINT32 nodeid;
-
-        gettimeofday(&tv, NULL);
-        nodeid = _my_pnid;
-        srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(nodeid));
-}
-
-void get_random_bytes(void *buf, int size)
-{
-        char *p = buf;
-        LASSERT(size >= 0);
-
-        while (size--) 
-                *p++ = rand();
-}
-
-static void init_capability(int *res)
-{
-        *res = 0;
-}
-
-void liblustre_set_nal_nid()
-{
+#else
         pid_t pid;
         uint32_t ip;
 
@@ -266,10 +252,9 @@ void liblustre_set_nal_nid()
         tcpnal_mynid = ip | pid;
         printf("LibLustre: NAL NID: %08x (%u)\n", 
                tcpnal_mynid, pid);
+#endif
 }
 
-#endif /* REDSOTRM */
-
 int in_group_p(gid_t gid)
 {
         int i;
index 74eddb2..9e69a9e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light name resolution
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <sys/fcntl.h>
 #include <sys/queue.h>
 
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
 
 #undef LIST_HEAD
 
@@ -70,6 +75,7 @@ static void ll_intent_release(struct lookup_intent *it)
         EXIT;
 }
 
+#if 0
 /*
  * remove the stale inode from pnode
  */
@@ -94,6 +100,7 @@ void unhook_stale_inode(struct pnode *pno)
         EXIT;
         return;
 }
+#endif
 
 void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode)
 {
@@ -102,14 +109,14 @@ void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode)
 
         if (it && pnode->p_base->pb_ino != NULL) {
                 struct inode *inode = pnode->p_base->pb_ino;
-                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%lu)\n",
-                       inode, llu_i2info(inode)->lli_st_ino,
+                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%llu/%lu)\n",
+                       inode, llu_i2stat(inode)->st_ino,
                        llu_i2info(inode)->lli_st_generation);
                 mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode);
         }
 
-        /* drop IT_LOOKUP locks */
-        if (it->it_op == IT_LOOKUP)
+        /* drop lookup/getattr locks */
+        if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)
                 ll_intent_release(it);
 
 }
@@ -140,23 +147,25 @@ int llu_mdc_blocking_ast(struct ldlm_lock *lock,
         case LDLM_CB_CANCELING: {
                 struct inode *inode = llu_inode_from_lock(lock);
                 struct llu_inode_info *lli;
+                struct intnl_stat *st;
 
                 /* Invalidate all dentries associated with this inode */
                 if (inode == NULL)
                         break;
 
                 lli =  llu_i2info(inode);
+                st = llu_i2stat(inode);
 
                 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
 
-                if (lock->l_resource->lr_name.name[0] != lli->lli_st_ino ||
+                if (lock->l_resource->lr_name.name[0] != st->st_ino ||
                     lock->l_resource->lr_name.name[1] != lli->lli_st_generation) {
-                        LDLM_ERROR(lock, "data mismatch with ino %lu/%lu",
-                                   lli->lli_st_ino, lli->lli_st_generation);
+                        LDLM_ERROR(lock, "data mismatch with ino %llu/%lu",
+                                   st->st_ino, lli->lli_st_generation);
                 }
-                if (S_ISDIR(lli->lli_st_mode)) {
-                        CDEBUG(D_INODE, "invalidating inode %lu\n",
-                               lli->lli_st_ino);
+                if (S_ISDIR(st->st_mode)) {
+                        CDEBUG(D_INODE, "invalidating inode %llu\n",
+                               st->st_ino);
 
                         llu_invalidate_inode_pages(inode);
                 }
@@ -215,8 +224,8 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
         int rc;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%x\n",
-               pb->pb_name.name, it ? it->it_op : 0);
+        CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,intent=%x\n",
+               (int)pb->pb_name.len, pb->pb_name.name, it ? it->it_op : 0);
 
         /* We don't want to cache negative dentries, so return 0 immediately.
          * We believe that this is safe, that negative dentries cannot be
@@ -226,26 +235,16 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
                 RETURN(0);
         }
 
-        /* check stale inode */
-        if (llu_i2info(pb->pb_ino)->lli_stale_flag)
-                unhook_stale_inode(pnode);
-
-        /* check again because unhook_stale_inode() might generate
-         * negative pnode */
-        if (pb->pb_ino == NULL) {
-                CDEBUG(D_INODE, "negative pb\n");
-                RETURN(0);
-        }
-
         /* This is due to bad interaction with libsysio. remove this when we
          * switched to libbsdio XXX
          */
         {
                 struct llu_inode_info *lli = llu_i2info(pb->pb_ino);
+                struct intnl_stat *st = llu_i2stat(pb->pb_ino);
                 if (lli->lli_it) {
-                        CDEBUG(D_INODE, "inode %lu still have intent "
+                        CDEBUG(D_INODE, "inode %llu still have intent "
                                         "%p(opc 0x%x), release it\n",
-                                        lli->lli_st_ino, lli->lli_it,
+                                        st->st_ino, lli->lli_it,
                                         lli->lli_it->it_op);
                         ll_intent_release(lli->lli_it);
                         OBD_FREE(lli->lli_it, sizeof(*lli->lli_it));
@@ -279,14 +278,19 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
                 GOTO(out, rc = 0);
 
         rc = pnode_revalidate_finish(req, 1, it, pnode);
+        if (rc != 0) {
+                ll_intent_release(it);
+                GOTO(out, rc = 0);
+        }
+        rc = 1;
 
         /* Note: ll_intent_lock may cause a callback, check this! */
 
-        if (it->it_op & (IT_OPEN | IT_GETATTR))
+        if (it->it_op & IT_OPEN)
                 LL_SAVE_INTENT(pb->pb_ino, it);
-        RETURN(1);
+
  out:
-        if (req)
+        if (req && rc == 1)
                 ptlrpc_req_finished(req);
         if (rc == 0) {
                 LASSERT(pb->pb_ino);
@@ -294,9 +298,6 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it)
                 pb->pb_ino = NULL;
         } else {
                 llu_lookup_finish_locks(it, pnode);
-                llu_i2info(pb->pb_ino)->lli_stale_flag = 0;
-                if (it->it_op & (IT_OPEN | IT_GETATTR))
-                        LL_SAVE_INTENT(pb->pb_ino, it);
         }
         RETURN(rc);
 }
@@ -311,13 +312,37 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
         struct inode *inode = NULL;
         int rc;
 
+        /* libsysio requires us to generate the inode right away on
+         * success, so if the MDS created a new inode for us we need to
+         * make sure the create succeeded. Thus we cannot defer handling
+         * of any error to llu_file_open() time. */
+        if (it_disposition(it, DISP_OPEN_CREATE) &&
+            it_open_error(DISP_OPEN_CREATE, it)) {
+                CDEBUG(D_INODE, "detect mds create error\n");
+                return it_open_error(DISP_OPEN_CREATE, it);
+        }
+        if (it_disposition(it, DISP_OPEN_OPEN) &&
+            it_open_error(DISP_OPEN_OPEN, it)) {
+                CDEBUG(D_INODE, "detect mds open error\n");
+                /* undo what was done by mdc_intent_lock */
+                if (it_disposition(it, DISP_OPEN_CREATE) &&
+                    !it_open_error(DISP_OPEN_CREATE, it)) {
+                        LASSERT(request);
+                        LASSERT(atomic_read(&request->rq_refcount) > 1);
+                        CDEBUG(D_INODE, "dec a ref of req %p\n", request);
+                        ptlrpc_req_finished(request);
+                }
+                return it_open_error(DISP_OPEN_OPEN, it);
+        }
+
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return
-         * Note: libsysio require the inode must be generated here
          */
-        if ((it->it_op & IT_CREAT) || !it_disposition(it, DISP_LOOKUP_NEG)) {
+        if (!it_disposition(it, DISP_LOOKUP_NEG) ||
+            (it->it_op & IT_CREAT)) {
                 struct lustre_md md;
                 struct llu_inode_info *lli;
+                struct intnl_stat *st;
                 ENTRY;
 
                 rc = mdc_req2lustre_md(request, offset, sbi->ll_osc_exp, &md);
@@ -325,23 +350,23 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                         RETURN(rc);
 
                 inode = llu_iget(parent->i_fs, &md);
-                if (!inode) {
+                if (!inode || IS_ERR(inode)) {
                         /* free the lsm if we allocated one above */
                         if (md.lsm != NULL)
                                 obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
-                        RETURN(-ENOMEM);
+                        RETURN(inode ? PTR_ERR(inode) : -ENOMEM);
                 } else if (md.lsm != NULL &&
                            llu_i2info(inode)->lli_smd != md.lsm) {
                         obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
                 }
 
                 lli = llu_i2info(inode);
+                st = llu_i2stat(inode);
 
                 /* If this is a stat, get the authoritative file size */
-                if (it->it_op == IT_GETATTR && S_ISREG(lli->lli_st_mode) &&
+                if (it->it_op == IT_GETATTR && S_ISREG(st->st_mode) &&
                     lli->lli_smd != NULL) {
                         struct lov_stripe_md *lsm = lli->lli_smd;
-                        struct ost_lvb lvb;
                         ldlm_error_t rc;
 
                         LASSERT(lsm->lsm_object_id != 0);
@@ -360,7 +385,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
         }
 
         /* intent will be further used in cases of open()/getattr() */
-        if (inode && (it->it_op & (IT_OPEN | IT_GETATTR)))
+        if (inode && (it->it_op & IT_OPEN))
                 LL_SAVE_INTENT(inode, it);
 
         child->p_base->pb_ino = inode;
@@ -517,6 +542,8 @@ int llu_iop_lookup(struct pnode *pnode,
         int rc;
         ENTRY;
 
+        liblustre_wait_event(0);
+
         *inop = NULL;
 
         /* the mount root inode have no name, so don't call
@@ -550,4 +577,3 @@ int llu_iop_lookup(struct pnode *pnode,
 
         RETURN(rc);
 }
-
index ea99362..41c43d6 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light block IO
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 #include <assert.h>
 #include <time.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <sys/queue.h>
 #include <fcntl.h>
+#include <sys/uio.h>
 
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
 
 #undef LIST_HEAD
 
 #include "llite_lib.h"
 
+struct llu_io_group
+{
+        struct obd_io_group    *lig_oig;
+        struct inode           *lig_inode;
+        int                     lig_maxpages;
+        int                     lig_npages;
+        __u64                   lig_rwcount;
+        struct ll_async_page   *lig_llaps;
+        struct page            *lig_pages;
+        void                   *lig_llap_cookies;
+};
+
+#define LLU_IO_GROUP_SIZE(x) \
+        (sizeof(struct llu_io_group) + \
+         (sizeof(struct ll_async_page) + \
+          sizeof(struct page) + \
+          llap_cookie_size) * (x))
+
+struct llu_io_session
+{
+        struct inode           *lis_inode;
+        int                     lis_cmd;
+        int                     lis_max_groups;
+        int                     lis_ngroups;
+        struct llu_io_group    *lis_groups[0];
+};
+#define LLU_IO_SESSION_SIZE(x)  \
+        (sizeof(struct llu_io_session) + (x) * 2 * sizeof(void *))
+
+
+typedef ssize_t llu_file_piov_t(const struct iovec *iovec, int iovlen,
+                                _SYSIO_OFF_T pos, ssize_t len,
+                                void *private);
+
 size_t llap_cookie_size;
 
-static int llu_lock_to_stripe_offset(struct inode *inode,struct ldlm_lock *lock)
+static int llu_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
@@ -162,7 +203,7 @@ static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp)
         lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms;
 
         LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
-                   lli->lli_st_size, stripe, lvb->lvb_size);
+                   llu_i2stat(inode)->st_size, stripe, lvb->lvb_size);
  iput:
         I_RELE(inode);
  out:
@@ -175,37 +216,35 @@ static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp)
         return rc;
 }
 
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
-__u64 lov_merge_blocks(struct lov_stripe_md *lsm);
-__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time);
-
 /* NB: lov_merge_size will prefer locally cached writes if they extend the
  * file (because it prefers KMS over RSS when larger) */
 int llu_glimpse_size(struct inode *inode)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
         struct lustre_handle lockh = { 0 };
         int rc, flags = LDLM_FL_HAS_INTENT;
         ENTRY;
 
-        CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", lli->lli_st_ino);
+        CDEBUG(D_DLMTRACE, "Glimpsing inode %llu\n", st->st_ino);
 
         rc = obd_enqueue(sbi->ll_osc_exp, lli->lli_smd, LDLM_EXTENT, &policy,
                          LCK_PR, &flags, llu_extent_lock_callback,
                          ldlm_completion_ast, llu_glimpse_callback, inode,
                          sizeof(struct ost_lvb), lustre_swab_ost_lvb, &lockh);
-        if (rc > 0)
-                RETURN(-EIO);
-
-        lli->lli_st_size = lov_merge_size(lli->lli_smd, 0);
-        lli->lli_st_blocks = lov_merge_blocks(lli->lli_smd);
-        lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, lli->lli_st_mtime);
+        if (rc) {
+                CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc);
+                RETURN(rc > 0 ? -EIO : rc);
+        }
 
-        CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n",
-               lli->lli_st_size, lli->lli_st_blocks);
+        st->st_size = lov_merge_size(lli->lli_smd, 0);
+        st->st_blocks = lov_merge_blocks(lli->lli_smd);
+        //lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime);
 
+        CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n",
+               st->st_size, st->st_blocks);
 
         obd_cancel(sbi->ll_osc_exp, lli->lli_smd, LCK_PR, &lockh);
 
@@ -218,7 +257,7 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
                     int ast_flags)
 {
         struct llu_sb_info *sbi = llu_i2sbi(inode);
-        struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         int rc;
         ENTRY;
 
@@ -229,8 +268,8 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
             (sbi->ll_flags & LL_SBI_NOLCK))
                 RETURN(0);
 
-        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-               lli->lli_st_ino, policy->l_extent.start, policy->l_extent.end);
+        CDEBUG(D_DLMTRACE, "Locking inode %llu, start "LPU64" end "LPU64"\n",
+               st->st_ino, policy->l_extent.start, policy->l_extent.end);
 
         rc = obd_enqueue(sbi->ll_osc_exp, lsm, LDLM_EXTENT, policy, mode,
                          &ast_flags, llu_extent_lock_callback,
@@ -241,91 +280,13 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
 
         if (policy->l_extent.start == 0 &&
             policy->l_extent.end == OBD_OBJECT_EOF)
-                lli->lli_st_size = lov_merge_size(lsm, 1);
+                st->st_size = lov_merge_size(lsm, 1);
 
-        RETURN(rc);
-}
-
-#if 0
-int llu_extent_lock_no_validate(struct ll_file_data *fd,
-                                struct inode *inode,
-                                struct lov_stripe_md *lsm,
-                                int mode,
-                                struct ldlm_extent *extent,
-                                struct lustre_handle *lockh,
-                                int ast_flags)
-{
-        struct llu_sb_info *sbi = llu_i2sbi(inode);
-        struct llu_inode_info *lli = llu_i2info(inode);
-        int rc;
-        ENTRY;
-
-        LASSERT(lockh->cookie == 0);
-
-        /* XXX phil: can we do this?  won't it screw the file size up? */
-        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
-            (sbi->ll_flags & LL_SBI_NOLCK))
-                RETURN(0);
-
-        CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n",
-               lli->lli_st_ino, extent->start, extent->end);
-
-        rc = obd_enqueue(sbi->ll_osc_exp, lsm, NULL, LDLM_EXTENT, extent,
-                         sizeof(extent), mode, &ast_flags,
-                         llu_extent_lock_callback, inode, lockh);
+        //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime);
 
         RETURN(rc);
 }
 
-/*
- * this grabs a lock and manually implements behaviour that makes it look like
- * the OST is returning the file size with each lock acquisition.
- */
-int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
-                    struct lov_stripe_md *lsm, int mode,
-                    struct ldlm_extent *extent, struct lustre_handle *lockh)
-{
-        struct llu_inode_info *lli = llu_i2info(inode);
-        struct obd_export *exp = llu_i2obdexp(inode);
-        struct ldlm_extent size_lock;
-        struct lustre_handle match_lockh = {0};
-        int flags, rc, matched;
-        ENTRY;
-
-        rc = llu_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh, 0);
-        if (rc != ELDLM_OK)
-                RETURN(rc);
-
-        if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags))
-                RETURN(0);
-
-        rc = llu_inode_getattr(inode, lsm);
-        if (rc) {
-                llu_extent_unlock(fd, inode, lsm, mode, lockh);
-                RETURN(rc);
-        }
-
-        size_lock.start = lli->lli_st_size;
-        size_lock.end = OBD_OBJECT_EOF;
-
-        /* XXX I bet we should be checking the lock ignore flags.. */
-        /* FIXME use LDLM_FL_TEST_LOCK instead */
-        flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED;
-        matched = obd_match(exp, lsm, LDLM_EXTENT, &size_lock,
-                            sizeof(size_lock), LCK_PR, &flags, inode,
-                            &match_lockh);
-
-        /* hey, alright, we hold a size lock that covers the size we
-         * just found, its not going to change for a while.. */
-        if (matched == 1) {
-                set_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags);
-                obd_cancel(exp, lsm, LCK_PR, &match_lockh);
-        }
-
-        RETURN(0);
-}
-#endif
-
 int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode,
                 struct lov_stripe_md *lsm, int mode,
                 struct lustre_handle *lockh)
@@ -354,14 +315,6 @@ struct ll_async_page {
         struct inode   *llap_inode;
 };
 
-static struct ll_async_page *llap_from_cookie(void *cookie)
-{
-        struct ll_async_page *llap = cookie;
-        if (llap->llap_magic != LLAP_MAGIC)
-                return ERR_PTR(-EINVAL);
-        return llap;
-};
-
 static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
 {
         struct ll_async_page *llap;
@@ -370,12 +323,7 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         obd_flag valid_flags;
         ENTRY;
 
-        llap = llap_from_cookie(data);
-        if (IS_ERR(llap)) {
-                EXIT;
-                return;
-        }
-
+        llap = LLAP_FROM_COOKIE(data);
         inode = llap->llap_inode;
         lsm = llu_i2info(inode)->lli_smd;
 
@@ -394,13 +342,9 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
 {
         struct ll_async_page *llap;
         struct page *page;
+        ENTRY;
 
-        llap = llap_from_cookie(data);
-        if (IS_ERR(llap)) {
-                EXIT;
-                return;
-        }
-
+        llap = LLAP_FROM_COOKIE(data);
         llap->llap_queued = 0;
         page = llap->llap_page;
 
@@ -412,164 +356,43 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
         EXIT;
 }
 
-static struct obd_async_page_ops llu_async_page_ops = {
-        .ap_make_ready =        NULL,
-        .ap_refresh_count =     NULL,
-        .ap_fill_obdo =         llu_ap_fill_obdo,
-        .ap_completion =        llu_ap_completion,
-};
-
-static
-struct llu_sysio_cookie* get_sysio_cookie(struct inode *inode,
-                                          struct obd_export *exp, int maxpages)
+static void llu_ap_get_ucred(void *data, struct obd_ucred *ouc)
 {
-        struct llu_sysio_cookie *cookie;
-        int rc;
-
-        if (!llap_cookie_size)
-                llap_cookie_size = obd_prep_async_page(llu_i2obdexp(inode),
-                                                       NULL, NULL, NULL, 0,
-                                                       NULL, NULL, NULL);
-        OBD_ALLOC(cookie, LLU_SYSIO_COOKIE_SIZE(exp, maxpages));
-        if (cookie == NULL)
-                goto out;
-
-        I_REF(inode);
-        cookie->lsc_inode = inode;
-        cookie->lsc_maxpages = maxpages;
-        cookie->lsc_llap = (struct ll_async_page *)(cookie + 1);
-        cookie->lsc_pages = (struct page *) (cookie->lsc_llap + maxpages);
-        cookie->lsc_llap_cookie = (void *)(cookie->lsc_pages + maxpages);
-
-        rc = oig_init(&cookie->lsc_oig);
-        if (rc) {
-                OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(exp, maxpages));
-                cookie = NULL;
-        }
-
-out:
-        return cookie;
-}
-
-static
-void put_sysio_cookie(struct llu_sysio_cookie *cookie)
-{
-        struct lov_stripe_md *lsm = llu_i2info(cookie->lsc_inode)->lli_smd;
-        struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode);
-        struct ll_async_page *llap = cookie->lsc_llap;
-#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
-        struct page *pages = cookie->lsc_pages;
-#endif
-        int i;
-
-        for (i = 0; i< cookie->lsc_maxpages; i++) {
-                if (llap[i].llap_cookie)
-                        obd_teardown_async_page(exp, lsm, NULL,
-                                                llap[i].llap_cookie);
-#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
-                if (pages[i]._managed) {
-                        free(pages[i].addr);
-                        pages[i]._managed = 0;
-                }
-#endif
-        }
-
-        I_RELE(cookie->lsc_inode);
-
-        oig_release(cookie->lsc_oig);
-        OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(exp, cookie->lsc_maxpages));
-}
-
-#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
-/* Note: these code should be removed finally, don't need
- * more cleanup
- */
-static
-int prepare_unaligned_write(struct llu_sysio_cookie *cookie)
-{
-        struct inode *inode = cookie->lsc_inode;
-        struct llu_inode_info *lli = llu_i2info(inode);
-        struct lov_stripe_md *lsm = lli->lli_smd;
-        struct obdo oa;
-        struct page *pages = cookie->lsc_pages;
-        int i, pgidx[2] = {0, cookie->lsc_npages-1};
-        int rc;
+        struct ll_async_page *llap;
+        struct ll_uctxt ctxt;
         ENTRY;
 
-        for (i = 0; i < 2; i++) {
-                struct page *oldpage = &pages[pgidx[i]];
-                struct page newpage;
-                struct brw_page pg;
-                char *newbuf;
-
-                if (i == 0 && pgidx[0] == pgidx[1])
-                        continue;
-
-                LASSERT(oldpage->_offset + oldpage->_count <= PAGE_CACHE_SIZE);
+        llap = LLAP_FROM_COOKIE(data);
 
-                if (oldpage->_count == PAGE_CACHE_SIZE)
-                        continue;
-
-                if (oldpage->index << PAGE_CACHE_SHIFT >=
-                    lli->lli_st_size)
-                        continue;
-
-                newbuf = malloc(PAGE_CACHE_SIZE);
-                if (!newbuf)
-                        return -ENOMEM;
+        ouc->ouc_fsuid = current->fsuid;
+        ouc->ouc_fsgid = current->fsgid;
+        ouc->ouc_cap = current->cap_effective;
+        ll_i2uctxt(&ctxt, llap->llap_inode, NULL);
+        ouc->ouc_suppgid1 = ctxt.gid1;
 
-                newpage.index = oldpage->index;
-                newpage.addr = newbuf;
-
-                pg.pg = &newpage;
-                pg.off = ((obd_off)newpage.index << PAGE_CACHE_SHIFT);
-                if (pg.off + PAGE_CACHE_SIZE > lli->lli_st_size)
-                        pg.count = lli->lli_st_size % PAGE_CACHE_SIZE;
-                else
-                        pg.count = PAGE_CACHE_SIZE;
-                pg.flag = 0;
-
-                oa.o_id = lsm->lsm_object_id;
-                oa.o_mode = lli->lli_st_mode;
-                oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
-
-                /* issue read */
-                rc = obd_brw(OBD_BRW_READ, llu_i2obdexp(inode), &oa, lsm, 1, &pg, NULL);
-                if (rc) {
-                        free(newbuf);
-                        RETURN(rc);
-                }
-
-                /* copy page content, and reset page params */
-                memcpy(newbuf + oldpage->_offset,
-                       (char*)oldpage->addr + oldpage->_offset,
-                       oldpage->_count);
-
-                oldpage->addr = newbuf;
-                if ((((obd_off)oldpage->index << PAGE_CACHE_SHIFT) +
-                    oldpage->_offset + oldpage->_count) > lli->lli_st_size)
-                        oldpage->_count += oldpage->_offset;
-                else
-                        oldpage->_count = PAGE_CACHE_SIZE;
-                oldpage->_offset = 0;
-                oldpage->_managed = 1;
-        }
-
-        RETURN(0);
+        EXIT;
 }
-#endif
 
-static
-int llu_prep_async_io(struct llu_sysio_cookie *cookie, int cmd,
-                      char *buf, loff_t pos, size_t count)
+static struct obd_async_page_ops llu_async_page_ops = {
+        .ap_make_ready =        NULL,
+        .ap_refresh_count =     NULL,
+        .ap_fill_obdo =         llu_ap_fill_obdo,
+        .ap_completion =        llu_ap_completion,
+        .ap_get_ucred =         llu_ap_get_ucred,
+};
+
+static int llu_queue_pio(int cmd, struct llu_io_group *group,
+                         char *buf, size_t count, loff_t pos)
 {
-        struct llu_inode_info *lli = llu_i2info(cookie->lsc_inode);
+        struct llu_inode_info *lli = llu_i2info(group->lig_inode);
+        struct intnl_stat *st = llu_i2stat(group->lig_inode);
         struct lov_stripe_md *lsm = lli->lli_smd;
-        struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode);
-        struct page *pages = cookie->lsc_pages;
-        struct ll_async_page *llap = cookie->lsc_llap;
-        void *llap_cookie = cookie->lsc_llap_cookie;
-        int i, rc, npages = 0;
+        struct obd_export *exp = llu_i2obdexp(group->lig_inode);
+        struct page *pages = &group->lig_pages[group->lig_npages],*page = pages;
+        struct ll_async_page *llap = &group->lig_llaps[group->lig_npages];
+        void *llap_cookie = group->lig_llap_cookies +
+                llap_cookie_size * group->lig_npages;
+        int i, rc, npages = 0, ret_bytes = 0;
         ENTRY;
 
         if (!exp)
@@ -587,350 +410,393 @@ int llu_prep_async_io(struct llu_sysio_cookie *cookie, int cmd,
 
                 /* prevent read beyond file range */
                 if ((cmd == OBD_BRW_READ) &&
-                    (pos + bytes) >= lli->lli_st_size) {
-                        if (pos >= lli->lli_st_size)
+                    (pos + bytes) >= st->st_size) {
+                        if (pos >= st->st_size)
                                 break;
-                        bytes = lli->lli_st_size - pos;
+                        bytes = st->st_size - pos;
                 }
 
                 /* prepare page for this index */
-                pages[npages].index = index;
-                pages[npages].addr = buf - offset;
+                page->index = index;
+                page->addr = buf - offset;
 
-                pages[npages]._offset = offset;
-                pages[npages]._count = bytes;
+                page->_offset = offset;
+                page->_count = bytes;
 
+                page++;
                 npages++;
                 count -= bytes;
                 pos += bytes;
                 buf += bytes;
 
-                cookie->lsc_rwcount += bytes;
+                group->lig_rwcount += bytes;
+                ret_bytes += bytes;
         } while (count);
 
-        cookie->lsc_npages = npages;
-
-#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
-        if (cmd == OBD_BRW_WRITE) {
-                rc = prepare_unaligned_write(cookie);
-                if (rc)
-                        RETURN(rc);
-        }
-#endif
+        group->lig_npages += npages;
 
-        for (i = 0; i < npages; i++) {
-                llap[i].llap_magic = LLAP_MAGIC;
-                llap[i].llap_cookie = llap_cookie + i * llap_cookie_size;
-                rc = obd_prep_async_page(exp, lsm, NULL, &pages[i],
-                                         (obd_off)pages[i].index << PAGE_SHIFT,
+        for (i = 0, page = pages; i < npages;
+             i++, page++, llap++, llap_cookie += llap_cookie_size){
+                llap->llap_magic = LLAP_MAGIC;
+                llap->llap_cookie = llap_cookie;
+                rc = obd_prep_async_page(exp, lsm, NULL, page,
+                                         (obd_off)page->index << PAGE_SHIFT,
                                          &llu_async_page_ops,
-                                         &llap[i], &llap[i].llap_cookie);
+                                         llap, &llap->llap_cookie);
                 if (rc) {
-                        llap[i].llap_cookie = NULL;
+                        LASSERT(rc < 0);
+                        llap->llap_cookie = NULL;
                         RETURN(rc);
                 }
-                CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n",
-                       &llap[i], &pages[i], llap[i].llap_cookie,
-                       (obd_off)pages[i].index << PAGE_SHIFT);
-                pages[i].private = (unsigned long)&llap[i];
-                llap[i].llap_page = &pages[i];
-                llap[i].llap_inode = cookie->lsc_inode;
-
-                rc = obd_queue_group_io(exp, lsm, NULL, cookie->lsc_oig,
-                                        llap[i].llap_cookie, cmd,
-                                        pages[i]._offset, pages[i]._count, 0,
+                CDEBUG(D_CACHE, "llap %p page %p group %p obj off "LPU64"\n",
+                       llap, page, llap->llap_cookie,
+                       (obd_off)pages->index << PAGE_SHIFT);
+                page->private = (unsigned long)llap;
+                llap->llap_page = page;
+                llap->llap_inode = group->lig_inode;
+
+                rc = obd_queue_group_io(exp, lsm, NULL, group->lig_oig,
+                                        llap->llap_cookie, cmd,
+                                        page->_offset, page->_count, 0,
                                         ASYNC_READY | ASYNC_URGENT |
                                         ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
-                if (rc)
+                if (rc) {
+                        LASSERT(rc < 0);
                         RETURN(rc);
+                }
 
-                llap[i].llap_queued = 1;
+                llap->llap_queued = 1;
         }
 
-        RETURN(0);
+        RETURN(ret_bytes);
 }
 
 static
-int llu_start_async_io(struct llu_sysio_cookie *cookie)
+struct llu_io_group * get_io_group(struct inode *inode, int maxpages)
 {
-        struct lov_stripe_md *lsm = llu_i2info(cookie->lsc_inode)->lli_smd;
-        struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode);
+        struct llu_io_group *group;
+        int rc;
 
-        return obd_trigger_group_io(exp, lsm, NULL, cookie->lsc_oig);
-}
+        if (!llap_cookie_size)
+                llap_cookie_size = obd_prep_async_page(llu_i2obdexp(inode),
+                                                       NULL, NULL, NULL, 0,
+                                                       NULL, NULL, NULL);
 
-/*
- * read/write a continuous buffer for an inode (zero-copy)
- */
-struct llu_sysio_cookie*
-llu_rw(int cmd, struct inode *inode, char *buf, size_t count, loff_t pos)
-{
-        struct llu_sysio_cookie *cookie;
-        int max_pages, rc;
-        ENTRY;
+        OBD_ALLOC(group, LLU_IO_GROUP_SIZE(maxpages));
+        if (!group)
+                return ERR_PTR(-ENOMEM);
 
-        max_pages = (count >> PAGE_SHIFT) + 2;
+        I_REF(inode);
+        group->lig_inode = inode;
+        group->lig_maxpages = maxpages;
+        group->lig_llaps = (struct ll_async_page *)(group + 1);
+        group->lig_pages = (struct page *)(&group->lig_llaps[maxpages]);
+        group->lig_llap_cookies = (void *)(&group->lig_pages[maxpages]);
 
-        cookie = get_sysio_cookie(inode, llu_i2obdexp(inode), max_pages);
-        if (!cookie)
-                RETURN(ERR_PTR(-ENOMEM));
+        rc = oig_init(&group->lig_oig);
+        if (rc) {
+                OBD_FREE(group, LLU_IO_GROUP_SIZE(maxpages));
+                return ERR_PTR(rc);
+        }
 
-        rc = llu_prep_async_io(cookie, cmd, buf, pos, count);
-        if (rc)
-                GOTO(out_cleanup, rc);
+        return group;
+}
 
-        rc = llu_start_async_io(cookie);
-        if (rc)
-                GOTO(out_cleanup, rc);
+/*
+ * Page-slot budget for one I/O group: len rounded up to whole pages,
+ * plus 2, plus (iovlen - 1).
+ * NOTE(review): the extra slots presumably cover an unaligned head/tail
+ * and splits at iovec boundaries -- confirm against the callers.
+ */
+static int max_io_pages(ssize_t len, int iovlen)
+{
+        return ((len + PAGE_SIZE - 1) / PAGE_SIZE) + 2 + iovlen - 1;
+}
 
-/*
-        rc = oig_wait(&oig);
-        if (rc) {
-                CERROR("file i/o error!\n");
-                rw_count = rc;
+static
+void put_io_group(struct llu_io_group *group)
+{       /* release one I/O group: its async pages, inode reference, oig and memory */
+        struct lov_stripe_md *lsm = llu_i2info(group->lig_inode)->lli_smd;
+        struct obd_export *exp = llu_i2obdexp(group->lig_inode);
+        struct ll_async_page *llap = group->lig_llaps;
+        int i;
+
+        for (i = 0; i < group->lig_npages; i++, llap++) {
+                if (llap->llap_cookie)  /* llu_queue_pio() NULLs the cookie when preparing the page failed */
+                        obd_teardown_async_page(exp, lsm, NULL,
+                                                llap->llap_cookie);
         }
-*/
-        RETURN(cookie);
 
-out_cleanup:
-        put_sysio_cookie(cookie);
-        RETURN(ERR_PTR(rc));
+        I_RELE(group->lig_inode);       /* balances the I_REF() taken in get_io_group() */
+
+        oig_release(group->lig_oig);
+        OBD_FREE(group, LLU_IO_GROUP_SIZE(group->lig_maxpages));        /* same size macro as the allocation */
 }
 
-struct llu_sysio_callback_args*
-llu_file_write(struct inode *inode, const struct iovec *iovec,
-               size_t iovlen, loff_t pos)
+static
+ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen,
+                        _SYSIO_OFF_T pos, ssize_t len,
+                        void *private)
 {       /* per-extent callback given to _sysio_enumerate_extents(); private is the llu_io_session */
+        struct llu_io_session *session = (struct llu_io_session *) private;
+        struct inode *inode = session->lis_inode;
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct ll_file_data *fd = lli->lli_file_data;
         struct lustre_handle lockh = {0};
         struct lov_stripe_md *lsm = lli->lli_smd;
         struct obd_export *exp = NULL;
         ldlm_policy_data_t policy;
-        struct llu_sysio_callback_args *lsca;
-        struct llu_sysio_cookie *cookie;
-        ldlm_error_t err;
-        int iovidx;
+        struct llu_io_group *iogroup;
+        int astflag = (lli->lli_open_flags & O_NONBLOCK) ?
+                       LDLM_FL_BLOCK_NOWAIT : 0;
+        __u64 kms;
+        int err, is_read, lock_mode, iovidx, ret;
         ENTRY;
 
-        /* XXX consider other types later */
-        if (!S_ISREG(lli->lli_st_mode))
-                LBUG();
-
-        LASSERT(iovlen <= MAX_IOVEC);
+        /* in a large iov read/write we'll be repeatedly called.
+         * so give a chance to answer cancel ast here
+         */
+        liblustre_wait_event(0);
 
         exp = llu_i2obdexp(inode);
         if (exp == NULL)
-                RETURN(ERR_PTR(-EINVAL));
+                RETURN(-EINVAL);
+
+        if (len == 0 || iovlen == 0)
+                RETURN(0);
 
-        OBD_ALLOC(lsca, sizeof(*lsca));
-        if (!lsca)
-                RETURN(ERR_PTR(-ENOMEM));
+        if (pos + len > lli->lli_maxbytes)
+                RETURN(-ERANGE);
+
+        iogroup = get_io_group(inode, max_io_pages(len, iovlen));       /* one group per extent */
+        if (IS_ERR(iogroup))
+                RETURN(PTR_ERR(iogroup));
+
+        is_read = session->lis_cmd == OBD_BRW_READ;
+        lock_mode = is_read ? LCK_PR : LCK_PW;
+
+        if (!is_read && (lli->lli_open_flags & O_APPEND)) {     /* appending write: lock the whole object range */
+                policy.l_extent.start = 0;
+                policy.l_extent.end = OBD_OBJECT_EOF;
+        } else {
+                policy.l_extent.start = pos;
+                policy.l_extent.end = pos + len - 1;
+        }
+
+        err = llu_extent_lock(fd, inode, lsm, lock_mode, &policy,
+                              &lockh, astflag);
+        if (err != ELDLM_OK)
+                GOTO(err_put, err);     /* no lock held yet, so nothing to unlock */
+
+        if (is_read) {
+                kms = lov_merge_size(lsm, 1);
+                if (policy.l_extent.end > kms) {
+                        /* A glimpse is necessary to determine whether we
+                         * return a short read or some zeroes at the end of
+                         * the buffer */
+                        if ((err = llu_glimpse_size(inode))) {
+                                llu_extent_unlock(fd, inode, lsm,
+                                                  lock_mode, &lockh);
+                                GOTO(err_put, err);
+                        }
+                } else {
+                        st->st_size = kms;
+                }
+        } else {
+                if (lli->lli_open_flags & O_APPEND)
+                        pos = st->st_size;      /* O_APPEND: start writing at the current st_size */
+        }
 
-        /* FIXME optimize the following extent locking */
         for (iovidx = 0; iovidx < iovlen; iovidx++) {
-                char *buf = (char*)iovec[iovidx].iov_base;
+                char *buf = (char *) iovec[iovidx].iov_base;
                 size_t count = iovec[iovidx].iov_len;
 
-                if (count == 0)
+                if (!count)
                         continue;
+                if (len < count)        /* never queue more than what is left of this extent */
+                        count = len;
+                if (IS_BAD_PTR(buf) || IS_BAD_PTR(buf + count)) {
+                        llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh);
+                        GOTO(err_put, err = -EFAULT);
+                }
 
-                if (pos + count > lli->lli_maxbytes)
-                        GOTO(err_out, err = -ERANGE);
-
-                /* FIXME libsysio haven't handle O_APPEND?? */
-                policy.l_extent.start = pos;
-                policy.l_extent.end = pos + count - 1;
-
-                err = llu_extent_lock(fd, inode, lsm, LCK_PW, &policy,
-                                      &lockh, 0);
-                if (err != ELDLM_OK)
-                        GOTO(err_out, err = -ENOLCK);
-
-                CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
-                       lli->lli_st_ino, count, pos);
-
-                cookie = llu_rw(OBD_BRW_WRITE, inode, buf, count, pos);
-                if (!IS_ERR(cookie)) {
-                        /* save cookie */
-                        lsca->cookies[lsca->ncookies++] = cookie;
-                        pos += count;
-                        obd_adjust_kms(exp, lsm, pos, 0);
-                        /* file size grow */
-                        if (pos > lli->lli_st_size)
-                                lli->lli_st_size = pos;
+                if (is_read) {
+                        if (pos >= st->st_size) /* at or past end of file: stop queueing */
+                                break;
                 } else {
-                        llu_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
-                        GOTO(err_out, err = PTR_ERR(cookie));
+                        if (pos >= lli->lli_maxbytes) {
+                                llu_extent_unlock(fd, inode, lsm, lock_mode,
+                                                  &lockh);
+                                GOTO(err_put, err = -EFBIG);
+                        }
+                        if (pos + count >= lli->lli_maxbytes)
+                                count = lli->lli_maxbytes - pos;
                 }
 
-                /* XXX errors? */
-                err = llu_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
-                if (err)
-                        CERROR("extent unlock error %d\n", err);
+                ret = llu_queue_pio(session->lis_cmd, iogroup, buf, count, pos);
+                if (ret < 0) {
+                        llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh);
+                        GOTO(err_put, err = ret);
+                } else {
+                        pos += ret;
+                        if (!is_read) {
+                                LASSERT(ret == count);
+                                obd_adjust_kms(exp, lsm, pos, 0);
+                                /* file size grow immediately */
+                                if (pos > st->st_size)
+                                        st->st_size = pos;
+                        }
+                        len -= ret;
+                        if (!len)
+                                break;
+                }
         }
+        LASSERT(len == 0 || is_read); /* libsysio should guarantee this */
 
-        RETURN(lsca);
+        err = llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh);
+        if (err)
+                CERROR("extent unlock error %d\n", err);
 
-err_out:
-        /* teardown all async stuff */
-        while (lsca->ncookies--) {
-                put_sysio_cookie(lsca->cookies[lsca->ncookies]);
-        }
-        OBD_FREE(lsca, sizeof(*lsca));
+        err = obd_trigger_group_io(exp, lsm, NULL, iogroup->lig_oig);   /* start the queued I/O; llu_iop_iodone() waits for it */
+        if (err)
+                GOTO(err_put, err);
 
-        RETURN(ERR_PTR(err));
+        session->lis_groups[session->lis_ngroups++] = iogroup;  /* NOTE(review): lis_ngroups is not checked against lis_max_groups here -- confirm the caller's sizing bounds it */
+        RETURN(0);
+err_put:        /* every jump here has already dropped the extent lock (or never took it) */
+        put_io_group(iogroup);
+        RETURN((ssize_t)err);
 }
 
-#if 0
-static void llu_update_atime(struct inode *inode)
+static
+struct llu_io_session *get_io_session(struct inode *ino, int ngroups, int cmd)
 {       /* allocate a session sized for ngroups I/O groups; returns NULL when the allocation fails */
-        struct llu_inode_info *lli = llu_i2info(inode);
-
-#ifdef USE_ATIME
-        struct iattr attr;
-
-        attr.ia_atime = LTIME_S(CURRENT_TIME);
-        attr.ia_valid = ATTR_ATIME;
+        struct llu_io_session *session;
 
-        if (lli->lli_st_atime == attr.ia_atime) return;
-        if (IS_RDONLY(inode)) return;
-        if (IS_NOATIME(inode)) return;
+        OBD_ALLOC(session, LLU_IO_SESSION_SIZE(ngroups));
+        if (!session)
+                return NULL;
 
-        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
-        llu_inode_setattr(inode, &attr, 0);
-#else
-        /* update atime, but don't explicitly write it out just this change */
-        inode->i_atime = CURRENT_TIME;
-#endif
+        I_REF(ino);     /* released again by I_RELE() in put_io_session() */
+        session->lis_inode = ino;
+        session->lis_max_groups = ngroups;      /* put_io_session() needs this to size its OBD_FREE() */
+        session->lis_cmd = cmd;
+        return session;
 }
-#endif
 
-struct llu_sysio_callback_args*
-llu_file_read(struct inode *inode, const struct iovec *iovec,
-              size_t iovlen, loff_t pos)
+static void put_io_session(struct llu_io_session *session)
 {       /* release a session: any groups still attached, the inode reference, then the session memory */
-        struct llu_inode_info *lli = llu_i2info(inode);
-        struct ll_file_data *fd = lli->lli_file_data;
-        struct lov_stripe_md *lsm = lli->lli_smd;
-        struct lustre_handle lockh = { 0 };
-        ldlm_policy_data_t policy;
-        struct llu_sysio_callback_args *lsca;
-        struct llu_sysio_cookie *cookie;
-        __u64 kms;
-        int iovidx;
-
-        ldlm_error_t err;
-        ENTRY;
+        int i;
 
-        OBD_ALLOC(lsca, sizeof(*lsca));
-        if (!lsca)
-                RETURN(ERR_PTR(-ENOMEM));
+        for (i = 0; i < session->lis_ngroups; i++) {
+                if (session->lis_groups[i]) {   /* llu_iop_iodone() NULLs the slots it has already released */
+                        put_io_group(session->lis_groups[i]);
+                        session->lis_groups[i] = NULL;
+                }
+        }
 
-        for (iovidx = 0; iovidx < iovlen; iovidx++) {
-                char *buf = iovec[iovidx].iov_base;
-                size_t count = iovec[iovidx].iov_len;
+        I_RELE(session->lis_inode);     /* balances the I_REF() in get_io_session() */
+        OBD_FREE(session, LLU_IO_SESSION_SIZE(session->lis_max_groups));
+}
 
-                /* "If nbyte is 0, read() will return 0 and have no other results."
-                 *                      -- Single Unix Spec */
-                if (count == 0)
-                        continue;
+static int llu_file_rwx(struct inode *ino,
+                        struct ioctx *ioctx,
+                        int read)
+{       /* common body of llu_iop_read() and llu_iop_write(): queue the I/O and stash the session in the ioctx */
+        struct llu_io_session *session;
+        ssize_t cc;
+        int cmd = read ? OBD_BRW_READ : OBD_BRW_WRITE;
+        ENTRY;
 
-                policy.l_extent.start = pos;
-                policy.l_extent.end = pos + count - 1;
+        LASSERT(ioctx->ioctx_xtvlen >= 0);
+        LASSERT(ioctx->ioctx_iovlen >= 0);
 
-                err = llu_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0);
-                if (err != ELDLM_OK)
-                        GOTO(err_out, err = -ENOLCK);
+        liblustre_wait_event(0);
 
-                kms = lov_merge_size(lsm, 1);
-                if (policy.l_extent.end > kms) {
-                        /* A glimpse is necessary to determine whether we
-                         * return a short read or some zeroes at the end of
-                         * the buffer */
-                        if (llu_glimpse_size(inode)) {
-                                llu_extent_unlock(fd, inode, lsm,LCK_PR,&lockh);
-                                GOTO(err_out, err = -ENOLCK);
-                        }
-                } else {
-                        lli->lli_st_size = kms;
-                }
+        if (!ioctx->ioctx_xtvlen)       /* no extents: nothing is queued and ioctx_private is left untouched */
+                RETURN(0);
 
-                CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld, "
-                       "i_size "LPU64"\n", lli->lli_st_ino, count, pos,
-                       lli->lli_st_size);
+        /* XXX consider other types later */
+        if (S_ISDIR(llu_i2stat(ino)->st_mode))
+                RETURN(-EISDIR);
+        if (!S_ISREG(llu_i2stat(ino)->st_mode))
+                RETURN(-EOPNOTSUPP);
+
+        session = get_io_session(ino, ioctx->ioctx_xtvlen * 2, cmd);    /* NOTE(review): reason for two group slots per extent is not visible here */
+        if (!session)
+                RETURN(-ENOMEM);
+
+        cc = _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen,
+                                      ioctx->ioctx_iov, ioctx->ioctx_iovlen,
+                                      llu_file_prwv, session);
+
+        if (cc >= 0) {
+                LASSERT(!ioctx->ioctx_cc);
+                ioctx->ioctx_private = session; /* read back by llu_iop_iodone() */
+                RETURN(0);
+        } else {
+                put_io_session(session);        /* also releases any groups already recorded in the session */
+                RETURN(cc);
+        }
+}
 
-                if (pos >= lli->lli_st_size) {
-                        llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
-                        break;
-                }
+/* Read entry point: forwards to llu_file_rwx() with the read flag set. */
+int llu_iop_read(struct inode *ino,
+                 struct ioctx *ioctx)
+{
+        const int is_read = 1;
+
+        return llu_file_rwx(ino, ioctx, is_read);
+}
 
-                cookie = llu_rw(OBD_BRW_READ, inode, buf, count, pos);
-                if (!IS_ERR(cookie)) {
-                        /* save cookie */
-                        lsca->cookies[lsca->ncookies++] = cookie;
-                        pos += count;
-                } else {
-                        llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
-                        GOTO(err_out, err = PTR_ERR(cookie));
-                }
+int llu_iop_write(struct inode *ino,
+                  struct ioctx *ioctx)
+{
+        struct iattr iattr;
+        int rc;
 
-                /* XXX errors? */
-                err = llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
-                if (err)
-                        CERROR("extent_unlock fail: %d\n", err);
-        }
-#if 0
-        if (readed > 0)
-                llu_update_atime(inode);
-#endif
-        RETURN(lsca);
+        memset(&iattr, 0, sizeof(iattr));
+        iattr.ia_mtime = iattr.ia_atime = CURRENT_TIME;
+        iattr.ia_valid = ATTR_MTIME | ATTR_ATIME | ATTR_RAW;
 
-err_out:
-        /* teardown all async stuff */
-        while (lsca->ncookies--) {
-                put_sysio_cookie(lsca->cookies[lsca->ncookies]);
+        liblustre_wait_event(0);
+        rc = llu_setattr_raw(ino, &iattr);
+        if (rc) {
+                CERROR("failed to set mtime/atime during write: %d", rc);
+                /* XXX should continue or return error? */
         }
-        OBD_FREE(lsca, sizeof(*lsca));
 
-        RETURN(ERR_PTR(err));
+        return llu_file_rwx(ino, ioctx, 0);
 }
 
-int llu_iop_iodone(struct ioctx *ioctxp)
+int llu_iop_iodone(struct ioctx *ioctx)
 {       /* wait for the session's I/O groups, fill in ioctx_cc / ioctx_errno, release the session; always returns 1 */
-        struct llu_sysio_callback_args *lsca = ioctxp->ioctx_private;
-        struct llu_sysio_cookie *cookie;
+        struct llu_io_session *session;
+        struct llu_io_group *group;
         int i, err = 0, rc = 0;
         ENTRY;
 
-        /* write/read(fd, buf, 0) */
-        if (!lsca) {
-                ioctxp->ioctx_cc = 0;
-                RETURN(1);
-        }
+        liblustre_wait_event(0);
 
-        LASSERT(!IS_ERR(lsca));
+        session = (struct llu_io_session *) ioctx->ioctx_private;
+        LASSERT(session);       /* NOTE(review): llu_file_rwx() returns 0 without a session when ioctx_xtvlen is 0 -- confirm this is never reached then */
+        LASSERT(!IS_ERR(session));
 
-        for (i = 0; i < lsca->ncookies; i++) {
-                cookie = lsca->cookies[i];
-                if (cookie) {
-                        err = oig_wait(cookie->lsc_oig);
-                        if (err && !rc)
-                                rc = err;
+        for (i = 0; i < session->lis_ngroups; i++) {
+                group = session->lis_groups[i];
+                if (group) {
+                        if (!rc) {      /* NOTE(review): after the first error later groups are released without oig_wait() -- confirm that is safe */
+                                err = oig_wait(group->lig_oig);
+                                if (err)
+                                        rc = err;
+                        }
                         if (!rc)
-                                ioctxp->ioctx_cc += cookie->lsc_rwcount;
-                        put_sysio_cookie(cookie);
+                                ioctx->ioctx_cc += group->lig_rwcount;
+                        put_io_group(group);
+                        session->lis_groups[i] = NULL;  /* so put_io_session() does not release it again */
                 }
         }
 
         if (rc) {
                 LASSERT(rc < 0);
-                ioctxp->ioctx_cc = -1;
-                ioctxp->ioctx_errno = -rc;
+                ioctx->ioctx_cc = -1;
+                ioctx->ioctx_errno = -rc;       /* rc is a negative errno; store it as a positive value */
         }
 
-        OBD_FREE(lsca, sizeof(*lsca));
-        ioctxp->ioctx_private = NULL;
+        put_io_session(session);
+        ioctx->ioctx_private = NULL;
 
         RETURN(1);
 }
index 9972f1a..6476c6e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Lustre Light Super operations
  *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
  *
  *   This file is part of Lustre, http://www.lustre.org.
  *
 # include <sys/statfs.h>
 #endif
 
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
 #include <sysio.h>
 #include <fs.h>
 #include <mount.h>
 #include <inode.h>
+#ifdef HAVE_FILE_H
 #include <file.h>
+#endif
 
 #undef LIST_HEAD
 
 #include "llite_lib.h"
 
+#ifndef MAY_EXEC
+#define MAY_EXEC        1
+#define MAY_WRITE       2
+#define MAY_READ        4
+#endif
+
+#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
+
+/*
+ * Check the requested access bits (MAY_READ / MAY_WRITE / MAY_EXEC in
+ * @mask) against the inode's cached mode.
+ *
+ * The owner, group or other permission triplet is selected from
+ * current->fsuid and in_group_p(); if it does not grant the request,
+ * CAP_DAC_OVERRIDE and then CAP_DAC_READ_SEARCH are consulted.
+ *
+ * Returns 0 when access is allowed, -EACCES otherwise.
+ */
+static int ll_permission(struct inode *inode, int mask)
+{
+        struct intnl_stat *st = llu_i2stat(inode);
+        mode_t perm = st->st_mode;
+
+        /* shift the applicable rwx triplet into the low three bits */
+        if (current->fsuid == st->st_uid)
+                perm >>= 6;
+        else if (in_group_p(st->st_gid))
+                perm >>= 3;
+
+        if ((perm & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
+                return 0;
+
+        /* read/write, or exec when some execute bit is set: the override
+         * capability is enough */
+        if (((mask & (MAY_READ|MAY_WRITE)) || (st->st_mode & S_IXUGO)) &&
+            capable(CAP_DAC_OVERRIDE))
+                return 0;
+
+        /* plain reads, and non-writing access to directories */
+        if ((mask == MAY_READ ||
+             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) &&
+            capable(CAP_DAC_READ_SEARCH))
+                return 0;
+
+        return -EACCES;
+}
+
 static void llu_fsop_gone(struct filesys *fs)
 {
         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
         struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
-        struct ll_fid rootfid;
+        int next = 0;
         ENTRY;
 
         list_del(&sbi->ll_conn_chain);
         obd_disconnect(sbi->ll_osc_exp);
-
-        /* NULL request to force sync on the MDS, and get the last_committed
-         * value to flush remaining RPCs from the sending queue on client.
-         *
-         * XXX This should be an mdc_sync() call to sync the whole MDS fs,
-         *     which we can call for other reasons as well.
-         */
-        if (!obd->obd_no_recov)
-                mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
-
         obd_disconnect(sbi->ll_mdc_exp);
 
+        while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL){
+                struct lustre_cfg_bufs bufs;
+                struct lustre_cfg *lcfg;
+                int err;
+
+                lustre_cfg_bufs_reset(&bufs, obd->obd_name);
+                lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs);
+                err = class_process_config(lcfg);
+                lustre_cfg_free(lcfg);
+                if (err)
+                        CERROR("cleanup failed: %s\n", obd->obd_name);
+
+                lustre_cfg_bufs_reset(&bufs, obd->obd_name);
+                lcfg = lustre_cfg_new(LCFG_DETACH, &bufs);
+                err = class_process_config(lcfg);
+                if (err)
+                        CERROR("detach failed: %s\n", obd->obd_name);
+        }
+
         OBD_FREE(sbi, sizeof(*sbi));
 
         EXIT;
@@ -79,6 +128,7 @@ void llu_update_inode(struct inode *inode, struct mds_body *body,
                       struct lov_stripe_md *lsm)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
 
         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
         if (lsm != NULL) {
@@ -89,41 +139,45 @@ void llu_update_inode(struct inode *inode, struct mds_body *body,
                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
                 } else {
                         if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
-                                CERROR("lsm mismatch for inode %ld\n",
-                                       lli->lli_st_ino);
+                                CERROR("lsm mismatch for inode %lld\n",
+                                       st->st_ino);
                                 LBUG();
                         }
                 }
         }
 
         if (body->valid & OBD_MD_FLID)
-                lli->lli_st_ino = body->ino;
+                st->st_ino = body->ino;
         if (body->valid & OBD_MD_FLATIME)
-                LTIME_S(lli->lli_st_atime) = body->atime;
+                LTIME_S(st->st_atime) = body->atime;
         if (body->valid & OBD_MD_FLMTIME)
-                LTIME_S(lli->lli_st_mtime) = body->mtime;
+                LTIME_S(st->st_mtime) = body->mtime;
         if (body->valid & OBD_MD_FLCTIME)
-                LTIME_S(lli->lli_st_ctime) = body->ctime;
+                LTIME_S(st->st_ctime) = body->ctime;
         if (body->valid & OBD_MD_FLMODE)
-                lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT);
+                st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
         if (body->valid & OBD_MD_FLTYPE)
-                lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT);
+                st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
+        if (S_ISREG(st->st_mode))
+                st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
+        else
+                st->st_blksize = 4096;
         if (body->valid & OBD_MD_FLUID)
-                lli->lli_st_uid = body->uid;
+                st->st_uid = body->uid;
         if (body->valid & OBD_MD_FLGID)
-                lli->lli_st_gid = body->gid;
-        if (body->valid & OBD_MD_FLFLAGS)
-                lli->lli_st_flags = body->flags;
+                st->st_gid = body->gid;
         if (body->valid & OBD_MD_FLNLINK)
-                lli->lli_st_nlink = body->nlink;
-        if (body->valid & OBD_MD_FLGENER)
-                lli->lli_st_generation = body->generation;
+                st->st_nlink = body->nlink;
         if (body->valid & OBD_MD_FLRDEV)
-                lli->lli_st_rdev = body->rdev;
+                st->st_rdev = body->rdev;
         if (body->valid & OBD_MD_FLSIZE)
-                lli->lli_st_size = body->size;
+                st->st_size = body->size;
         if (body->valid & OBD_MD_FLBLOCKS)
-                lli->lli_st_blocks = body->blocks;
+                st->st_blocks = body->blocks;
+        if (body->valid & OBD_MD_FLFLAGS)
+                lli->lli_st_flags = body->flags;
+        if (body->valid & OBD_MD_FLGENER)
+                lli->lli_st_generation = body->generation;
 
         /* fillin fid */
         if (body->valid & OBD_MD_FLID)
@@ -137,35 +191,36 @@ void llu_update_inode(struct inode *inode, struct mds_body *body,
 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
 {
         struct llu_inode_info *lli = llu_i2info(dst);
+        struct intnl_stat *st = llu_i2stat(dst);
 
         valid &= src->o_valid;
 
         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
                 CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n",
                        src->o_valid, 
-                       LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime),
+                       LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
                        (long)src->o_mtime, (long)src->o_ctime);
 
         if (valid & OBD_MD_FLATIME)
-                LTIME_S(lli->lli_st_atime) = src->o_atime;
+                LTIME_S(st->st_atime) = src->o_atime;
         if (valid & OBD_MD_FLMTIME)
-                LTIME_S(lli->lli_st_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime))
-                LTIME_S(lli->lli_st_ctime) = src->o_ctime;
+                LTIME_S(st->st_mtime) = src->o_mtime;
+        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
+                LTIME_S(st->st_ctime) = src->o_ctime;
         if (valid & OBD_MD_FLSIZE)
-                lli->lli_st_size = src->o_size;
+                st->st_size = src->o_size;
         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
-                lli->lli_st_blocks = src->o_blocks;
+                st->st_blocks = src->o_blocks;
         if (valid & OBD_MD_FLBLKSZ)
-                lli->lli_st_blksize = src->o_blksize;
+                st->st_blksize = src->o_blksize;
         if (valid & OBD_MD_FLTYPE)
-                lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
+                st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
         if (valid & OBD_MD_FLMODE)
-                lli->lli_st_mode = (lli->lli_st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
+                st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
         if (valid & OBD_MD_FLUID)
-                lli->lli_st_uid = src->o_uid;
+                st->st_uid = src->o_uid;
         if (valid & OBD_MD_FLGID)
-                lli->lli_st_gid = src->o_gid;
+                st->st_gid = src->o_gid;
         if (valid & OBD_MD_FLFLAGS)
                 lli->lli_st_flags = src->o_flags;
         if (valid & OBD_MD_FLGENER)
@@ -178,51 +233,52 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
 {
         struct llu_inode_info *lli = llu_i2info(src);
+        struct intnl_stat *st = llu_i2stat(src);
         obd_flag newvalid = 0;
 
         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
                 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
-                       valid, LTIME_S(lli->lli_st_mtime), 
-                       LTIME_S(lli->lli_st_ctime));
+                       valid, LTIME_S(st->st_mtime), 
+                       LTIME_S(st->st_ctime));
 
         if (valid & OBD_MD_FLATIME) {
-                dst->o_atime = LTIME_S(lli->lli_st_atime);
+                dst->o_atime = LTIME_S(st->st_atime);
                 newvalid |= OBD_MD_FLATIME;
         }
         if (valid & OBD_MD_FLMTIME) {
-                dst->o_mtime = LTIME_S(lli->lli_st_mtime);
+                dst->o_mtime = LTIME_S(st->st_mtime);
                 newvalid |= OBD_MD_FLMTIME;
         }
         if (valid & OBD_MD_FLCTIME) {
-                dst->o_ctime = LTIME_S(lli->lli_st_ctime);
+                dst->o_ctime = LTIME_S(st->st_ctime);
                 newvalid |= OBD_MD_FLCTIME;
         }
         if (valid & OBD_MD_FLSIZE) {
-                dst->o_size = lli->lli_st_size;
+                dst->o_size = st->st_size;
                 newvalid |= OBD_MD_FLSIZE;
         }
         if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
-                dst->o_blocks = lli->lli_st_blocks;
+                dst->o_blocks = st->st_blocks;
                 newvalid |= OBD_MD_FLBLOCKS;
         }
         if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
-                dst->o_blksize = lli->lli_st_blksize;
+                dst->o_blksize = st->st_blksize;
                 newvalid |= OBD_MD_FLBLKSZ;
         }
         if (valid & OBD_MD_FLTYPE) {
-                dst->o_mode = (dst->o_mode & S_IALLUGO)|(lli->lli_st_mode & S_IFMT);
+                dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
                 newvalid |= OBD_MD_FLTYPE;
         }
         if (valid & OBD_MD_FLMODE) {
-                dst->o_mode = (dst->o_mode & S_IFMT)|(lli->lli_st_mode & S_IALLUGO);
+                dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
                 newvalid |= OBD_MD_FLMODE;
         }
         if (valid & OBD_MD_FLUID) {
-                dst->o_uid = lli->lli_st_uid;
+                dst->o_uid = st->st_uid;
                 newvalid |= OBD_MD_FLUID;
         }
         if (valid & OBD_MD_FLGID) {
-                dst->o_gid = lli->lli_st_gid;
+                dst->o_gid = st->st_gid;
                 newvalid |= OBD_MD_FLGID;
         }
         if (valid & OBD_MD_FLFLAGS) {
@@ -286,6 +342,16 @@ static struct inode* llu_new_inode(struct filesys *fs,
 {
        struct inode *inode;
         struct llu_inode_info *lli;
+        /* Initial attributes handed to _sysio_i_new() below: the file type
+         * comes from the fid, the owner from the effective uid/gid of the
+         * calling process; every other field starts out as zero. */
+        struct intnl_stat st = {
+                .st_dev         = 0,
+#ifndef AUTOMOUNT_FILE_NAME
+                .st_mode        = fid->f_type & S_IFMT,
+#else
+                .st_mode        = fid->f_type, /* all of the bits! */
+#endif
+                .st_uid         = geteuid(),
+                .st_gid         = getegid(),
+        };
 
         OBD_ALLOC(lli, sizeof(*lli));
         if (!lli)
@@ -306,13 +372,7 @@ static struct inode* llu_new_inode(struct filesys *fs,
 
         /* file identifier is needed by functions like _sysio_i_find() */
        inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
-#ifndef AUTOMOUNT_FILE_NAME
-                            fid->f_type & S_IFMT,
-#else
-                            fid->f_type, /* all of the bits! */
-#endif
-                             0, 0,
-                            &llu_inode_ops, lli);
+                             &st, 0, &llu_inode_ops, lli);
 
        if (!inode)
                OBD_FREE(lli, sizeof(*lli));
@@ -333,7 +393,7 @@ static int llu_have_md_lock(struct inode *inode)
         LASSERT(inode);
 
         obddev = sbi->ll_mdc_exp->exp_obd;
-        res_id.name[0] = lli->lli_st_ino;
+        res_id.name[0] = llu_i2stat(inode)->st_ino;
         res_id.name[1] = lli->lli_st_generation;
 
         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
@@ -356,7 +416,6 @@ static int llu_have_md_lock(struct inode *inode)
 
 static int llu_inode_revalidate(struct inode *inode)
 {
-        struct llu_inode_info *lli = llu_i2info(inode);
         struct lov_stripe_md *lsm = NULL;
         ENTRY;
 
@@ -375,14 +434,14 @@ static int llu_inode_revalidate(struct inode *inode)
 
                 /* Why don't we update all valid MDS fields here, if we're
                  * doing an RPC anyways?  -phil */
-                if (S_ISREG(lli->lli_st_mode)) {
+                if (S_ISREG(llu_i2stat(inode)->st_mode)) {
                         ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
                         valid |= OBD_MD_FLEASIZE;
                 }
                 ll_inode2fid(&fid, inode);
                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
                 if (rc) {
-                        CERROR("failure %d inode %lu\n", rc, lli->lli_st_ino);
+                        CERROR("failure %d inode %llu\n", rc, llu_i2stat(inode)->st_ino);
                         RETURN(-abs(rc));
                 }
                 rc = mdc_req2lustre_md(req, 0, sbi->ll_osc_exp, &md);
@@ -423,21 +482,7 @@ static int llu_inode_revalidate(struct inode *inode)
 
 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
 {
-        struct llu_inode_info *lli = llu_i2info(ino);
-
-        b->st_dev = lli->lli_st_dev;
-        b->st_ino = lli->lli_st_ino;
-        b->st_mode = lli->lli_st_mode;
-        b->st_nlink = lli->lli_st_nlink;
-        b->st_uid = lli->lli_st_uid;
-        b->st_gid = lli->lli_st_gid;
-        b->st_rdev = lli->lli_st_rdev;
-        b->st_size = lli->lli_st_size;
-        b->st_blksize = lli->lli_st_blksize;
-        b->st_blocks = lli->lli_st_blocks;
-        b->st_atime = lli->lli_st_atime;
-        b->st_mtime = lli->lli_st_mtime;
-        b->st_ctime = lli->lli_st_ctime;
+        *b = *llu_i2stat(ino); /* whole-struct copy of the inode's stat data
+                                * replaces the old field-by-field copy */
 }
 
 static int llu_iop_getattr(struct pnode *pno,
@@ -447,6 +492,8 @@ static int llu_iop_getattr(struct pnode *pno,
         int rc;
         ENTRY;
 
+        liblustre_wait_event(0);
+
         if (!ino) {
                 LASSERT(pno);
                 LASSERT(pno->p_base->pb_ino);
@@ -461,14 +508,7 @@ static int llu_iop_getattr(struct pnode *pno,
         rc = llu_inode_revalidate(ino);
         if (!rc) {
                 copy_stat_buf(ino, b);
-
-                if (llu_i2info(ino)->lli_it) {
-                        struct lookup_intent *it;
-
-                        LL_GET_INTENT(ino, it);
-                        it->it_op_release(it);
-                        OBD_FREE(it, sizeof(*it));
-                }
+                LASSERT(!llu_i2info(ino)->lli_it);
         }
 
         RETURN(rc);
@@ -480,7 +520,8 @@ static int null_if_equal(struct ldlm_lock *lock, void *data)
                 lock->l_ast_data = NULL;
 
                 if (lock->l_req_mode != lock->l_granted_mode)
-                        LDLM_ERROR(lock,"clearing inode with ungranted lock\n");        }
+                        LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
+        }
 
         return LDLM_ITER_CONTINUE;
 }
@@ -492,8 +533,8 @@ void llu_clear_inode(struct inode *inode)
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu(%p)\n", lli->lli_st_ino,
-               lli->lli_st_generation, inode);
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
+               llu_i2stat(inode)->st_ino, lli->lli_st_generation, inode);
 
         ll_inode2fid(&fid, inode);
         clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
@@ -522,6 +563,7 @@ void llu_iop_gone(struct inode *inode)
         struct llu_inode_info *lli = llu_i2info(inode);
         ENTRY;
 
+        liblustre_wait_event(0);
         llu_clear_inode(inode);
 
         OBD_FREE(lli, sizeof(*lli));
@@ -531,7 +573,7 @@ void llu_iop_gone(struct inode *inode)
 static int inode_setattr(struct inode * inode, struct iattr * attr)
 {
         unsigned int ia_valid = attr->ia_valid;
-        struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         int error = 0;
 
         if (ia_valid & ATTR_SIZE) {
@@ -541,19 +583,19 @@ static int inode_setattr(struct inode * inode, struct iattr * attr)
         }
 
         if (ia_valid & ATTR_UID)
-                lli->lli_st_uid = attr->ia_uid;
+                st->st_uid = attr->ia_uid;
         if (ia_valid & ATTR_GID)
-                lli->lli_st_gid = attr->ia_gid;
+                st->st_gid = attr->ia_gid;
         if (ia_valid & ATTR_ATIME)
-                lli->lli_st_atime = attr->ia_atime;
+                st->st_atime = attr->ia_atime;
         if (ia_valid & ATTR_MTIME)
-                lli->lli_st_mtime = attr->ia_mtime;
+                st->st_mtime = attr->ia_mtime;
         if (ia_valid & ATTR_CTIME)
-                lli->lli_st_ctime = attr->ia_ctime;
+                st->st_ctime = attr->ia_ctime;
         if (ia_valid & ATTR_MODE) {
-                lli->lli_st_mode = attr->ia_mode;
-                if (!in_group_p(lli->lli_st_gid) && !capable(CAP_FSETID))
-                        lli->lli_st_mode &= ~S_ISGID;
+                st->st_mode = attr->ia_mode;
+                if (!in_group_p(st->st_gid) && !capable(CAP_FSETID))
+                        st->st_mode &= ~S_ISGID;
         }
         /* mark_inode_dirty(inode); */
 out:
@@ -577,14 +619,14 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
 {
         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
         struct llu_sb_info *sbi = llu_i2sbi(inode);
-        struct llu_inode_info *lli = llu_i2info(inode);
+        struct intnl_stat *st = llu_i2stat(inode);
         struct ptlrpc_request *request = NULL;
         struct mdc_op_data op_data;
         int ia_valid = attr->ia_valid;
         int rc = 0;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino);
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", st->st_ino);
 
         if (ia_valid & ATTR_SIZE) {
                 if (attr->ia_size > ll_file_maxbytes(inode)) {
@@ -639,10 +681,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                         ptlrpc_req_finished(request);
                         RETURN(rc);
                 }
+
+                /* We call inode_setattr to adjust timestamps, but we first
+                 * clear ATTR_SIZE to avoid invoking vmtruncate.
+                 *
+                 * NB: ATTR_SIZE will only be set at this point if the size
+                 * resides on the MDS, ie, this file has no objects. */
+                attr->ia_valid &= ~ATTR_SIZE;
+                inode_setattr(inode, attr);
                 llu_update_inode(inode, md.body, md.lsm);
                 ptlrpc_req_finished(request);
 
-                if (!md.lsm || !S_ISREG(lli->lli_st_mode)) {
+                if (!md.lsm || !S_ISREG(st->st_mode)) {
                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
                         RETURN(0);
                 }
@@ -654,12 +704,12 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
                         /* from sys_utime() */
                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
-                                if (current->fsuid != lli->lli_st_uid &&
-                                    (rc = ll_permission(inode, 0/*MAY_WRITE*/, NULL)) != 0)
+                                if (current->fsuid != st->st_uid &&
+                                    (rc = ll_permission(inode, MAY_WRITE)) != 0)
                                         RETURN(rc);
                         } else {
                                /* from inode_change_ok() */
-                               if (current->fsuid != lli->lli_st_uid &&
+                               if (current->fsuid != st->st_uid &&
                                    !capable(CAP_FOWNER))
                                        RETURN(-EPERM);
                         }
@@ -689,9 +739,6 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
                 }
 
                 rc = llu_vmtruncate(inode, attr->ia_size);
-                if (rc == 0)
-                        set_bit(LLI_F_HAVE_OST_SIZE_LOCK,
-                                &llu_i2info(inode)->lli_flags);
 
                 /* unlock now as we don't mind others file lockers racing with
                  * the mds updates below? */
@@ -704,8 +751,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
                 struct obdo oa;
 
-                CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
-                       lli->lli_st_ino, LTIME_S(attr->ia_mtime));
+                CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
+                       st->st_ino, LTIME_S(attr->ia_mtime));
                 oa.o_id = lsm->lsm_object_id;
                 oa.o_valid = OBD_MD_FLID;
                 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
@@ -728,6 +775,11 @@ static int llu_iop_setattr(struct pnode *pno,
         struct iattr iattr;
         ENTRY;
 
+        liblustre_wait_event(0);
+
+        LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME | 
+                           SETATTR_UID | SETATTR_GID |
+                           SETATTR_LEN | SETATTR_MODE)));
         memset(&iattr, 0, sizeof(iattr));
 
         if (mask & SETATTR_MODE) {
@@ -755,7 +807,8 @@ static int llu_iop_setattr(struct pnode *pno,
                 iattr.ia_valid |= ATTR_SIZE;
         }
 
-        iattr.ia_valid |= ATTR_RAW;
+        iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
+        iattr.ia_ctime = CURRENT_TIME;
 
         RETURN(llu_setattr_raw(ino, &iattr));
 }
@@ -774,7 +827,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
         int err = -EMLINK;
         ENTRY;
 
-        if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX)
+        if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
                 RETURN(err);
 
         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
@@ -793,7 +846,8 @@ static int llu_readlink_internal(struct inode *inode,
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         struct ll_fid fid;
         struct mds_body *body;
-        int rc, symlen = lli->lli_st_size + 1;
+        struct intnl_stat *st = llu_i2stat(inode);
+        int rc, symlen = st->st_size + 1;
         ENTRY;
 
         *request = NULL;
@@ -808,7 +862,7 @@ static int llu_readlink_internal(struct inode *inode,
         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
                          OBD_MD_LINKNAME, symlen, request);
         if (rc) {
-                CERROR("inode %lu: rc = %d\n", lli->lli_st_ino, rc);
+                CERROR("inode %llu: rc = %d\n", st->st_ino, rc);
                 RETURN(rc);
         }
 
@@ -823,8 +877,8 @@ static int llu_readlink_internal(struct inode *inode,
         
         LASSERT (symlen != 0);
         if (body->eadatasize != symlen) {
-                CERROR ("inode %lu: symlink length %d not expected %d\n",
-                        lli->lli_st_ino, body->eadatasize - 1, symlen - 1);
+                CERROR ("inode %llu: symlink length %d not expected %d\n",
+                        st->st_ino, body->eadatasize - 1, symlen - 1);
                 GOTO (failed, rc = -EPROTO);
         }
 
@@ -832,8 +886,8 @@ static int llu_readlink_internal(struct inode *inode,
         if (*symname == NULL ||
             strnlen (*symname, symlen) != symlen - 1) {
                 /* not full/NULL terminated */
-                CERROR ("inode %lu: symlink not NULL terminated string"
-                        "of length %d\n", lli->lli_st_ino, symlen - 1);
+                CERROR ("inode %llu: symlink not NULL terminated string"
+                        "of length %d\n", st->st_ino, symlen - 1);
                 GOTO (failed, rc = -EPROTO);
         }
 
@@ -863,6 +917,7 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
 
         LASSERT(symname);
         strncpy(data, symname, bufsize);
+        rc = strlen(symname);
 
         ptlrpc_req_finished(request);
  out:
@@ -880,10 +935,11 @@ static int llu_iop_mknod_raw(struct pnode *pno,
         int err = -EMLINK;
         ENTRY;
 
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n",
-               pno->p_base->pb_name.name, llu_i2info(dir)->lli_st_ino);
+        CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
+               (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
+               llu_i2stat(dir)->st_ino);
 
-        if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX)
+        if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
                 RETURN(err);
 
         mode &= ~current->fs->umask;
@@ -927,13 +983,18 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
         LASSERT(src);
         LASSERT(dir);
 
+        liblustre_wait_event(0);
         llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
         rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
         ptlrpc_req_finished(request);
+        liblustre_wait_event(0);
 
         RETURN(rc);
 }
 
+/*
+ * libsysio will clear the inode immediately after return
+ */
 static int llu_iop_unlink_raw(struct pnode *pno)
 {
         struct inode *dir = pno->p_base->pb_parent->pb_ino;
@@ -948,30 +1009,21 @@ static int llu_iop_unlink_raw(struct pnode *pno)
 
         LASSERT(target);
 
+        liblustre_wait_event(0);
         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
-        if (!rc) {
+        if (!rc)
                 rc = llu_objects_destroy(request, dir);
-
-                llu_i2info(target)->lli_stale_flag = 1;
-                unhook_stale_inode(pno);
-        }
-
         ptlrpc_req_finished(request);
+        liblustre_wait_event(0);
+
         RETURN(rc);
 }
 
-/* FIXME
- * following cases need to be considered later:
- * - rename an opened file/dir
- * - an opened file be removed in rename
- * - rename to remove and hardlink (?opened)
- */
 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
 {
         struct inode *src = old->p_parent->p_base->pb_ino;
         struct inode *tgt = new->p_parent->p_base->pb_ino;
-        struct inode *tgtinode = new->p_base->pb_ino;
         const char *oldname = old->p_base->pb_name.name;
         int oldnamelen = old->p_base->pb_name.len;
         const char *newname = new->p_base->pb_name.name;
@@ -990,11 +1042,6 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
                         &request);
         if (!rc) {
                 rc = llu_objects_destroy(request, src);
-
-                if (tgtinode) {
-                        llu_i2info(tgtinode)->lli_stale_flag = 1;
-                        unhook_stale_inode(new);
-                }
         }
 
         ptlrpc_req_finished(request);
@@ -1088,6 +1135,8 @@ static int llu_iop_statvfs(struct pnode *pno,
         int rc;
         ENTRY;
 
+        liblustre_wait_event(0);
+
 #ifndef __CYGWIN__
         LASSERT(pno->p_base->pb_ino);
         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
@@ -1119,14 +1168,14 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
         const char *name = qstr->name;
         int len = qstr->len;
         struct ptlrpc_request *request = NULL;
-        struct llu_inode_info *lli = llu_i2info(dir);
+        struct intnl_stat *st = llu_i2stat(dir);
         struct mdc_op_data op_data;
         int err = -EMLINK;
         ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
-               name, lli->lli_st_ino, lli->lli_st_generation, dir);
+        CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n",
+               len, name, st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
 
-        if (lli->lli_st_nlink >= EXT2_LINK_MAX)
+        if (st->st_nlink >= EXT2_LINK_MAX)
                 RETURN(err);
 
         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
@@ -1145,35 +1194,346 @@ static int llu_iop_rmdir_raw(struct pnode *pno)
         int len = qstr->len;
         struct ptlrpc_request *request = NULL;
         struct mdc_op_data op_data;
-        struct llu_inode_info *lli = llu_i2info(dir);
         int rc;
         ENTRY;
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
-               name, lli->lli_st_ino, lli->lli_st_generation, dir);
+        CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
+               llu_i2stat(dir)->st_ino, llu_i2info(dir)->lli_st_generation,dir);
 
         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
         ptlrpc_req_finished(request);
 
-        /* libsysio: remove the pnode right away */
-        if (!rc) {
-                llu_i2info(pno->p_base->pb_ino)->lli_stale_flag = 1;
-                unhook_stale_inode(pno);
+        RETURN(rc);
+}
+
+#ifdef O_DIRECT
+#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
+#else
+#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
+#endif
+#define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
+
+#if 0
+/*
+ * Enqueue an LDLM_FLOCK lock on the MDC export for the byte range described
+ * by @file_lock; refer to ll_file_flock() for details.
+ *
+ * The lock mode is derived from fl_type (F_RDLCK -> LCK_PR, F_WRLCK ->
+ * LCK_PW, F_UNLCK -> LCK_NL) and the enqueue flags from @cmd (F_SETLKW ->
+ * none, F_SETLK -> LDLM_FL_BLOCK_NOWAIT, F_GETLK -> LDLM_FL_TEST_LOCK).
+ * For F_GETLK, fl_type of @file_lock is overwritten with the lock mode
+ * before the enqueue.  Any other lock type or command hits LBUG().
+ *
+ * Returns the result of ldlm_cli_enqueue().
+ */
+static int llu_file_flock(struct inode *ino,
+                          int cmd,
+                          struct file_lock *file_lock)
+{
+        struct obd_device *obddev;
+        struct llu_inode_info *lli = llu_i2info(ino);
+        struct intnl_stat *st = llu_i2stat(ino);
+        struct ldlm_res_id res_id =
+                { .name = {st->st_ino,
+                           lli->lli_st_generation, LDLM_FLOCK} };
+        struct lustre_handle lockh = {0};
+        ldlm_policy_data_t flock;
+        ldlm_mode_t mode = 0;
+        int flags = 0;
+        int rc;
+        ENTRY;  /* pairs with the RETURN() at the end of the function */
+
+        CDEBUG(D_VFSTRACE, "VFS Op:inode="LPU64" file_lock=%p\n",
+               st->st_ino, file_lock);
+
+        flock.l_flock.pid = file_lock->fl_pid;
+        flock.l_flock.start = file_lock->fl_start;
+        flock.l_flock.end = file_lock->fl_end;
+
+        switch (file_lock->fl_type) {
+        case F_RDLCK:
+                mode = LCK_PR;
+                break;
+        case F_UNLCK:
+                mode = LCK_NL;
+                break;
+        case F_WRLCK:
+                mode = LCK_PW;
+                break;
+        default:
+                CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
+                LBUG();
+        }
+
+        /* the *64 variants get their own case label only where they are
+         * distinct values, otherwise the labels would be duplicated */
+        switch (cmd) {
+        case F_SETLKW:
+#ifdef F_SETLKW64
+#if F_SETLKW64 != F_SETLKW
+        case F_SETLKW64:
+#endif
+#endif
+                flags = 0;
+                break;
+        case F_SETLK:
+#ifdef F_SETLK64
+#if F_SETLK64 != F_SETLK
+        case F_SETLK64:
+#endif
+#endif
+                flags = LDLM_FL_BLOCK_NOWAIT;
+                break;
+        case F_GETLK:
+#ifdef F_GETLK64
+#if F_GETLK64 != F_GETLK
+        case F_GETLK64:
+#endif
+#endif
+                flags = LDLM_FL_TEST_LOCK;
+                file_lock->fl_type = mode;
+                break;
+        default:
+                CERROR("unknown fcntl cmd: %d\n", cmd);
+                LBUG();
         }
 
+        CDEBUG(D_DLMTRACE, "inode="LPU64", pid="LPU64", flags=%#x, mode=%u, "
+               "start="LPU64", end="LPU64"\n", st->st_ino, flock.l_flock.pid,
+               flags, mode, flock.l_flock.start, flock.l_flock.end);
+
+        obddev = llu_i2mdcexp(ino)->exp_obd;
+        rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, obddev->obd_namespace,
+                              res_id, LDLM_FLOCK, &flock, mode, &flags,
+                              NULL, ldlm_flock_completion_ast, NULL, file_lock,
+                              NULL, 0, NULL, &lockh);
         RETURN(rc);
 }
 
-static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap)
+/* Store @type in @fl when it is one of the three fcntl lock types;
+ * anything else is rejected with -EINVAL and @fl is left untouched. */
+static int assign_type(struct file_lock *fl, int type)
 {
-        CERROR("liblustre did not support fcntl\n");
-        return -ENOSYS;
+        if (type != F_RDLCK && type != F_WRLCK && type != F_UNLCK)
+                return -EINVAL;
+
+        fl->fl_type = type;
+        return 0;
+}
+
+/*
+ * Translate a userspace struct flock @l into the file_lock @fl that is
+ * later passed to llu_file_flock().
+ *
+ * Only SEEK_SET ranges are accepted.  The range starts at l_start; a zero
+ * l_len means "up to the end" and is stored as fl_end == OFFSET_MAX.
+ * @ino is currently unused.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported l_whence, a negative
+ * l_start or l_len, or an unknown l_type, and -EOVERFLOW when the last byte
+ * of the range would lie beyond OFFSET_MAX.
+ */
+static int flock_to_posix_lock(struct inode *ino,
+                               struct file_lock *fl,
+                               struct flock *l)
+{
+        /* XXX: only SEEK_SET is supported in lustre */
+        if (l->l_whence != SEEK_SET)
+                return -EINVAL;
+        if (l->l_start < 0 || l->l_len < 0)
+                return -EINVAL;
+
+        /* honour l_start: the locked range is
+         * [l_start, l_start + l_len - 1], not [0, l_len - 1] */
+        fl->fl_start = l->l_start;
+        if (l->l_len == 0) {
+                fl->fl_end = OFFSET_MAX;
+        } else {
+                if (l->l_start > OFFSET_MAX - (l->l_len - 1))
+                        return -EOVERFLOW;
+                fl->fl_end = l->l_start + l->l_len - 1;
+        }
+
+        fl->fl_pid = getpid();
+        fl->fl_flags = FL_POSIX;
+        fl->fl_notify = NULL;
+        fl->fl_insert = NULL;
+        fl->fl_remove = NULL;
+        /* XXX: these fields can't be filled with suitable values,
+                but I think lustre doesn't use them.
+         */
+        fl->fl_owner = NULL;
+        fl->fl_file = NULL;
+
+        return assign_type(fl, l->l_type);
 }
 
+/*
+ * F_GETLK helper: probe, through llu_file_flock(), the range described by
+ * @flock and report the outcome back in @flock.
+ *
+ * On success l_type is set to F_UNLCK unless the probe lock came back with
+ * another type, in which case its pid, start, length and type are copied
+ * into @flock (fl_end == OFFSET_MAX is reported as l_len == 0).
+ *
+ * Returns 0 on success, -EINVAL when @flock does not ask about a read or
+ * write lock, otherwise whatever flock_to_posix_lock() or llu_file_flock()
+ * returned.
+ */
+static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
+{
+        struct file_lock fl;
+        int error;
+
+        /* negative errno, matching flock_to_posix_lock() and
+         * llu_fcntl_setlk(); this path used to return +EINVAL */
+        if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
+                return -EINVAL;
+
+        error = flock_to_posix_lock(ino, &fl, flock);
+        if (error)
+                return error;
+
+        error = llu_file_flock(ino, F_GETLK, &fl);
+        if (error)
+                return error;
+
+        flock->l_type = F_UNLCK;
+        if (fl.fl_type != F_UNLCK) {
+                flock->l_pid = fl.fl_pid;
+                flock->l_start = fl.fl_start;
+                flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
+                        fl.fl_end - fl.fl_start + 1;
+                flock->l_whence = SEEK_SET;
+                flock->l_type = fl.fl_type;
+        }
+
+        return 0;
+}
+
+/*
+ * F_SETLK/F_SETLKW helper: convert @flock, check that the requested lock
+ * type is compatible with the way the file was opened, then hand the
+ * request to llu_file_flock().
+ *
+ * Returns the conversion error, -EBADF when a read (write) lock is asked
+ * for on a file not opened for reading (writing), -EINVAL for an unknown
+ * lock type, otherwise the result of llu_file_flock().
+ */
+static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
+{
+        /* presumably "open flags + 1" maps the access mode onto the
+         * FMODE_READ/FMODE_WRITE bits -- confirm against their values */
+        int fmode = llu_i2info(ino)->lli_open_flags + 1;
+        struct file_lock fl;
+        int rc;
+
+        rc = flock_to_posix_lock(ino, &fl, flock);
+        if (rc)
+                return rc;
+
+        if (cmd == F_SETLKW)
+                fl.fl_flags |= FL_SLEEP;
+
+        if (flock->l_type == F_RDLCK) {
+                if (!(fmode & FMODE_READ))
+                        return -EBADF;
+        } else if (flock->l_type == F_WRLCK) {
+                if (!(fmode & FMODE_WRITE))
+                        return -EBADF;
+        } else if (flock->l_type != F_UNLCK) {
+                return -EINVAL;
+        }
+
+        return llu_file_flock(ino, cmd, &fl);
+}
+#endif
+
+/*
+ * fcntl() inode operation.
+ *
+ * F_GETFL stores the cached open flags of @ino in *rtn.  F_SETFL replaces
+ * the FCNTL_FLMASK bits of the cached open flags with the ones supplied by
+ * the caller, refusing any bit in FCNTL_FLMASK_INVALID.  The byte-range
+ * lock commands are compiled out, so every other command is reported as
+ * unsupported.
+ *
+ * NOTE(review): failures are returned as positive errno values while *rtn
+ * receives the negated value, whereas llu_iop_ioctl() returns -ENOSYS.
+ * Confirm which sign the caller of this hook expects.
+ */
+static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
+{
+        struct llu_inode_info *lli = llu_i2info(ino);
+        long flags;
+#if 0
+        /* only needed by the disabled lock commands below */
+        struct flock *flock;
+        long err;
+#endif
+
+        switch (cmd) {
+        case F_GETFL:
+                *rtn = lli->lli_open_flags;
+                return 0;
+        case F_SETFL:
+                /* bits outside FCNTL_FLMASK are silently ignored */
+                flags = va_arg(ap, long) & FCNTL_FLMASK;
+                if (flags & FCNTL_FLMASK_INVALID) {
+                        /* NOTE(review): the message also names O_DIRECT, but
+                         * FCNTL_FLMASK_INVALID only covers O_NONBLOCK and
+                         * O_ASYNC -- confirm which of the two is intended */
+                        CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, "
+                               "and O_DIRECT on file descriptor\n");
+                        *rtn = -EINVAL;
+                        return EINVAL;
+                }
+                lli->lli_open_flags = (int)flags |
+                                      (lli->lli_open_flags & ~FCNTL_FLMASK);
+                *rtn = 0;
+                return 0;
+#if 0
+        case F_GETLK:
+                flock = va_arg(ap, struct flock *);
+                err = llu_fcntl_getlk(ino, flock);
+                *rtn = err? -1: 0;
+                return err;
+        case F_SETLK:
+        case F_SETLKW:
+                flock = va_arg(ap, struct flock *);
+                err = llu_fcntl_setlk(ino, cmd, flock);
+                *rtn = err? -1: 0;
+                return err;
+#endif
+        }
+
+        CERROR("unsupported fcntl cmd %x\n", cmd);
+        *rtn = -ENOSYS;
+        return ENOSYS;
+}
+
+#if 0
+/*
+ * Take a LCK_GROUP extent lock covering [0, OBD_OBJECT_EOF] with group id
+ * @arg and remember it in the inode's file data.
+ *
+ * Returns -EINVAL if the file data is already flagged as group locked, the
+ * error from llu_extent_lock() if the enqueue fails, 0 otherwise.
+ */
+static int llu_get_grouplock(struct inode *inode, unsigned long arg)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct lustre_handle lockh = { 0 };
+        ldlm_policy_data_t whole_file = { .l_extent = { .start = 0,
+                                                        .end = OBD_OBJECT_EOF}};
+        ldlm_error_t rc;
+        int lock_flags;
+        ENTRY;
+
+        if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
+                RETURN(-EINVAL);
+
+        whole_file.l_extent.gid = arg;
+
+        /* files opened with O_NONBLOCK must not wait for the lock */
+        lock_flags = (lli->lli_open_flags & O_NONBLOCK) ?
+                     LDLM_FL_BLOCK_NOWAIT : 0;
+
+        rc = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &whole_file, &lockh,
+                             lock_flags);
+        if (rc)
+                RETURN(rc);
+
+        /* record the lock so llu_put_grouplock() can release it later */
+        memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
+        fd->fd_gid = arg;
+        fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
+
+        RETURN(0);
+}
+
+/*
+ * Drop the group lock previously recorded by llu_get_grouplock().
+ *
+ * Returns -EINVAL if no group lock is recorded or @arg is not the recorded
+ * group id, the error from llu_extent_unlock() if the unlock fails, 0
+ * otherwise.  The group-lock flags are cleared before the unlock is
+ * attempted; the stored gid and handle are only reset after it succeeds.
+ */
+static int llu_put_grouplock(struct inode *inode, unsigned long arg)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        ldlm_error_t rc;
+        ENTRY;
+
+        if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED) || fd->fd_gid != arg)
+                RETURN(-EINVAL);
+
+        fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
+
+        rc = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
+        if (rc)
+                RETURN(rc);
+
+        memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
+        fd->fd_gid = 0;
+
+        RETURN(0);
+}
+#endif
+
 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
                          va_list ap)
 {
-        CERROR("liblustre did not support ioctl\n");
+        unsigned long arg;
+
+        liblustre_wait_event(0);
+
+        switch (request) {
+#if 0
+        case LL_IOC_GROUP_LOCK:
+                arg = va_arg(ap, unsigned long);
+                return llu_get_grouplock(ino, arg);
+        case LL_IOC_GROUP_UNLOCK:
+                arg = va_arg(ap, unsigned long);
+                return llu_put_grouplock(ino, arg);
+#endif
+        }
+
+        CERROR("did not support ioctl cmd %lx\n", request);
         return -ENOSYS;
 }
 
@@ -1182,11 +1542,13 @@ static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
  */
 static int llu_iop_sync(struct inode *inode)
 {
+        liblustre_wait_event(0); /* no flush is issued from this function */
         return 0;
 }
 
 static int llu_iop_datasync(struct inode *inode)
 {
+        liblustre_wait_event(0); /* no flush is issued from this function */
         return 0;
 }
 
@@ -1203,8 +1565,11 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
 
         if ((md->body->valid &
              (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
-            (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE))
-                CERROR("invalide fields!\n");
+            (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
+                CERROR("bad md body valid mask 0x%x\n", md->body->valid);
+                LBUG();
+                return ERR_PTR(-EPERM);
+        }
 
         /* try to find existing inode */
         fid.id = md->body->ino;
@@ -1215,9 +1580,10 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
         if (inode) {
                 struct llu_inode_info *lli = llu_i2info(inode);
 
-                if (lli->lli_stale_flag ||
-                    lli->lli_st_generation != md->body->generation)
+                if (inode->i_zombie ||
+                    lli->lli_st_generation != md->body->generation) {
                         I_RELE(inode);
+                }
                 else {
                         llu_update_inode(inode, md->body, md->lsm);
                         return inode;
@@ -1253,12 +1619,25 @@ llu_fsswop_mount(const char *source,
         struct lustre_handle osc_conn = {0, };
         struct lustre_md md;
         class_uuid_t uuid;
+        struct config_llog_instance cfg;
         struct lustre_profile *lprof;
+       char *zconf_mdsnid, *zconf_mdsname, *zconf_profile;
         char *osc = NULL, *mdc = NULL;
-        int err = -EINVAL;
+        int async = 1, err = -EINVAL;
 
         ENTRY;
 
+        if (ll_parse_mount_target(source,
+                                  &zconf_mdsnid,
+                                  &zconf_mdsname,
+                                  &zconf_profile)) {
+                CERROR("mal-formed target %s\n", source);
+                RETURN(err);
+        }
+        if (!zconf_mdsnid || !zconf_mdsname || !zconf_profile) {
+                printf("Liblustre: invalid target %s\n", source);
+                RETURN(err);
+        }
         /* allocate & initialize sbi */
         OBD_ALLOC(sbi, sizeof(*sbi));
         if (!sbi)
@@ -1268,61 +1647,39 @@ llu_fsswop_mount(const char *source,
         generate_random_uuid(uuid);
         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
 
-        /* zeroconf */
-        if (g_zconf) {
-                struct config_llog_instance cfg;
-                int len;
-
-                if (!g_zconf_mdsname) {
-                        CERROR("no mds name\n");
-                        GOTO(out_free, err = -EINVAL);
-                }
-
-                /* generate a string unique to this super, let's try
-                 the address of the super itself.*/
-                len = (sizeof(sbi) * 2) + 1; 
-                OBD_ALLOC(sbi->ll_instance, len);
-                if (sbi->ll_instance == NULL) 
-                        GOTO(out_free, err = -ENOMEM);
-                sprintf(sbi->ll_instance, "%p", sbi);
-
-                cfg.cfg_instance = sbi->ll_instance;
-                cfg.cfg_uuid = sbi->ll_sb_uuid;
-                err = liblustre_process_log(&cfg, 1);
-                if (err < 0) {
-                        CERROR("Unable to process log: %s\n", g_zconf_profile);
-
-                        GOTO(out_free, err);
-                }
-
-                lprof = class_get_profile(g_zconf_profile);
-                if (lprof == NULL) {
-                        CERROR("No profile found: %s\n", g_zconf_profile);
-                        GOTO(out_free, err = -EINVAL);
-                }
-                if (osc)
-                        OBD_FREE(osc, strlen(osc) + 1);
-                OBD_ALLOC(osc, strlen(lprof->lp_osc) + 
-                          strlen(sbi->ll_instance) + 2);
-                sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance);
-
-                if (mdc)
-                        OBD_FREE(mdc, strlen(mdc) + 1);
-                OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + 
-                          strlen(sbi->ll_instance) + 2);
-                sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance);
-        } else {
-                /* setup from dump_file */
-                if (list_empty(&lustre_profile_list)) {
-                        CERROR("no profile\n");
-                        GOTO(out_free, err = -EINVAL);
-                }
+        /* generate a string unique to this super, let's try
+         the address of the super itself.*/
+        OBD_ALLOC(sbi->ll_instance, sizeof(sbi) * 2 + 1);
+        if (sbi->ll_instance == NULL) 
+                GOTO(out_free, err = -ENOMEM);
+        sprintf(sbi->ll_instance, "%p", sbi);
+
+        /* retrive & parse config log */
+        cfg.cfg_instance = sbi->ll_instance;
+        cfg.cfg_uuid = sbi->ll_sb_uuid;
+        err = liblustre_process_log(&cfg,
+                               zconf_mdsnid, zconf_mdsname, zconf_profile, 1);
+        if (err < 0) {
+                CERROR("Unable to process log: %s\n", zconf_profile);
+                GOTO(out_free, err);
+        }
 
-                lprof = list_entry(lustre_profile_list.next,
-                                   struct lustre_profile, lp_list);
-                osc = lprof->lp_osc;
-                mdc = lprof->lp_mdc;
+        lprof = class_get_profile(zconf_profile);
+        if (lprof == NULL) {
+                CERROR("No profile found: %s\n", zconf_profile);
+                GOTO(out_free, err = -EINVAL);
         }
+        if (osc)
+                OBD_FREE(osc, strlen(osc) + 1);
+        OBD_ALLOC(osc, strlen(lprof->lp_osc) + 
+                  strlen(sbi->ll_instance) + 2);
+        sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance);
+
+        if (mdc)
+                OBD_FREE(mdc, strlen(mdc) + 1);
+        OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + 
+                  strlen(sbi->ll_instance) + 2);
+        sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance);
 
         if (!osc) {
                 CERROR("no osc\n");
@@ -1344,6 +1701,8 @@ llu_fsswop_mount(const char *source,
                 CERROR("MDC %s: not setup or attached\n", mdc);
                 GOTO(out_free, err = -EINVAL);
         }
+        obd_set_info(obd->obd_self_export, strlen("async"), "async",
+                     sizeof(async), &async);
 
         /* setup mdc */
         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, NULL /* ocd */);
@@ -1367,6 +1726,8 @@ llu_fsswop_mount(const char *source,
                 CERROR("OSC %s: not setup or attached\n", osc);
                 GOTO(out_mdc, err = -EINVAL);
         }
+        obd_set_info(obd->obd_self_export, strlen("async"), "async",
+                     sizeof(async), &async);
 
         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, NULL /* ocd */);
         if (err) {
@@ -1402,7 +1763,7 @@ llu_fsswop_mount(const char *source,
         LASSERT(sbi->ll_rootino != 0);
 
         root = llu_iget(fs, &md);
-        if (root == NULL) {
+        if (!root || IS_ERR(root)) {
                 CERROR("fail to generate root inode\n");
                 GOTO(out_request, err = -EBADF);
         }
@@ -1424,7 +1785,7 @@ llu_fsswop_mount(const char *source,
 
         ptlrpc_req_finished(request);
 
-        printf("LibLustre: namespace mounted successfully!\n");
+        CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
 
         return 0;
 
@@ -1459,8 +1820,9 @@ static struct inode_ops llu_inode_ops = {
         inop_link:      llu_iop_link_raw,
         inop_unlink:    llu_iop_unlink_raw,
         inop_rename:    llu_iop_rename_raw,
-        inop_ipreadv:   llu_iop_ipreadv,
-        inop_ipwritev:  llu_iop_ipwritev,
+        inop_pos:       llu_iop_pos,
+        inop_read:      llu_iop_read,
+        inop_write:     llu_iop_write,
         inop_iodone:    llu_iop_iodone,
         inop_fcntl:     llu_iop_fcntl,
         inop_sync:      llu_iop_sync,
@@ -1472,5 +1834,3 @@ static struct inode_ops llu_inode_ops = {
 #endif
         inop_gone:      llu_iop_gone,
 };
-
-#warning "time_after() defined in liblustre.h need to be rewrite in userspace"
index 0a9a1c0..616fea4 100644 (file)
@@ -2,47 +2,53 @@
 
 AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals
 AM_CFLAGS = $(LLCFLAGS)
-LIBS = $(LIBEFENCE) $(LIBREADLINE)
+AM_LIBS = $(LIBEFENCE) $(LIBREADLINE)
 
-LLIB_EXEC= ../liblustre.a -lpthread
+LLIB_EXEC= $(top_builddir)/lustre/liblustre/liblustre.a $(CAP_LIBS) $(PTHREAD_LIBS)
 
 if LIBLUSTRE
 noinst_LIBRARIES = libtestcommon.a
-def_tests = echo_test sanity recovery_small replay_single replay_ost_single
+
+if LIBLUSTRE_TESTS
+noinst_PROGRAMS = sanity recovery_small replay_single replay_ost_single
+
+if TESTS
+noinst_PROGRAMS += echo_test
+endif # TESTS
 
 if MPITESTS
-noinst_PROGRAMS = $(def_tests) test_lock_cancel
-else
-noinst_PROGRAMS = $(def_tests)
-endif
+noinst_PROGRAMS += test_lock_cancel
+endif # MPITESTS
+
+endif # LIBLUSTRE_TESTS
 endif # LIBLUSTRE
 
 libtestcommon_a_SOURCES = test_common.c test_common.h
 
-echo_test_SOURCES = echo_test.c $(top_srcdir)/lustre/utils/parser.c $(top_srcdir)/lustre/utils/obd.c $(top_srcdir)/lustre/utils/lustre_cfg.c
+echo_test_SOURCES = echo_test.c  $(top_srcdir)/lustre/utils/parser.c $(top_srcdir)/lustre/utils/obd.c $(top_srcdir)/lustre/utils/lustre_cfg.c
 echo_test_CFLAGS = $(LL_CFLAGS)
-echo_test_LDADD = ../liblsupport.a $(LIBREADLINE) -lpthread 
+echo_test_LDADD = $(top_builddir)/lustre/liblustre/liblsupport.a $(LIBREADLINE) $(CAP_LIBS) $(PTHREAD_LIBS) 
 echo_test_DEPENDENCIES=$(top_builddir)/lustre/liblustre/liblsupport.a
 
 sanity_SOURCES = sanity.c
 sanity_CFLAGS = $(LL_CFLAGS)
-sanity_LDADD := ./libtestcommon.a $(LLIB_EXEC)
-sanity_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a ./libtestcommon.a
+sanity_LDADD := libtestcommon.a $(LLIB_EXEC)
+sanity_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 recovery_small_SOURCES = recovery_small.c
 recovery_small_CFLAGS = $(LL_CFLAGS)
-recovery_small_LDADD := ./libtestcommon.a $(LLIB_EXEC) 
-recovery_small_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a
+recovery_small_LDADD := libtestcommon.a $(LLIB_EXEC) 
+recovery_small_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 replay_single_SOURCES = replay_single.c
 replay_single_CFLAGS = $(LL_CFLAGS)
-replay_single_LDADD := ./libtestcommon.a $(LLIB_EXEC)
-replay_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a
+replay_single_LDADD := libtestcommon.a $(LLIB_EXEC)
+replay_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 replay_ost_single_SOURCES = replay_ost_single.c
 replay_ost_single_CFLAGS = $(LL_CFLAGS)
-replay_ost_single_LDADD := ./libtestcommon.a $(LLIB_EXEC)
-replay_ost_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a
+replay_ost_single_LDADD := libtestcommon.a $(LLIB_EXEC)
+replay_ost_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 if MPITESTS
 test_lock_cancel_SOURCES = test_lock_cancel.c
index 1b70246..12816f1 100644 (file)
@@ -1,67 +1,37 @@
-#include <stdio.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-
-#include <portals/api-support.h> /* needed for ptpctl.h */
-#include <portals/ptlctl.h>    /* needed for parse_dump */
-
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light user test program
+ *
+ *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
 
 #include <liblustre.h>
 #include <linux/obd.h>
 #include <linux/obd_class.h>
-#include <procbridge.h>
 #include <linux/obd_ost.h>
 
 #define LIBLUSTRE_TEST 1
 #include "../utils/lctl.c"
 
-struct ldlm_namespace;
-struct ldlm_res_id;
-struct obd_import;
-
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);
-
-void *inter_module_get(char *arg)
-{
-        if (!strcmp(arg, "tcpnal_ni"))
-                return &tcpnal_ni;
-        else if (!strcmp(arg, "ldlm_cli_cancel_unused"))
-                return ldlm_cli_cancel_unused;
-        else if (!strcmp(arg, "ldlm_namespace_cleanup"))
-                return ldlm_namespace_cleanup;
-        else if (!strcmp(arg, "ldlm_replay_locks"))
-                return ldlm_replay_locks;
-        else
-                return NULL;
-}
-
-/* XXX move to proper place */
-char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
-{
-        switch(nal){
-        case TCPNAL:
-                /* userspace NAL */
-        case SOCKNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u",
-                         (__u32)(nid >> 32), HIPQUAD(nid));
-                break;
-        case QSWNAL:
-        case GMNAL:
-        case IBNAL:
-        case SCIMACNAL:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u",
-                         (__u32)(nid >> 32), (__u32)nid);
-                break;
-        default:
-                snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx",
-                         nal, (long long)nid);
-                break;
-        }
-        return str;
-}
+#include "../lutil.h"
 
-ptl_handle_ni_t         tcpnal_ni;
+extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
 
 struct pingcli_args {
         ptl_nid_t mynid;
@@ -71,78 +41,7 @@ struct pingcli_args {
         int size;
 };
 
-struct task_struct *current;
-
-/* portals interfaces */
-ptl_handle_ni_t *
-kportal_get_ni (int nal)
-{
-        switch (nal)
-        {
-        case SOCKNAL:
-                return &tcpnal_ni;
-        default:
-                return NULL;
-        }
-}
-
-inline void
-kportal_put_ni (int nal)
-{
-        return;
-}
-
-int
-kportal_nal_cmd(struct portals_cfg *pcfg)
-{
-#if 0
-        __u32 nal = pcfg->pcfg_nal;
-        int rc = -EINVAL;
-
-        ENTRY;
-
-        down(&nal_cmd_sem);
-        if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) {
-                CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, 
-                       pcfg->pcfg_command);
-                rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private);
-        }
-        up(&nal_cmd_sem);
-        RETURN(rc);
-#else
-        CERROR("empty function!!!\n");
-        return 0;
-#endif
-}
-
-int init_current(int argc, char **argv)
-{ 
-        current = malloc(sizeof(*current));
-        strncpy(current->comm, argv[0], sizeof(current->comm));
-        current->pid = getpid();
-       return 0;
-}
-
-ptl_nid_t tcpnal_mynid;
-
-int init_lib_portals()
-{
-        int rc;
-
-        PtlInit();
-        rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
-        if (rc != 0) {
-                CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
-                PtlFini();
-                RETURN (rc);
-        }
-        PtlNIDebug(tcpnal_ni, ~0);
-        return rc;
-}
-
-extern int class_handle_ioctl(unsigned int cmd, unsigned long arg);
-
-int liblustre_ioctl(int dev_id, int opc, void *ptr)
+static int liblustre_ioctl(int dev_id, unsigned int opc, void *ptr)
 {
        int   rc = -EINVAL;
        
@@ -160,15 +59,6 @@ int liblustre_ioctl(int dev_id, int opc, void *ptr)
        return rc;
 }
 
-static void generate_random_uuid(unsigned char uuid_out[16])
-{
-        int *arr = (int*)uuid_out;
-        int i;
-
-        for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++)
-                arr[i] = rand();
-}
-
 static char *echo_server_nid = NULL;
 static char *echo_server_ostname = "obd1";
 static char *osc_dev_name = "OSC_DEV_NAME";
@@ -348,18 +238,15 @@ int main(int argc, char **argv)
                return 1;
        }
 
-        srand(time(NULL));
-
-       tcpnal_mynid = rand();
-#if 1
        portal_debug = 0;
        portal_subsystem_debug = 0;
-#endif
 
-        if (init_current(argc, argv) ||
+        liblustre_init_random();
+        liblustre_set_nal_nid();
+
+        if (liblustre_init_current(argv[0]) ||
            init_obdclass() || init_lib_portals() ||
            ptlrpc_init() ||
-           ldlm_init() ||
            mdc_init() ||
            lov_init() ||
            osc_init() ||
index 5aed06c..6cd9ba4 100644 (file)
@@ -39,6 +39,8 @@
 
 #include "test_common.h"
 
+#define MAX_STRING_SIZE 2048
+
 static struct {
         const char   *name;
         unsigned long code;
@@ -52,6 +54,7 @@ static struct {
 static int drop_index = 0;
 
-static char mds_server[1024] = {0, };
+static char mds_server[MAX_STRING_SIZE] = {0, }; /* main() copies -s with this bound */
+static char ssh_cmd[MAX_STRING_SIZE] = {0,};
 
 int do_stat(const char *name, struct stat *buf)
 {
@@ -121,14 +124,14 @@ void cleanup_dir(const char *path)
 
 #define FAIL()                                                             \
     do {                                                                   \
-        char cmd[1024];                                                    \
+        char cmd[MAX_STRING_SIZE];                                         \
         int rc;                                                            \
                                                                            \
         if (drop_arr[drop_index].name) {                                   \
             printf("server drops next %s\n", drop_arr[drop_index].name);   \
             sprintf(cmd,                                                   \
-                    "ssh %s \"echo %lu > /proc/sys/lustre/fail_loc\"",     \
-                    mds_server, drop_arr[drop_index].code);                \
+                    "%s %s \"echo %lu > /proc/sys/lustre/fail_loc\"",      \
+                    ssh_cmd, mds_server, drop_arr[drop_index].code);       \
             if (system(cmd)) {                                             \
                 printf("error excuting remote command: %d\n", rc);         \
                 exit(rc);                                                  \
@@ -141,8 +144,8 @@ void cleanup_dir(const char *path)
         char cmd[1024];                                                    \
                                                                            \
         if (drop_arr[drop_index].name) {                                   \
-            sprintf(cmd, "ssh %s \"echo 0 > /proc/sys/lustre/fail_loc\"",  \
-                    mds_server);                                           \
+            sprintf(cmd, "%s %s \"echo 0 > /proc/sys/lustre/fail_loc\"",   \
+                    ssh_cmd, mds_server);                                  \
             system(cmd);                                                   \
         }                                                                  \
     } while (0)
@@ -313,6 +316,7 @@ int main(int argc, char * argv[])
         static struct option long_opts[] = {
                 {"target", 1, 0, 0},
                 {"dumpfile", 1, 0, 0},
+                {"ssh", 1, 0, 0},
                 {0, 0, 0, 0}
         };
 
@@ -329,12 +333,14 @@ int main(int argc, char * argv[])
                                 setenv(ENV_LUSTRE_MNTTGT, optarg, 1);
                         } else if (!strcmp(long_opts[opt_index].name, "dumpfile")) {
                                 setenv(ENV_LUSTRE_DUMPFILE, optarg, 1);
+                        } else if (!strcmp(long_opts[opt_index].name, "ssh")) {
+                                safe_strncpy(ssh_cmd, optarg, MAX_STRING_SIZE);
                         } else
                                 usage(argv[0]);
                         break;
                 }
                 case 's':
-                        strcpy(mds_server, optarg);
+                        safe_strncpy(mds_server, optarg, MAX_STRING_SIZE);
                         break;
                 default:
                         usage(argv[0]);
@@ -347,13 +353,18 @@ int main(int argc, char * argv[])
         if (strlen(mds_server) == 0)
                 usage(argv[0]);
 
-        sprintf(cmd, "ssh %s cat /dev/null", mds_server);
+        /* default to using ssh */
+        if (!strlen(ssh_cmd)) {
+                safe_strncpy(ssh_cmd, "ssh", MAX_STRING_SIZE);
+        }
+
+        sprintf(cmd, "%s %s cat /dev/null", ssh_cmd, mds_server);
         if (system(cmd)) {
-                printf("can't access server node: %s\n", mds_server);
+                printf("Can't access server node: %s using method: %s\n", mds_server, ssh_cmd);
                 exit(-1);
         }
 
-        setenv(ENV_LUSTRE_TIMEOUT, "10", 1);
+        setenv(ENV_LUSTRE_TIMEOUT, "5", 1);
 
         __liblustre_setup_();
 
@@ -362,7 +373,9 @@ int main(int argc, char * argv[])
                 t2();
                 t3();
                 t4();
+#if 0
                 t5();
+#endif
                 t6();
                 t7();
 
index 6645056..9628354 100644 (file)
 
 #include "test_common.h"
 
+#define MAX_STRING_SIZE 2048
 
-
-static char mds_server[1024] = {0,};
-static char barrier_script[1024] = {0,};
-static char failover_script[1024] = {0,};
-static char barrier_cmd[1024] = {0,};
-static char failover_cmd[1024] = {0,};
+static char mds_server[MAX_STRING_SIZE] = {0,};
+static char barrier_script[MAX_STRING_SIZE] = {0,};
+static char failover_script[MAX_STRING_SIZE] = {0,};
+static char barrier_cmd[MAX_STRING_SIZE] = {0,};
+static char failover_cmd[MAX_STRING_SIZE] = {0,};
+static char ssh_cmd[MAX_STRING_SIZE] = {0,};
 
 static void replay_barrier()
 {
@@ -91,9 +92,11 @@ static void mds_failover()
 
 void t0()
 {
+        char *path="/mnt/lustre/f0";
         ENTRY("empty replay");
         replay_barrier();
         mds_failover();
+        t_check_stat_fail("/mnt/lustre/f0");
         LEAVE();
 }
 
@@ -315,19 +318,19 @@ extern void __liblustre_cleanup_(void);
 void usage(const char *cmd)
 {
         printf("Usage: \t%s --target mdsnid:/mdsname/profile -s mds_hostname "
-                "-b \"barrier cmd\" -f \"failover cmd\"\n", cmd);
+                "-b \"barrier cmd\" -f \"failover cmd\" [--rsh \"rsh_cmd\"]\n", cmd);
         printf("       \t%s --dumpfile dumpfile -s mds_hostname -b \"barrier cmd\" "
-                "-f \"failover cmd\"\n", cmd);
+                "-f \"failover cmd\" [--rsh \"rsh_cmd\"]\n", cmd);
         exit(-1);
 }
 
 void test_ssh()
 {
-        char cmd[1024];
+        char cmd[MAX_STRING_SIZE];
 
-        sprintf(cmd, "ssh %s cat /dev/null", mds_server);
+        sprintf(cmd, "%s %s cat /dev/null", ssh_cmd, mds_server);
         if (system(cmd)) {
-                printf("ssh can't access server node: %s\n", mds_server);
+                printf("Can't access server node: %s using method: %s\n", mds_server, ssh_cmd);
                 exit(-1);
         }
 }
@@ -338,6 +341,7 @@ int main(int argc, char * const argv[])
         static struct option long_opts[] = {
                 {"target", 1, 0, 0},
                 {"dumpfile", 1, 0, 0},
+                {"ssh", 1, 0, 0},
                 {0, 0, 0, 0}
         };
 
@@ -354,18 +358,20 @@ int main(int argc, char * const argv[])
                                 setenv(ENV_LUSTRE_MNTTGT, optarg, 1);
                         } else if (!strcmp(long_opts[opt_index].name, "dumpfile")) {
                                 setenv(ENV_LUSTRE_DUMPFILE, optarg, 1);
+                        } else if (!strcmp(long_opts[opt_index].name, "ssh")) {
+                                safe_strncpy(ssh_cmd, optarg, MAX_STRING_SIZE);
                         } else
                                 usage(argv[0]);
                         break;
                 }
                 case 's':
-                        strcpy(mds_server, optarg);
+                        safe_strncpy(mds_server, optarg, MAX_STRING_SIZE);
                         break;
                 case 'b':
-                        strcpy(barrier_script, optarg);
+                        safe_strncpy(barrier_script, optarg, MAX_STRING_SIZE);
                         break;
                 case 'f':
-                        strcpy(failover_script, optarg);
+                        safe_strncpy(failover_script, optarg, MAX_STRING_SIZE);
                         break;
                 default:
                         usage(argv[0]);
@@ -378,11 +384,18 @@ int main(int argc, char * const argv[])
             !strlen(failover_script))
                 usage(argv[0]);
 
+        /* default to using ssh */
+        if (!strlen(ssh_cmd)) {
+                safe_strncpy(ssh_cmd, "ssh", MAX_STRING_SIZE);
+        }
+
         test_ssh();
 
         /* prepare remote command */
-        sprintf(barrier_cmd, "ssh %s \"%s\"", mds_server, barrier_script);
-        sprintf(failover_cmd, "ssh %s \"%s\"", mds_server, failover_script);
+        sprintf(barrier_cmd, "%s %s \"%s\"", 
+                ssh_cmd, mds_server, barrier_script);
+        sprintf(failover_cmd, "%s %s \"%s\"", 
+                ssh_cmd, mds_server, failover_script);
 
         setenv(ENV_LUSTRE_TIMEOUT, "10", 1);
 
index 944ae9c..acea41e 100644 (file)
 #include <fcntl.h>
 #include <sys/queue.h>
 #include <signal.h>
-
-#include <sysio.h>
-#include <mount.h>
+#include <errno.h>
+#include <dirent.h>
+#include <sys/uio.h>
+#include <sys/time.h>
 
 #include "test_common.h"
 
+extern char *lustre_path;
+
 #define ENTRY(str)                                                      \
         do {                                                            \
                 char buf[100];                                          \
                 int len;                                                \
-                sprintf(buf, "===== START: %s ", (str));                \
+                sprintf(buf, "===== START %s: %s ", __FUNCTION__, (str)); \
                 len = strlen(buf);                                      \
                 if (len < 79) {                                         \
                         memset(buf+len, '=', 100-len);                  \
 
 #define LEAVE()                                                         \
         do {                                                            \
-                printf("----- END TEST successfully ---");              \
-                printf("-----------------------------");                \
-                printf("-------------------\n");                        \
+                char buf[100];                                          \
+                int len;                                                \
+                sprintf(buf, "===== END TEST %s: successfully ",        \
+                        __FUNCTION__);                                  \
+                len = strlen(buf);                                      \
+                if (len < 79) {                                         \
+                        memset(buf+len, '=', 100-len);                  \
+                        buf[79] = '\n';                                 \
+                        buf[80] = 0;                                    \
+                }                                                       \
+                printf("%s", buf);                                      \
         } while (0)
 
+#define MAX_PATH_LENGTH 4096
+
 void t1()
 {
-        char *path="/mnt/lustre/test_t1";
+        char path[MAX_PATH_LENGTH] = "";
+
         ENTRY("create/delete");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t1", lustre_path);
 
         t_touch(path);
         t_unlink(path);
@@ -72,8 +87,10 @@ void t1()
 
 void t2()
 {
-        char *path="/mnt/lustre/test_t2";
+        char path[MAX_PATH_LENGTH] = "";
+
         ENTRY("mkdir/rmdir");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t2", lustre_path);
 
         t_mkdir(path);
         t_rmdir(path);
@@ -82,8 +99,10 @@ void t2()
 
 void t3()
 {
-        char *path="/mnt/lustre/test_t3";
+        char path[MAX_PATH_LENGTH] = "";
+
         ENTRY("regular stat");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t3", lustre_path);
 
         t_touch(path);
         t_check_stat(path, NULL);
@@ -93,8 +112,10 @@ void t3()
 
 void t4()
 {
-        char *path="/mnt/lustre/test_t4";
+        char path[MAX_PATH_LENGTH] = "";
+
         ENTRY("dir stat");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t4", lustre_path);
 
         t_mkdir(path);
         t_check_stat(path, NULL);
@@ -102,115 +123,54 @@ void t4()
         LEAVE();
 }
 
-#define PAGE_SIZE (4096)
-#define _npages (2048)
-
-static int _buffer[_npages][PAGE_SIZE/sizeof(int)];
-
-/* pos:   i/o start from
- * xfer:  npages per transfer
- */
-static void pages_io(int xfer, loff_t pos)
+void t6()
 {
-        char *path="/mnt/lustre/test_t5";
-        int check_sum[_npages] = {0,};
-        int fd, rc, i, j;
-
-        memset(_buffer, 0, sizeof(_buffer));
-
-        /* create sample data */
-        for (i = 0; i < _npages; i++) {
-                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
-                        _buffer[i][j] = rand();
-                }
-        }
+        char path[MAX_PATH_LENGTH] = "";
+        char path2[MAX_PATH_LENGTH] = "";
 
-        /* compute checksum */
-        for (i = 0; i < _npages; i++) {
-                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
-                        check_sum[i] += _buffer[i][j];
-                }
-        }
+        ENTRY("symlink");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t6", lustre_path);
+        snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6_link", lustre_path);
 
         t_touch(path);
-
-       fd = t_open(path);
-
-        /* write */
-       lseek(fd, pos, SEEK_SET);
-       for (i = 0; i < _npages; i += xfer) {
-               rc = write(fd, _buffer[i], PAGE_SIZE * xfer);
-                if (rc != PAGE_SIZE * xfer) {
-                        printf("write error %d (i = %d)\n", rc, i);
-                        exit(1);
-                }
-       }
-        printf("succefully write %d pages(%d per xfer)\n", _npages, xfer);
-        memset(_buffer, 0, sizeof(_buffer));
-
-        /* read */
-       lseek(fd, pos, SEEK_SET);
-       for (i = 0; i < _npages; i += xfer) {
-               rc = read(fd, _buffer[i], PAGE_SIZE * xfer);
-                if (rc != PAGE_SIZE * xfer) {
-                        printf("read error %d (i = %d)\n", rc, i);
-                        exit(1);
-                }
-       }
-        printf("succefully read %d pages(%d per xfer)\n", _npages, xfer);
-
-        /* compute checksum */
-        for (i = 0; i < _npages; i++) {
-                int sum = 0;
-                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
-                        sum += _buffer[i][j];
-                }
-                if (sum != check_sum[i]) {
-                        printf("chunk %d checksum error: expected 0x%x, get 0x%x\n",
-                                i, check_sum[i], sum);
-                }
-        }
-        printf("checksum verified OK!\n");
-
-       t_close(fd);
+        t_symlink(path, path2);
+        t_check_stat(path2, NULL);
+        t_unlink(path2);
         t_unlink(path);
+        LEAVE();
 }
 
-void t5()
+void t6b()
 {
-        char text[256];
-        loff_t off_array[] = {1, 4, 17, 255, 258, 4095, 4097, 8191, 1024*1024*1024};
-        int np = 1, i;
-        loff_t offset = 0;
+        char path[MAX_PATH_LENGTH] = "";
+        char path2[MAX_PATH_LENGTH] = "";
+        char cwd[MAX_PATH_LENGTH] = "";
+        char *tmp;
+        int fd;
 
-        while (np <= _npages) {
-                sprintf(text, "pages_io: %d per transfer, offset %lld",
-                        np, offset);
-                ENTRY(text);
-                pages_io(np, offset);
-                LEAVE();
-                np += np;
-        }
+        ENTRY("symlink + chdir and open");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t6b", lustre_path);
+        snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6b_link", lustre_path);
 
-        for (i = 0; i < sizeof(off_array)/sizeof(loff_t); i++) {
-                offset = off_array[i];
-                sprintf(text, "pages_io: 16 per transfer, offset %lld",
-                        offset);
-                ENTRY(text);
-                pages_io(16, offset);
+        t_mkdir(path);
+        t_symlink(path, path2);
+        t_check_stat(path2, NULL);
+
+        tmp = getcwd(cwd, MAX_PATH_LENGTH);
+        if (tmp == NULL) {
+                fprintf(stderr, "current path too long to fit in "
+                        "MAX_PATH_LENGTH?\n");
                 LEAVE();
+                return;
         }
-}
+        t_chdir(path2);
+        t_chdir(cwd);
+        t_rmdir(path);
+        t_touch(path);
 
-void t6()
-{
-        char *path="/mnt/lustre/test_t6";
-        char *path2="/mnt/lustre/test_t6_link";
-        ENTRY("symlink");
+        fd = t_open(path2);
+        t_close(fd);
 
-        t_touch(path);
-        t_symlink(path, path2);
-        t_check_stat(path2, NULL);
         t_unlink(path2);
         t_unlink(path);
         LEAVE();
@@ -218,19 +178,32 @@ void t6()
 
 void t7()
 {
-        char *path="/mnt/lustre/test_t7";
+        char path[MAX_PATH_LENGTH] = "";
+        int rc;
+
         ENTRY("mknod");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t7", lustre_path);
 
-        t_mknod(path, S_IFCHR | 0644, 5, 4);
-        t_check_stat(path, NULL);
-        t_unlink(path);
+        if (geteuid() != 0) {
+                rc = mknod(path, S_IFCHR | 0644, (5<<8 | 4));
+                if (rc != -1 || errno != EPERM) {
+                        printf("mknod shouldn't success: rc %d, errno %d\n",
+                                rc, errno);
+                }
+        } else {
+                t_mknod(path, S_IFCHR | 0644, 5, 4);
+                t_check_stat(path, NULL);
+                t_unlink(path);
+        }
         LEAVE();
 }
 
 void t8()
 {
-        char *path="/mnt/lustre/test_t8";
+        char path[MAX_PATH_LENGTH] = "";
+
         ENTRY("chmod");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t8", lustre_path);
 
         t_touch(path);
         t_chmod_raw(path, 0700);
@@ -241,9 +214,12 @@ void t8()
 
 void t9()
 {
-        char *path="/mnt/lustre/test_t9";
-        char *path2="/mnt/lustre/test_t9_link";
+        char path[MAX_PATH_LENGTH] = "";
+        char path2[MAX_PATH_LENGTH] = "";
+
         ENTRY("hard link");
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t9", lustre_path);
+        snprintf(path2, MAX_PATH_LENGTH, "%s/test_t9_link", lustre_path);
 
         t_touch(path);
         t_link(path, path2);
@@ -256,14 +232,22 @@ void t9()
 
 void t10()
 {
-        char *dir1="/mnt/lustre/test_t10_dir1";
-        char *dir2="/mnt/lustre/test_t10_dir2";
-        char *path1="/mnt/lustre/test_t10_reg1";
-        char *path2="/mnt/lustre/test_t10_reg2";
-        char *rename1="/mnt/lustre/test_t10_dir1/rename1";
-        char *rename2="/mnt/lustre/test_t10_dir2/rename2";
-        char *rename3="/mnt/lustre/test_t10_dir2/rename3";
+        char dir1[MAX_PATH_LENGTH] = "";
+        char dir2[MAX_PATH_LENGTH] = "";
+        char path1[MAX_PATH_LENGTH] = "";
+        char path2[MAX_PATH_LENGTH] = "";
+        char rename1[MAX_PATH_LENGTH] = "";
+        char rename2[MAX_PATH_LENGTH] = "";
+        char rename3[MAX_PATH_LENGTH] = "";
+
         ENTRY("rename");
+        snprintf(dir1, MAX_PATH_LENGTH, "%s/test_t10_dir1", lustre_path);
+        snprintf(dir2, MAX_PATH_LENGTH, "%s/test_t10_dir2", lustre_path);
+        snprintf(path1, MAX_PATH_LENGTH, "%s/test_t10_reg1", lustre_path);
+        snprintf(path2, MAX_PATH_LENGTH, "%s/test_t10_reg2", lustre_path);
+        snprintf(rename1, MAX_PATH_LENGTH, "%s/test_t10_dir1/rename1", lustre_path);
+        snprintf(rename2, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename2", lustre_path);
+        snprintf(rename3, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename3", lustre_path);
 
         t_mkdir(dir1);
         t_mkdir(dir2);
@@ -281,12 +265,12 @@ void t10()
 
 void t11()
 {
-        char *base="/mnt/lustre";
-        char path[4096], path2[4096];
+        char *base=lustre_path;
+        char path[MAX_PATH_LENGTH], path2[MAX_PATH_LENGTH];
         int i, j, level = 5, nreg = 5;
         ENTRY("deep tree");
 
-        strcpy(path, base);
+        safe_strncpy(path, base, MAX_PATH_LENGTH);
 
         for (i = 0; i < level; i++) {
                 for (j = 0; j < nreg; j++) {
@@ -299,7 +283,7 @@ void t11()
         }
 
         for (i = level; i > 0; i--) {
-                strcpy(path, base);
+                safe_strncpy(path, base, MAX_PATH_LENGTH);
                 for (j = 1; j < i; j++)
                         strcat(path, "/dir");
                 
@@ -317,10 +301,11 @@ void t11()
 
 void t12()
 {
-        char *dir="/mnt/lustre/test_t12_dir";
+        char dir[MAX_PATH_LENGTH] = "";
         char buf[1024*128];
         int fd;
         ENTRY("empty directory readdir");
+        snprintf(dir, MAX_PATH_LENGTH, "%s/test_t12_dir", lustre_path);
 
         t_mkdir(dir);
         fd = t_opendir(dir);
@@ -332,13 +317,14 @@ void t12()
 
 void t13()
 {
-        char *dir="/mnt/lustre/test_t13_dir/";
+        char dir[MAX_PATH_LENGTH] = "";
         char name[1024];
         char buf[1024];
         const int nfiles = 20;
         char *prefix = "test13_filename_prefix_";
         int fd, i;
         ENTRY("multiple entries directory readdir");
+        snprintf(dir, MAX_PATH_LENGTH, "%s/test_t13_dir/", lustre_path);
 
         t_mkdir(dir);
         printf("Creating %d files...\n", nfiles);
@@ -360,13 +346,16 @@ void t13()
 
 void t14()
 {
-        char *dir="/mnt/lustre/test_t14_dir/";
+        char dir[MAX_PATH_LENGTH] = "";
         char name[1024];
         char buf[1024];
         const int nfiles = 256;
         char *prefix = "test14_filename_long_prefix_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA___";
-        int fd, i;
+       struct dirent64 *ent;
+        int fd, i, rc, pos, index;
+       loff_t base = 0;
         ENTRY(">1 block(4k) directory readdir");
+        snprintf(dir, MAX_PATH_LENGTH, "%s/test_t14_dir/", lustre_path);
 
         t_mkdir(dir);
         printf("Creating %d files...\n", nfiles);
@@ -375,7 +364,35 @@ void t14()
                 t_touch(name);
         }
         fd = t_opendir(dir);
-        t_ls(fd, buf, sizeof(buf));
+        printf("Listing...\n");
+        index = 0;
+       while ((rc = getdirentries64(fd, buf, 1024, &base)) > 0) {
+               pos = 0;
+               while (pos < rc) {
+                        char *item;
+
+                       ent = (struct dirent64 *) ((char*) buf + pos);
+                        item = (char *) ent->d_name;
+                        if (!strcmp(item, ".") || !strcmp(item, ".."))
+                                goto iter;
+                        if (strstr(item, prefix) != item) {
+                                printf("found bad name %s\n", item);
+                                exit(-1);
+                        }
+                       printf("[%03d]: %s\n",
+                                index++, item + strlen(prefix));
+iter:
+                       pos += ent->d_reclen;
+               }
+       }
+       if (rc < 0) {
+               printf("getdents error %d\n", rc);
+                exit(-1);
+       }
+        if (index != nfiles) {
+                printf("get %d files != %d\n", index, nfiles);
+                exit(-1);
+        }
         t_close(fd);
         printf("Cleanup...\n");
         for (i = 0; i < nfiles; i++) {
@@ -388,9 +405,10 @@ void t14()
 
 void t15()
 {
-        char *file = "/mnt/lustre/test_t15_file";
+        char file[MAX_PATH_LENGTH] = "";
         int fd;
         ENTRY("open-stat-close");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t15_file", lustre_path);
 
         t_touch(file);
         fd = t_open(file);
@@ -400,6 +418,431 @@ void t15()
         LEAVE();
 }
 
+/*
+ * t16: create a file holding a short string with t_echo_create(), hand the
+ * same string to t_grep() for the read-side check, then remove the file.
+ */
+void t16()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        char *pattern = "aaaaaaaaaaaaaaaaaaaaaa";
+
+        ENTRY("small-write-read");
+        snprintf(file, sizeof(file), "%s/test_t16_file", lustre_path);
+
+        t_echo_create(file, pattern);
+        t_grep(file, pattern);
+
+        t_unlink(file);
+        LEAVE();
+}
+
+/*
+ * t17: create a file and unlink it while its descriptor is still open.
+ * The descriptor is intentionally never closed, as the test title says.
+ */
+void t17()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        int fd;
+
+        ENTRY("open-unlink without close");
+        snprintf(file, sizeof(file), "%s/test_t17_file", lustre_path);
+
+        if ((fd = open(file, O_WRONLY | O_CREAT, 0666)) < 0) {
+                printf("failed to create file: %s\n", strerror(errno));
+                exit(-1);
+        }
+
+        t_unlink(file);
+        LEAVE();
+}
+
+/*
+ * t18: append 128 bytes to a file three times, two seconds apart, and
+ * require that both atime and mtime reported by stat() strictly increase
+ * from one round to the next. Exits on any failure.
+ */
+void t18()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        char buf[128];
+        int fd, i;
+        struct stat statbuf[3];
+        ENTRY("write should change mtime/atime");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t18_file", lustre_path);
+
+        /* give the payload defined contents instead of writing stack garbage */
+        memset(buf, 'a', sizeof(buf));
+
+        for (i = 0; i < 3; i++) {
+                fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666);
+                if (fd < 0) {
+                        printf("error open file: %s\n", strerror(errno));
+                        exit(-1);
+                }
+                if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf)) {
+                        printf("error write file\n");
+                        exit(-1);
+                }
+                close(fd);
+                if(stat(file, &statbuf[i]) != 0) {
+                        printf("Error stat\n");
+                        exit(1);
+                }
+                /* time_t is not guaranteed to be unsigned long: cast for %lu */
+                printf("atime %lu, mtime %lu\n",
+                        (unsigned long)statbuf[i].st_atime,
+                        (unsigned long)statbuf[i].st_mtime);
+                /* make sure the next round lands on a later timestamp */
+                sleep(2);
+        }
+
+        for (i = 1; i < 3; i++) {
+                if ((statbuf[i].st_atime <= statbuf[i-1].st_atime) ||
+                    (statbuf[i].st_mtime <= statbuf[i-1].st_mtime)) {
+                        printf("time error\n");
+                        exit(-1);
+                }
+        }
+        t_unlink(file);
+        LEAVE();
+}
+
+/*
+ * t18b: call t_utime(file, NULL) three times, two seconds apart, and
+ * require that atime, mtime and ctime reported by stat() all strictly
+ * increase from one round to the next. Exits on any failure.
+ */
+void t18b()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        int i;
+        struct stat statbuf[3];
+        ENTRY("utime should change mtime/atime/ctime");
+        /* scratch file named after this test, like the other tests do */
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t18b_file", lustre_path);
+        t_touch(file);
+
+        for (i = 0; i < 3; i++) {
+                t_utime(file, NULL);
+                if(stat(file, &statbuf[i]) != 0) {
+                        printf("Error stat\n");
+                        exit(1);
+                }
+                /* time_t is not guaranteed to be unsigned long: cast for %lu */
+                printf("atime %lu, mtime %lu, ctime %lu\n",
+                       (unsigned long)statbuf[i].st_atime,
+                       (unsigned long)statbuf[i].st_mtime,
+                       (unsigned long)statbuf[i].st_ctime);
+                /* make sure the next round lands on a later timestamp */
+                sleep(2);
+        }
+
+        for (i = 1; i < 3; i++) {
+                if ((statbuf[i].st_atime <= statbuf[i-1].st_atime) ||
+                    (statbuf[i].st_mtime <= statbuf[i-1].st_mtime) ||
+                    (statbuf[i].st_ctime <= statbuf[i-1].st_ctime)) {
+                        printf("time error\n");
+                        exit(-1);
+                }
+        }
+        t_unlink(file);
+        LEAVE();
+}
+
+/*
+ * t19: create a non-empty file, reopen it with O_TRUNC, and require that
+ * stat() then reports a size of zero. Exits on any failure.
+ */
+void t19()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        int fd;
+        struct stat statbuf;
+        ENTRY("open(O_TRUNC) should truncate file to 0-length");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t19_file", lustre_path);
+
+        t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+        fd = open(file, O_RDWR|O_CREAT|O_TRUNC, (mode_t)0666);
+        if (fd < 0) {
+                printf("error open file: %s\n", strerror(errno));
+                exit(-1);
+        }
+        close(fd);
+        if(stat(file, &statbuf) != 0) {
+                printf("Error stat\n");
+                exit(1);
+        }
+        if (statbuf.st_size != 0) {
+                /* off_t may be wider than long: print through long long */
+                printf("size %lld is not zero\n", (long long)statbuf.st_size);
+                exit(-1);
+        }
+        t_unlink(file);
+        LEAVE();
+}
+
+/* t20 helper: the i/o call that produced ret must have failed with EFAULT. */
+static void t20_expect_efault(const char *what, ssize_t ret)
+{
+        if (ret == -1 && errno == EFAULT)
+                return;
+        printf("%s: ret %ld, errno %d\n", what, ret, errno);
+        exit(1);
+}
+
+/* t20 helper: the i/o call that produced ret must have moved want bytes. */
+static void t20_expect_count(const char *what, ssize_t ret, size_t want)
+{
+        if (ret == want)
+                return;
+        printf("%s ret %ld, error %d\n", what, ret, errno);
+        exit(1);
+}
+
+/* t20 helper: fill both entries of a two-element iovec array. */
+static void t20_fill_iov(struct iovec *iov, void *base0, size_t len0,
+                         void *base1, size_t len1)
+{
+        iov[0].iov_base = base0;
+        iov[0].iov_len = len0;
+        iov[1].iov_base = base1;
+        iov[1].iov_len = len1;
+}
+
+/*
+ * t20: read/write/readv/writev through NULL and (void *)-1 buffers must
+ * fail with EFAULT, while a vector whose only bad entry has zero length
+ * is expected to transfer the full valid buffer. Exits on any failure.
+ */
+void t20()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        char buf[100];
+        struct iovec iov[2];
+        int fd;
+
+        ENTRY("trap app's general bad pointer for file i/o");
+        snprintf(file, sizeof(file), "%s/test_t20_file", lustre_path);
+
+        fd = open(file, O_RDWR|O_CREAT, (mode_t)0666);
+        if (fd < 0) {
+                printf("error open file: %s\n", strerror(errno));
+                exit(-1);
+        }
+
+        /* write side */
+        t20_expect_efault("write 1", write(fd, NULL, 20));
+        t20_expect_efault("write 2", write(fd, (void *)-1, 20));
+
+        t20_fill_iov(iov, NULL, 10, (void *)-1, 10);
+        t20_expect_efault("writev 1", writev(fd, iov, 2));
+
+        t20_fill_iov(iov, NULL, 0, buf, sizeof(buf));
+        t20_expect_count("write 3", writev(fd, iov, 2), sizeof(buf));
+
+        /* rewind so the reads below see the bytes just written */
+        lseek(fd, 0, SEEK_SET);
+
+        /* read side */
+        t20_expect_efault("read 1", read(fd, NULL, 20));
+        t20_expect_efault("read 2", read(fd, (void *)-1, 20));
+
+        t20_fill_iov(iov, NULL, 10, (void *)-1, 10);
+        t20_expect_efault("readv 1", readv(fd, iov, 2));
+
+        t20_fill_iov(iov, NULL, 0, buf, sizeof(buf));
+        t20_expect_count("read 3", readv(fd, iov, 2), sizeof(buf));
+
+        close(fd);
+        t_unlink(file);
+        LEAVE();
+}
+
+/*
+ * t21: exercise t_fcntl() on a fresh file: set O_APPEND with F_SETFL and
+ * require F_GETFL to report it, then run F_SETLK/F_GETLK/F_SETLKW with a
+ * read lock, a write lock and finally an unlock. Exits on any failure.
+ */
+void t21()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        int fd, ret;
+        struct flock lock = {
+                .l_type = F_RDLCK,
+                .l_whence = SEEK_SET,
+        };
+
+        ENTRY("basic fcntl support");
+        snprintf(file, MAX_PATH_LENGTH, "%s/test_t21_file", lustre_path);
+
+        fd = open(file, O_RDWR|O_CREAT, (mode_t)0666);
+        if (fd < 0) {
+                /* same message form as the sibling tests; the old "%m" form
+                 * also passed an argument that no conversion consumed */
+                printf("error open file: %s\n", strerror(errno));
+                exit(-1);
+        }
+
+        t_fcntl(fd, F_SETFL, O_APPEND);
+        /* '!' binds tighter than '&', so mask the flags first and only
+         * then negate; otherwise the check can never trigger */
+        ret = t_fcntl(fd, F_GETFL);
+        if (!(ret & O_APPEND)) {
+                printf("error get flag: ret %x\n", ret);
+                exit(-1);
+        }
+
+        t_fcntl(fd, F_SETLK, &lock);
+        t_fcntl(fd, F_GETLK, &lock);
+        lock.l_type = F_WRLCK;
+        t_fcntl(fd, F_SETLKW, &lock);
+        t_fcntl(fd, F_GETLK, &lock);
+        lock.l_type = F_UNLCK;
+        t_fcntl(fd, F_SETLK, &lock);
+
+        close(fd);
+        t_unlink(file);
+        LEAVE();
+}
+
+/* t22 helper: write str to fd in one call; exit unless all of it went out. */
+static void t22_write_round(int fd, const char *str, int round)
+{
+        ssize_t ret = write(fd, str, strlen(str));
+
+        if (ret != strlen(str)) {
+                printf("write %d: ret %ld, errno %d\n", round, ret, errno);
+                exit(1);
+        }
+}
+
+/*
+ * t22 helper: read up to 100 bytes from fd; exit unless exactly
+ * strlen(str) bytes came back and they equal str.
+ */
+static void t22_read_round(int fd, const char *str, int round)
+{
+        char buf[100];
+        ssize_t ret = read(fd, buf, sizeof(buf));
+
+        if (ret != strlen(str)) {
+                printf("read %d got %ld\n", round, ret);
+                exit(1);
+        }
+        if (memcmp(buf, str, strlen(str))) {
+                printf("read %d data err\n", round);
+                exit(1);
+        }
+}
+
+/*
+ * t22: with O_APPEND set, a write issued after seeking to offset 100 is
+ * expected to be readable from offset 0; after clearing the flag with
+ * F_SETFL the same write is expected to be readable from offset 100.
+ */
+void t22()
+{
+        char file[MAX_PATH_LENGTH] = "";
+        char *str = "1234567890";
+        int fd;
+
+        ENTRY("make sure O_APPEND take effect");
+        snprintf(file, sizeof(file), "%s/test_t22_file", lustre_path);
+
+        fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666);
+        if (fd < 0) {
+                printf("error open file: %s\n", strerror(errno));
+                exit(-1);
+        }
+
+        /* round 1: O_APPEND in effect */
+        lseek(fd, 100, SEEK_SET);
+        t22_write_round(fd, str, 1);
+        lseek(fd, 0, SEEK_SET);
+        t22_read_round(fd, str, 1);
+
+        if (fcntl(fd, F_SETFL, 0)) {
+                printf("fcntl err: %s\n", strerror(errno));
+                exit(1);
+        }
+
+        /* round 2: O_APPEND cleared */
+        lseek(fd, 100, SEEK_SET);
+        t22_write_round(fd, str, 2);
+        lseek(fd, 100, SEEK_SET);
+        t22_read_round(fd, str, 2);
+
+        close(fd);
+        t_unlink(file);
+        LEAVE();
+}
+
+#define PAGE_SIZE (4096) /* bytes per page-sized i/o unit in pages_io() */
+#define _npages (2048) /* number of page-sized rows in _buffer */
+
+static int _buffer[_npages][PAGE_SIZE/sizeof(int)]; /* sample data: _npages rows of PAGE_SIZE bytes */
+
+/*
+ * Fill _buffer with rand() data, write all _npages pages to a scratch
+ * file starting at byte offset pos, read them back from the same offset
+ * and compare per-page checksums. Prints the measured read and write
+ * throughput. Exits with status 1 on a short transfer or when any page
+ * checksum differs.
+ *
+ * pos:   i/o start from
+ * xfer:  npages per transfer (t50() only passes values dividing _npages)
+ */
+static void pages_io(int xfer, loff_t pos)
+{
+        char path[MAX_PATH_LENGTH] = "";
+
+        /* unsigned so that the wrapping sums below are well defined */
+        unsigned int check_sum[_npages] = {0,};
+        int fd, rc, i, j, data_error = 0;
+        struct timeval tw1, tw2, tr1, tr2;
+        double tw, tr;
+
+        snprintf(path, MAX_PATH_LENGTH, "%s/test_t50", lustre_path);
+        memset(_buffer, 0, sizeof(_buffer));
+
+        /* create sample data */
+        for (i = 0; i < _npages; i++) {
+                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
+                        _buffer[i][j] = rand();
+                }
+        }
+
+        /* compute checksum */
+        for (i = 0; i < _npages; i++) {
+                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
+                        check_sum[i] += (unsigned int)_buffer[i][j];
+                }
+        }
+
+        t_touch(path);
+
+        fd = t_open(path);
+
+        /* write */
+        lseek(fd, pos, SEEK_SET);
+        gettimeofday(&tw1, NULL);
+        for (i = 0; i < _npages; i += xfer) {
+                rc = write(fd, _buffer[i], PAGE_SIZE * xfer);
+                if (rc != PAGE_SIZE * xfer) {
+                        printf("write error %d (i = %d)\n", rc, i);
+                        exit(1);
+                }
+        }
+        gettimeofday(&tw2, NULL);
+
+        /* wipe the buffer so the read below has to bring the data back */
+        memset(_buffer, 0, sizeof(_buffer));
+
+        /* read */
+        lseek(fd, pos, SEEK_SET);
+        gettimeofday(&tr1, NULL);
+        for (i = 0; i < _npages; i += xfer) {
+                rc = read(fd, _buffer[i], PAGE_SIZE * xfer);
+                if (rc != PAGE_SIZE * xfer) {
+                        printf("read error %d (i = %d)\n", rc, i);
+                        exit(1);
+                }
+        }
+        gettimeofday(&tr2, NULL);
+
+        /* compute checksum */
+        for (i = 0; i < _npages; i++) {
+                unsigned int sum = 0;
+                for (j = 0; j < PAGE_SIZE/sizeof(int); j++) {
+                        sum += (unsigned int)_buffer[i][j];
+                }
+                if (sum != check_sum[i]) {
+                        data_error = 1;
+                        printf("chunk %d checksum error: expected 0x%x, get 0x%x\n",
+                                i, check_sum[i], sum);
+                }
+        }
+
+        t_close(fd);
+        t_unlink(path);
+        /* elapsed times in microseconds */
+        tw = (tw2.tv_sec - tw1.tv_sec) * 1000000 + (tw2.tv_usec - tw1.tv_usec);
+        tr = (tr2.tv_sec - tr1.tv_sec) * 1000000 + (tr2.tv_usec - tr1.tv_usec);
+        /* "R:" is the read rate (from tr), "W:" the write rate (from tw) */
+        printf(" (R:%.3fM/s, W:%.3fM/s)\n",
+                (_npages * PAGE_SIZE) / (tr / 1000000.0) / (1024 * 1024),
+                (_npages * PAGE_SIZE) / (tw / 1000000.0) / (1024 * 1024));
+
+        if (data_error)
+                exit(1);
+}
+
+/*
+ * t50: drive pages_io() twice. First at offset 0 with the transfer size
+ * doubling from 1 page up to _npages, then with 16 pages per transfer at
+ * each start offset listed in off_array.
+ */
+void t50()
+{
+        loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191,
+                              1024*1024*1024*1024ULL};
+        const int noffsets = sizeof(off_array) / sizeof(off_array[0]);
+        int np, i;
+
+        ENTRY("4k aligned i/o sanity");
+        for (np = 1; np <= _npages; np *= 2) {
+                printf("%3d per xfer(total %d)...\t", np, _npages);
+                pages_io(np, 0);
+        }
+        LEAVE();
+
+        ENTRY("4k un-aligned i/o sanity");
+        for (i = 0; i < noffsets; i++) {
+                loff_t offset = off_array[i];
+
+                printf("16 per xfer(total %d), offset %10lld...\t",
+                        _npages, offset);
+                pages_io(16, offset);
+        }
+        LEAVE();
+}
+
 extern void __liblustre_setup_(void);
 extern void __liblustre_cleanup_(void);
 
@@ -446,13 +889,12 @@ int main(int argc, char * const argv[])
 
         __liblustre_setup_();
 
-#ifndef __CYGWIN__
         t1();
         t2();
         t3();
         t4();
-        t5();
         t6();
+        t6b();
         t7();
         t8();
         t9();
@@ -462,7 +904,15 @@ int main(int argc, char * const argv[])
         t13();
         t14();
         t15();
-#endif
+        t16();
+        t17();
+        t18();
+        t18b();
+        t19();
+        t20();
+        t21();
+        t22();
+        t50();
 
        printf("liblustre is about shutdown\n");
         __liblustre_cleanup_();
index a87f0fa..03d005d 100644 (file)
@@ -1,3 +1,7 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -7,6 +11,8 @@
 #include <string.h>
 #include <errno.h>
 #include <dirent.h>
+#include <utime.h>
+#include <stdarg.h>
 
 #include "test_common.h"
 
@@ -89,7 +95,7 @@ void t_mkdir(const char *path)
 {
         int rc;
 
-        rc = mkdir(path, 00644);
+        rc = mkdir(path, 00755);
         if (rc < 0) {
                 printf("mkdir(%s) error: %s\n", path, strerror(errno));
                 EXIT(1);
@@ -181,6 +187,27 @@ int t_open(const char *path)
         return fd;
 }
 
+/*
+ * chdir() wrapper: on failure print the path and the errno text, then
+ * pass the result to EXIT_RET. Returns the chdir() result.
+ */
+int t_chdir(const char *path)
+{
+        int rc;
+
+        rc = chdir(path);
+        if (rc >= 0)
+                return rc;
+
+        printf("chdir(%s) error: %s\n", path, strerror(errno));
+        EXIT_RET(rc);
+        return rc;
+}
+
+/*
+ * utime() wrapper: on failure print the arguments and the errno text,
+ * then pass the result to EXIT_RET. Returns the utime() result.
+ */
+int t_utime(const char *path, const struct utimbuf *buf)
+{
+        int rc;
+
+        rc = utime(path, buf);
+        if (rc >= 0)
+                return rc;
+
+        printf("utime(%s, %p) error: %s\n", path, buf,
+               strerror(errno));
+        EXIT_RET(rc);
+        return rc;
+}
+
 int t_opendir(const char *path)
 {
         int fd;
@@ -209,6 +236,8 @@ int t_check_stat(const char *name, struct stat *buf)
        struct stat stat;
         int rc;
 
+        memset(&stat, 0, sizeof(stat));
+
        rc = lstat(name, &stat);
         if (rc) {
                printf("error %d stat %s\n", rc, name);
@@ -216,6 +245,10 @@ int t_check_stat(const char *name, struct stat *buf)
        }
         if (buf)
                 memcpy(buf, &stat, sizeof(*buf));
+        if (stat.st_blksize == 0) {
+                printf("error: blksize is 0\n");
+                EXIT_RET(-EINVAL);
+        }
 
        return 0;
 }
@@ -313,3 +346,74 @@ void t_ls(int fd, char *buf, int size)
                EXIT(-1);
        }
 }
+
+/*
+ * fcntl() wrapper for the commands the tests use.
+ *
+ * F_GETFL takes no extra argument; F_SETFL and F_DUPFD take an int;
+ * F_GETLK, F_SETLK and F_SETLKW take a struct flock pointer. Any other
+ * command is reported as unsupported. On failure a message is printed
+ * and EXIT(1) is invoked. Returns the fcntl() result, or -1 for an
+ * unsupported command.
+ */
+int t_fcntl(int fd, int cmd, ...)
+{
+        va_list ap;
+        int arg;
+        struct flock *lock;
+        int rc = -1;
+
+        va_start(ap, cmd);
+        switch (cmd) {
+        case F_GETFL:
+                va_end(ap);
+                rc = fcntl(fd, cmd);
+                if (rc == -1) {
+                        printf("fcntl GETFL failed: %s\n",
+                                strerror(errno));
+                        EXIT(1);
+                }
+                break;
+        case F_SETFL:
+                /* callers pass an int (e.g. O_APPEND) and fcntl(2) takes
+                 * an int, so fetching a long here would be a type mismatch */
+                arg = va_arg(ap, int);
+                va_end(ap);
+                rc = fcntl(fd, cmd, arg);
+                if (rc == -1) {
+                        printf("fcntl SETFL %ld failed: %s\n",
+                                (long)arg, strerror(errno));
+                        EXIT(1);
+                }
+                break;
+        case F_GETLK:
+        case F_SETLK:
+        case F_SETLKW:
+                lock = va_arg(ap, struct flock *);
+                va_end(ap);
+                rc = fcntl(fd, cmd, lock);
+                if (rc == -1) {
+                        printf("fcntl cmd %d failed: %s\n",
+                                cmd, strerror(errno));
+                        EXIT(1);
+                }
+                break;
+        case F_DUPFD:
+                /* int argument, same reasoning as F_SETFL */
+                arg = va_arg(ap, int);
+                va_end(ap);
+                rc = fcntl(fd, cmd, arg);
+                if (rc == -1) {
+                        printf("fcntl F_DUPFD %d failed: %s\n",
+                                arg, strerror(errno));
+                        EXIT(1);
+                }
+                break;
+        default:
+                va_end(ap);
+                printf("fcntl cmd %d not supported\n", cmd);
+                EXIT(1);
+        }
+        return rc;
+}
+
+/*
+ * Copy at most max_size - 1 bytes of the string src into dst and always
+ * NUL-terminate the result. Returns dst.
+ *
+ * When max_size is not positive there is no room even for the
+ * terminator, so dst is left untouched.
+ */
+char *safe_strncpy(char *dst, char *src, int max_size)
+{
+        size_t src_size;
+
+        /* a non-positive size would otherwise turn into a huge memcpy
+         * length and a write in front of dst */
+        if (max_size <= 0)
+                return dst;
+
+        src_size = strlen(src);
+        if (src_size >= (size_t)max_size)
+                src_size = (size_t)max_size - 1;
+
+        memcpy(dst, src, src_size);
+        dst[src_size] = 0;
+
+        return dst;
+}
index c3687b9..5949a42 100644 (file)
@@ -8,6 +8,8 @@
 
 extern int exit_on_err;
 
+#include <utime.h> /* for utimbuf */
+
 void t_touch(const char *path);
 void t_create(const char *path);
 void t_link(const char *src, const char *dst);
@@ -21,6 +23,8 @@ void t_chmod(const char *path, const char *format, ...);
 void t_rename(const char *oldpath, const char *newpath);
 int t_open_readonly(const char *path);
 int t_open(const char *path);
+int t_chdir(const char *path);
+int t_utime(const char *path, const struct utimbuf *buf);
 int t_opendir(const char *path);
 void t_close(int fd);
 int t_check_stat(const char *name, struct stat *buf);
@@ -29,5 +33,8 @@ void t_echo_create(const char *path, const char *str);
 void t_grep(const char *path, char *str);
 void t_grep_v(const char *path, char *str);
 void t_ls(int fd, char *buf, int size);
+int t_fcntl(int fd, int cmd, ...);
+
+char *safe_strncpy(char *dst, char *src, int max_size);
 
 #endif
index 3ecbb54..a109d1a 100644 (file)
@@ -36,6 +36,8 @@
 #else
 #include <liblustre.h>
 #include <linux/obd_class.h>
+#include <linux/lustre_mds.h>
+#include <linux/obd_ost.h>
 #include <linux/obd.h>
 #endif
 #include <linux/lprocfs_status.h>
index 5b13389..25ee67f 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LOG
 
+#ifndef __KERNEL__
+#include <liblustre.h>
+#endif
+
 #include <linux/lustre_log.h>
 
 static void print_llogd_body(struct llogd_body *d)
index af0649d..be3ff47 100644 (file)
@@ -5,7 +5,7 @@
 
 if LIBLUSTRE
 noinst_LIBRARIES = libosc.a
-libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_internal.h
+libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_quota.c osc_internal.h
 libosc_a_CPPFLAGS = $(LLCPPFLAGS)
 libosc_a_CFLAGS = $(LLCFLAGS)
 endif
index 2ace56a..a12158e 100644 (file)
@@ -26,7 +26,6 @@
 
 #ifdef __KERNEL__
 # include <linux/module.h>
-# include <linux/obd.h>
 # include <linux/obd_ost.h>
 # include <linux/lustre_net.h>
 # include <linux/lustre_dlm.h>
 # else
 #  include <linux/locks.h>
 # endif
+#else
+# include <liblustre.h>
 #endif
 
+#include <linux/obd.h>
 #include "osc_internal.h"
 
 struct osc_quota_info {
index 01588c7..3342251 100644 (file)
@@ -730,8 +730,10 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
 
                 cksum = crc32_le(cksum, ptr + off, count);
                 kunmap(pga->pg);
+#ifdef __KERNEL__
                 LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n",
                                off, cksum);
+#endif
 
                 nob -= pga->count;
                 pg_count--;
@@ -2258,9 +2260,8 @@ out:
         RETURN(rc);
 }
 
-#ifdef __KERNEL__
 /* Note: caller will lock/unlock, and set uptodate on the pages */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa,
                            struct lov_stripe_md *lsm, obd_count page_count,
                            struct brw_page *pga)
@@ -2533,7 +2534,6 @@ static int sanosc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
         RETURN(0);
 }
 #endif
-#endif
 
 static void osc_set_data_with_check(struct lustre_handle *lockh, void *data,
                                     int flags)
index 4075a1e..2262e48 100644 (file)
@@ -15,7 +15,7 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \
 
 COMMON_SOURCES =  client.c recover.c connection.c niobuf.c pack_generic.c   \
     events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c   \
-    llog_client.c llog_server.c import.c ptlrpcd.c                         \
+    llog_client.c llog_server.c import.c ptlrpcd.c pers.c                  \
     ptlrpc_internal.h $(LDLM_COMM_SOURCES)
 
 if LIBLUSTRE
index 3eed4f4..0194ade 100644 (file)
@@ -590,6 +590,7 @@ static int signal_completed_replay(struct obd_import *imp)
         RETURN(0);
 }
 
+#ifdef __KERNEL__
 static int ptlrpc_invalidate_import_thread(void *data)
 {
         struct obd_import *imp = data;
@@ -618,6 +619,7 @@ static int ptlrpc_invalidate_import_thread(void *data)
 
         RETURN(0);
 }
+#endif
 
 int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
 {
@@ -636,11 +638,17 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                        imp->imp_target_uuid.uuid,
                        imp->imp_connection->c_remote_uuid.uuid);
 
+#ifdef __KERNEL__
                 rc = kernel_thread(ptlrpc_invalidate_import_thread, imp,
                                    CLONE_VM | CLONE_FILES);
                 if (rc < 0)
                         CERROR("error starting invalidate thread: %d\n", rc);
                 RETURN(rc);
+#else
+                ptlrpc_invalidate_import(imp);
+
+                IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+#endif
         }
 
         if (imp->imp_state == LUSTRE_IMP_REPLAY) {
index ebb685f..a20932f 100644 (file)
@@ -753,42 +753,6 @@ int llog_log_swabbed(struct llog_log_hdr *hdr)
         return -1;
 }
 
-void lustre_swab_llogd_body (struct llogd_body *d)
-{
-        __swab64s (&d->lgd_logid.lgl_oid);
-        __swab64s (&d->lgd_logid.lgl_ogr);
-        __swab32s (&d->lgd_logid.lgl_ogen);
-        __swab32s (&d->lgd_ctxt_idx);
-        __swab32s (&d->lgd_llh_flags);
-        __swab32s (&d->lgd_index);
-        __swab32s (&d->lgd_saved_index);
-        __swab32s (&d->lgd_len);
-        __swab64s (&d->lgd_cur_offset);
-}
-
-void lustre_swab_llog_hdr (struct llog_log_hdr *h)
-{
-        __swab32s (&h->llh_hdr.lrh_index);
-        __swab32s (&h->llh_hdr.lrh_len);
-        __swab32s (&h->llh_hdr.lrh_type);
-        __swab64s (&h->llh_timestamp);
-        __swab32s (&h->llh_count);
-        __swab32s (&h->llh_bitmap_offset);
-        __swab32s (&h->llh_flags);
-        __swab32s (&h->llh_tail.lrt_index);
-        __swab32s (&h->llh_tail.lrt_len);
-}
-
-void lustre_swab_llogd_conn_body (struct llogd_conn_body *d)
-{
-        __swab64s (&d->lgdc_gen.mnt_cnt);
-        __swab64s (&d->lgdc_gen.conn_cnt);
-        __swab64s (&d->lgdc_logid.lgl_oid);
-        __swab64s (&d->lgdc_logid.lgl_ogr);
-        __swab32s (&d->lgdc_logid.lgl_ogen);
-        __swab32s (&d->lgdc_ctxt_idx);
-}
-
 void lustre_swab_qdata(struct qunit_data *d)
 {
         __swab32s (&d->qd_id);
index 1702e0b..9ad9b7d 100644 (file)
@@ -726,6 +726,7 @@ liblustre_check_services (void *arg)
 
         RETURN(did_something);
 }
+#define ptlrpc_stop_all_threads(s) do {} while (0)
 
 #else /* __KERNEL__ */