From: green Date: Thu, 26 May 2005 21:07:30 +0000 (+0000) Subject: Branch: b1_4 X-Git-Tag: v1_7_100~1^25~8^2~93 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=65d9ef789300c9b0c0fc628d8ee80eca9dd2c128;p=fs%2Flustre-release.git Branch: b1_4 Liblustre revival. Liblustre can be compiled and works now. --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index af7bb94..409e4cf 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -2,6 +2,11 @@ tbd Cluster File Systems, Inc. * version 1.4.3 * bug fixes +Severity : enhancement +Bugzilla : 2563 +Description: Liblustre support for 1.4.x +Details : Liblustre is now back. It compiles and works. + Severity : minor Frequency : rare (extremely heavy IO load with hundreds of clients) Bugzilla : 6172 diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 6ab2f9d..29ba3f7 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -6,12 +6,14 @@ AUTOMAKE_OPTIONS = foreign ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \ - liblustre doc utils tests conf scripts autoconf + doc utils tests conf scripts autoconf SERVER_SUBDIRS := ldiskfs obdfilter ost mds CLIENT_SUBDIRS := mdc llite +LIBLUSTRE_SUBDIRS := liblustre + SUBDIRS := $(ALWAYS_SUBDIRS) if SERVER @@ -22,6 +24,14 @@ if CLIENT SUBDIRS += $(CLIENT_SUBDIRS) endif +# this needs to be after the client subdirs +if LIBLUSTRE +if !CLIENT +SUBDIRS += $(CLIENT_SUBDIRS) +endif +SUBDIRS += $(LIBLUSTRE_SUBDIRS) +endif + DIST_SUBDIRS := $(ALWAYS_SUBDIRS) $(SERVER_SUBDIRS) $(CLIENT_SUBDIRS) EXTRA_DIST = BUGS FDL kernel_patches diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index bbc9fbd..6156424 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -486,6 +486,7 @@ AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes) AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno) AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes) +AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes) AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) AM_CONDITIONAL(CLIENT, test x$enable_client = xyes) AM_CONDITIONAL(SERVER, test x$enable_server = xyes) diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index d618a40..ca328bd 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -76,6 +76,10 @@ typedef unsigned short umode_t; #endif +#ifndef CURRENT_SECONDS +# define CURRENT_SECONDS time(0) +#endif + /* This is because lprocfs_status.h gets included here indirectly. It would * be much better to just avoid lprocfs being included into liblustre entirely * but that requires more header surgery than I can handle right now. @@ -314,14 +318,7 @@ static inline void spin_unlock_irqrestore(spinlock_t *a, unsigned long b) {} /* random */ -static inline void get_random_bytes(void *ptr, int size) -{ - int *p = (int *)ptr; - int i, count = size/sizeof(int); - - for (i = 0; i< count; i++) - *p++ = rand(); -} +void get_random_bytes(void *ptr, int size); /* memory */ @@ -377,6 +374,10 @@ static inline int kmem_cache_destroy(kmem_cache_t *a) /* struct page decl moved out from here into portals/include/libcfs/user-prim.h */ +/* 2.4 defines */ +#define PAGE_LIST_ENTRY list +#define PAGE_LIST(page) ((page)->list) + #define kmap(page) (page)->addr #define kunmap(a) do {} while (0) @@ -566,12 +567,23 @@ struct task_struct { int pid; int fsuid; int fsgid; + int max_groups; + int ngroups; + gid_t *groups; __u32 cap_effective; + + struct fs_struct __fs; }; extern struct task_struct *current; - -#define in_group_p(a) 0 /* FIXME */ +int in_group_p(gid_t gid); +static inline int capable(int cap) +{ + if (current->cap_effective & (1 << cap)) + return 1; + else + return 0; +} #define set_current_state(foo) do { current->state = foo; } while (0) @@ -611,6 +623,7 @@ static inline int schedule_timeout(signed long t) } #define lock_kernel() do {} while (0) +#define unlock_kernel() do {} while (0) #define daemonize(l) do {} while (0) #define sigfillset(l) do {} while (0) #define recalc_sigpending(l) do {} while (0) @@ -684,6 +697,33 @@ typedef struct { volatile int counter; } atomic_t; #define unlikely(exp) (exp) #endif +/* FIXME sys/capability will finally included linux/fs.h thus + * cause numerous trouble on x86-64. as temporary solution for + * build broken at cary, we copy definition we need from capability.h + * FIXME + */ +struct _cap_struct; +typedef struct _cap_struct *cap_t; +typedef int cap_value_t; +typedef enum { + CAP_EFFECTIVE=0, + CAP_PERMITTED=1, + CAP_INHERITABLE=2 +} cap_flag_t; +typedef enum { + CAP_CLEAR=0, + CAP_SET=1 +} cap_flag_value_t; + +#define CAP_DAC_OVERRIDE 1 +#define CAP_DAC_READ_SEARCH 2 +#define CAP_FOWNER 3 +#define CAP_FSETID 4 +#define CAP_SYS_ADMIN 21 + +cap_t cap_get_proc(void); +int cap_get_flag(cap_t, cap_value_t, cap_flag_t, cap_flag_value_t *); + /* log related */ static inline int llog_init_commit_master(void) { return 0; } static inline int llog_cleanup_commit_master(int force) { return 0; } @@ -728,6 +768,10 @@ void *liblustre_register_wait_callback(int (*fn)(void *arg), void *arg); void liblustre_deregister_wait_callback(void *notifier); int liblustre_wait_event(int timeout); +/* quota */ +#define QUOTA_OK 0 +#define NO_QUOTA 1 + #include #include #include diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index c5c045d..094eafe 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -69,6 +69,11 @@ enum { #include #endif /* __KERNEL__ */ +#define LLAP_FROM_COOKIE(c) \ + (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \ + (struct ll_async_page *)(c)) +#define LL_MAX_BLKSIZE (4UL * 1024 * 1024) + #include #endif diff --git a/lustre/include/linux/lustre_quota.h b/lustre/include/linux/lustre_quota.h index 85fa3a2..7633ebe 100644 --- a/lustre/include/linux/lustre_quota.h +++ b/lustre/include/linux/lustre_quota.h @@ -4,7 +4,9 @@ #ifndef _LUSTRE_QUOTA_H #define _LUSTRE_QUOTA_H -#include +#ifdef __KERNEL__ +# include +#endif #include #include diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index f833f72..9cb4ea8 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -256,7 +256,7 @@ do { \ } while(0) #else /* sigh. an expedient fix until OBD_RACE is fixed up */ -#define OBD_RACE(foo) LBUG() +#define OBD_RACE(foo) do {} while(0) #endif #define fixme() CDEBUG(D_OTHER, "FIXME\n"); @@ -317,6 +317,20 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) extern atomic_t portal_kmemory; +#if defined(LUSTRE_UTILS) /* this version is for utils only */ +#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ +do { \ + (ptr) = kmalloc(size, (gfp_mask)); \ + if ((ptr) == NULL) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + } else { \ + memset(ptr, 0, size); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n", \ + (int)(size), ptr); \ + } \ +} while (0) +#else /* this version is for the kernel and liblustre */ #define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ do { \ (ptr) = kmalloc(size, (gfp_mask)); \ @@ -334,17 +348,13 @@ do { \ (int)(size), ptr, atomic_read(&obd_memory)); \ } \ } while (0) +#endif #ifndef OBD_GFP_MASK # define OBD_GFP_MASK GFP_NOFS #endif -#ifdef __KERNEL__ #define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK) -#else -#define OBD_ALLOC(ptr, size) (ptr = malloc(size)) -#endif - #define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_KERNEL) #ifdef __arch_um__ diff --git a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am index a776768..6fac0b2 100644 --- a/lustre/liblustre/Makefile.am +++ b/lustre/liblustre/Makefile.am @@ -1,10 +1,11 @@ ## Liblustre excecutables & libraries Makefile SUBDIRS = . tests -AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals +AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 \ + $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals AM_CFLAGS = $(LLCFLAGS) -LIBS = $(LIBEFENCE) +AM_LIBS = $(LIBEFENCE) LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/lov/liblov.a \ @@ -15,14 +16,16 @@ LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/obdclass/liblustreclass.a \ $(top_builddir)/lustre/lvfs/liblvfs.a +if !CRAY_PORTALS PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ $(top_builddir)/portals/unals/libtcpnal.a \ $(top_builddir)/portals/portals/libportals.a +else +PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ + $(CRAY_PORTALS_LIBS)/libportals.a +endif -SYSIO_LIBS = $(SYSIO)/drivers/native/libsysio_native.a \ - $(SYSIO)/drivers/sockets/libsysio_sockets.a \ - $(SYSIO)/src/libsysio.a \ - $(SYSIO)/dev/stdfd/libsysio_stdfd.a +SYSIO_LIBS = $(SYSIO)/lib/libsysio.a if LIBLUSTRE lib_LIBRARIES = liblustre.a @@ -40,16 +43,18 @@ install-exec-hook: liblustre.so done else install-exec-hook: - endif -libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c llite_lib.h +libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ + lutil.c lutil.h llite_lib.h # for make rpms -- need cleanup liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ llite_lib.h liblustre.a : $(LUSTRE_LIBS) $(PTL_LIBS) $(SYSIO_LIBS) - $(srcdir)/genlib.sh $(SYSIO) $(AR) $(LINK) || ( rm -f $@; exit 1 ) + sh $(srcdir)/genlib.sh "$(SYSIO)" "$(CRAY_PORTALS_LIBS)" "$(LIBS)" EXTRA_DIST = genlib.sh + +CLEANFILES := liblsupport.a liblustre.so diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index c125b79..ec33ac3 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -3,7 +3,7 @@ * * Lustre Light directory handling * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -33,23 +33,39 @@ #include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD +#ifdef HAVE_LINUX_TYPES_H #include -#include +#elif defined(HAVE_SYS_TYPES_H) +#include +#endif + +#ifdef HAVE_LINUX_UNISTD_H #include +#elif defined(HAVE_UNISTD_H) +#include +#endif + +#include #include "llite_lib.h" static int llu_dir_do_readpage(struct inode *inode, struct page *page) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); struct ll_fid mdc_fid; __u64 offset; @@ -61,19 +77,9 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) struct mdc_op_data data; struct obd_device *obddev = class_exp2obd(sbi->ll_mdc_exp); struct ldlm_res_id res_id = - { .name = {lli->lli_st_ino, (__u64)lli->lli_st_generation} }; + { .name = {st->st_ino, (__u64)lli->lli_st_generation} }; ENTRY; - if ((lli->lli_st_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index) { - /* XXX why do we need this exactly, and why do we think that - * an all-zero directory page is useful? - */ - CERROR("memsetting dir page %lu to zero (size %lld)\n", - page->index, lli->lli_st_size); - memset(page->addr, 0, PAGE_CACHE_SIZE); - GOTO(readpage_out, rc); - } - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, LDLM_PLAIN, NULL, LCK_PR, &lockh); if (!rc) { @@ -93,7 +99,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) } ldlm_lock_dump_handle(D_OTHER, &lockh); - mdc_pack_fid(&mdc_fid, lli->lli_st_ino, lli->lli_st_generation, S_IFDIR); + mdc_pack_fid(&mdc_fid, st->st_ino, lli->lli_st_generation, S_IFDIR); offset = page->index << PAGE_SHIFT; rc = mdc_readpage(sbi->ll_mdc_exp, &mdc_fid, @@ -103,12 +109,13 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) LASSERT (body != NULL); /* checked by mdc_readpage() */ LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */ - lli->lli_st_size = body->size; + st->st_size = body->size; + } else { + CERROR("read_dir_page(%ld) error %d\n", page->index, rc); } ptlrpc_req_finished(request); EXIT; - readpage_out: ldlm_lock_decref(&lockh, LCK_PR); return rc; } @@ -135,6 +142,29 @@ static struct page *llu_dir_read_page(struct inode *ino, int pgidx) return page; } +enum { + EXT2_FT_UNKNOWN, + EXT2_FT_REG_FILE, + EXT2_FT_DIR, + EXT2_FT_CHRDEV, + EXT2_FT_BLKDEV, + EXT2_FT_FIFO, + EXT2_FT_SOCK, + EXT2_FT_SYMLINK, + EXT2_FT_MAX +}; + +static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { + [EXT2_FT_UNKNOWN] DT_UNKNOWN, + [EXT2_FT_REG_FILE] DT_REG, + [EXT2_FT_DIR] DT_DIR, + [EXT2_FT_CHRDEV] DT_CHR, + [EXT2_FT_BLKDEV] DT_BLK, + [EXT2_FT_FIFO] DT_FIFO, + [EXT2_FT_SOCK] DT_SOCK, + [EXT2_FT_SYMLINK] DT_LNK, +}; + #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) #define ROUND_UP64(x) (((x)+sizeof(__u64)-1) & ~(sizeof(__u64)-1)) @@ -165,16 +195,24 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes, _SYSIO_OFF_T *basep) { struct llu_inode_info *lli = llu_i2info(ino); + struct intnl_stat *st = llu_i2stat(ino); loff_t pos = *basep, offset; int maxpages, pgidx, filled = 0; ENTRY; + if (st->st_size == 0) { + CWARN("dir size is 0?\n"); + RETURN(0); + } + + liblustre_wait_event(0); + if (pos == -1) pos = lli->lli_dir_pos; - maxpages = lli->lli_st_size >> PAGE_CACHE_SHIFT; - pgidx = pos >> PAGE_CACHE_SHIFT; - offset = pos & ~PAGE_CACHE_MASK; + maxpages = (st->st_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + pgidx = pos >> PAGE_SHIFT; + offset = pos & ~PAGE_MASK; for ( ; pgidx < maxpages ; pgidx++, offset = 0) { struct page *page; @@ -186,23 +224,24 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes, continue; /* size might have been updated by mdc_readpage */ - maxpages = lli->lli_st_size >> PAGE_CACHE_SHIFT; + maxpages = (st->st_size + PAGE_SIZE - 1) >> PAGE_SHIFT; /* fill in buffer */ addr = page->addr; - limit = addr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1); + limit = addr + PAGE_SIZE - EXT2_DIR_REC_LEN(1); de = (struct ext2_dirent *) (addr + offset); for ( ; (char*) de <= limit; de = ext2_next_entry(de)) { if (de->inode) { int over; - unsigned char d_type = 0; + unsigned char d_type = DT_UNKNOWN; - /* XXX handle type, etc here */ + if (de->file_type < EXT2_FT_MAX) + d_type = ext2_filetype_table[de->file_type]; offset = (char*) de - addr; over = filldir(buf, nbytes, de->name, de->name_len, - (pgidx << PAGE_CACHE_SHIFT) | offset, + (pgidx << PAGE_SHIFT) | offset, le32_to_cpu(de->inode), d_type, &filled); if (over) { free_page(page); @@ -214,7 +253,7 @@ ssize_t llu_iop_getdirentries(struct inode *ino, char *buf, size_t nbytes, free_page(page); } done: - lli->lli_dir_pos = pgidx << PAGE_CACHE_SHIFT | offset; + lli->lli_dir_pos = pgidx << PAGE_SHIFT | offset; *basep = lli->lli_dir_pos; RETURN(filled); } diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 0aa6687..ab9017d 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -3,7 +3,7 @@ * * Lustre Light file operations * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -28,13 +28,20 @@ #include #include #include +#include #include +#include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD @@ -66,29 +73,29 @@ void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) { - struct llu_inode_info *lli = llu_i2info(dst); + struct intnl_stat *st = llu_i2stat(dst); valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n", - src->o_valid, LTIME_S(lli->lli_st_mtime), - LTIME_S(lli->lli_st_ctime), + src->o_valid, LTIME_S(st->st_mtime), + LTIME_S(st->st_ctime), (long)src->o_mtime, (long)src->o_ctime); - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(lli->lli_st_atime)) - LTIME_S(lli->lli_st_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(lli->lli_st_mtime)) - LTIME_S(lli->lli_st_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime)) - LTIME_S(lli->lli_st_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE && src->o_size > lli->lli_st_size) - lli->lli_st_size = src->o_size; + if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime)) + LTIME_S(st->st_atime) = src->o_atime; + if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(st->st_mtime)) + LTIME_S(st->st_mtime) = src->o_mtime; + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) + LTIME_S(st->st_ctime) = src->o_ctime; + if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size) + st->st_size = src->o_size; /* optimum IO size */ if (valid & OBD_MD_FLBLKSZ) - lli->lli_st_blksize = src->o_blksize; + st->st_blksize = src->o_blksize; /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > lli->lli_st_blocks) - lli->lli_st_blocks = src->o_blocks; + if (valid & OBD_MD_FLBLOCKS && src->o_blocks > st->st_blocks) + st->st_blocks = src->o_blocks; } static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) @@ -105,7 +112,7 @@ static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) /* already opened? */ if (lli->lli_open_count++) RETURN(0); - + LASSERT(!lli->lli_file_data); OBD_ALLOC(fd, sizeof(*fd)); @@ -126,6 +133,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) { struct inode *inode = pnode->p_base->pb_ino; struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd; struct ptlrpc_request *request; struct lookup_intent *it; @@ -133,11 +141,13 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) int rc = 0; ENTRY; + liblustre_wait_event(0); + /* don't do anything for '/' */ if (llu_is_root_inode(inode)) RETURN(0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", st->st_ino); LL_GET_INTENT(inode, it); if (!it->d.lustre.it_disposition) { @@ -152,7 +162,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) if (rc) LBUG(); - if (!S_ISREG(lli->lli_st_mode)) + if (!S_ISREG(st->st_mode)) GOTO(out_release, rc = 0); fd = lli->lli_file_data; @@ -166,7 +176,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) } fd->fd_flags &= ~O_LOV_DELAY_CREATE; - lli->lli_open_flags = flags; + lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC); out_release: request = it->d.lustre.it_data; @@ -175,6 +185,22 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) it->it_op_release(it); OBD_FREE(it, sizeof(*it)); + /* libsysio haven't doing anything for O_TRUNC. here we + * simply simulate it as open(...); truncate(...); + */ + if (rc == 0 && (flags & O_TRUNC) && + S_ISREG(st->st_mode)) { + struct iattr attr; + + memset(&attr, 0, sizeof(attr)); + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE | ATTR_RAW; + rc = llu_setattr_raw(inode, &attr); + if (rc) { + CERROR("error %d truncate in open()\n", rc); + } + } + RETURN(rc); } @@ -251,6 +277,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd = lli->lli_file_data; struct ptlrpc_request *req = NULL; struct obd_client_handle *och = &fd->fd_mds_och; @@ -258,7 +285,7 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) int rc, valid; ENTRY; - obdo.o_id = lli->lli_st_ino; + obdo.o_id = st->st_ino; obdo.o_valid = OBD_MD_FLID; valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE |OBD_MD_FLBLOCKS | OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; @@ -278,12 +305,12 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) //ll_queue_done_writing(inode); rc = 0; } else if (rc) { - CERROR("inode %lu close failed: rc %d\n", lli->lli_st_ino, rc); + CERROR("inode %llu close failed: rc %d\n", st->st_ino, rc); } else { rc = llu_objects_destroy(req, inode); if (rc) - CERROR("inode %lu ll_objects destroy: rc = %d\n", - lli->lli_st_ino, rc); + CERROR("inode %llu ll_objects destroy: rc = %d\n", + st->st_ino, rc); } mdc_clear_open_replay_data(och); @@ -303,7 +330,7 @@ int llu_file_release(struct inode *inode) int rc = 0, rc2; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu\n", llu_i2stat(inode)->st_ino, lli->lli_st_generation); if (llu_is_root_inode(inode)) @@ -324,54 +351,34 @@ int llu_file_release(struct inode *inode) RETURN(rc); } +/* + * libsysio require us return 0 + */ int llu_iop_close(struct inode *inode) { int rc; + liblustre_wait_event(0); + rc = llu_file_release(inode); + if (rc) { + CERROR("file close error %d\n", rc); + } /* if open count == 0 && stale_flag is set, should we * remove the inode immediately? */ - return rc; + return 0; } -int llu_iop_ipreadv(struct inode *ino, - struct ioctx *ioctx) +_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off) { ENTRY; - if (!ioctx->ioctx_iovlen) - RETURN(0); - if (ioctx->ioctx_iovlen < 0) - RETURN(-EINVAL); - - ioctx->ioctx_private = llu_file_read(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (IS_ERR(ioctx->ioctx_private)) - return (PTR_ERR(ioctx->ioctx_private)); - - RETURN(0); -} - -int llu_iop_ipwritev(struct inode *ino, - struct ioctx *ioctx) -{ - ENTRY; + liblustre_wait_event(0); - if (!ioctx->ioctx_iovlen) - RETURN(0); - if (ioctx->ioctx_iovlen < 0) + if (off < 0 || off > ll_file_maxbytes(ino)) RETURN(-EINVAL); - ioctx->ioctx_private = llu_file_write(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (IS_ERR(ioctx->ioctx_private)) - return (PTR_ERR(ioctx->ioctx_private)); - - RETURN(0); + RETURN(off); } /* this isn't where truncate starts. roughly: @@ -380,15 +387,17 @@ int llu_iop_ipwritev(struct inode *ino, static void llu_truncate(struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct lov_stripe_md *lsm = lli->lli_smd; struct obdo oa = {0}; - int err; + int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, - lli->lli_st_generation); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p) to %llu\n", st->st_ino, + lli->lli_st_generation, inode, st->st_size); if (!lsm) { - CERROR("truncate on inode %lu with no objects\n", lli->lli_st_ino); + CDEBUG(D_INODE, "truncate on inode %llu with no objects\n", + st->st_ino); EXIT; return; } @@ -398,14 +407,16 @@ static void llu_truncate(struct inode *inode) obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME| OBD_MD_FLMTIME | OBD_MD_FLCTIME); + obd_adjust_kms(llu_i2obdexp(inode), lsm, st->st_size, 1); + CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n", - oa.o_id, lli->lli_st_size); + oa.o_id, st->st_size); /* truncate == punch from new size to absolute end of file */ - err = obd_punch(llu_i2obdexp(inode), &oa, lsm, lli->lli_st_size, - OBD_OBJECT_EOF, NULL); - if (err) - CERROR("obd_truncate fails (%d) ino %lu\n", err, lli->lli_st_ino); + rc = obd_punch(llu_i2obdexp(inode), &oa, lsm, st->st_size, + OBD_OBJECT_EOF, NULL); + if (rc) + CERROR("obd_truncate fails (%d) ino %llu\n", rc, st->st_ino); else obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | OBD_MD_FLMTIME | @@ -413,13 +424,11 @@ static void llu_truncate(struct inode *inode) EXIT; return; -} +} /* llu_truncate */ int llu_vmtruncate(struct inode * inode, loff_t offset) { - struct llu_inode_info *lli = llu_i2info(inode); - - lli->lli_st_size = offset; + llu_i2stat(inode)->st_size = offset; llu_truncate(inode); diff --git a/lustre/liblustre/genlib.sh b/lustre/liblustre/genlib.sh index 4a3c356..f70116d 100755 --- a/lustre/liblustre/genlib.sh +++ b/lustre/liblustre/genlib.sh @@ -4,7 +4,10 @@ # # This script is to generate lib lustre library as a whole. It will leave # two files on current directory: liblustre.a and liblustre.so. -# Integrate them into Makefile.am later +# +# Most concern here is the libraries linking order +# +# FIXME: How to do this cleanly use makefile? # AR=/usr/bin/ar @@ -14,14 +17,16 @@ RANLIB=/usr/bin/ranlib CWD=`pwd` SYSIO=$1 +CRAY_PORTALS_LIBS=$2 +LIBS=$3 + +if [ ! -f $SYSIO/lib/libsysio.a ]; then + echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist" + exit 1 +fi -#if [ ! -f $SYSIO/lib/libsysio.a ]; then -# echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist" -# exit 1 -#fi -# # do cleanup at first -#rm -f liblustre.so +rm -f liblustre.so ALL_OBJS= @@ -35,16 +40,34 @@ build_obj_list() { # # special treatment for libsysio # -#sysio_tmp=$CWD/sysio_tmp_`date +%s` -#build_sysio_obj_list() { -# _objs=`$AR -t $1` -# mkdir -p $sysio_tmp -# $AR -x $1 -# mv $_objs $sysio_tmp -# for _lib in $_objs; do -# ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib "; -# done -#} +sysio_tmp=$CWD/sysio_tmp_`date +%s` +rm -rf $sysio_tmp +build_sysio_obj_list() { + _objs=`$AR -t $1` + mkdir -p $sysio_tmp + cd $sysio_tmp + $AR -x $1 + cd .. + for _lib in $_objs; do + ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib "; + done +} + +# +# special treatment for libportals.a +# +cray_tmp=$CWD/cray_tmp_`date +%s` +rm -rf $cray_tmp +build_cray_portals_obj_list() { + _objs=`$AR -t $1` + mkdir -p $cray_tmp + cd $cray_tmp + $AR -x $1 + cd .. + for _lib in $_objs; do + ALL_OBJS=$ALL_OBJS"$cray_tmp/$_lib "; + done +} # lustre components libs build_obj_list . libllite.a @@ -58,22 +81,24 @@ build_obj_list ../lvfs liblvfs.a # portals components libs build_obj_list ../../portals/utils libuptlctl.a -build_obj_list ../../portals/unals libtcpnal.a -build_obj_list ../../portals/portals libportals.a + +if [ "x$CRAY_PORTALS_LIBS" = "x" ]; then + build_obj_list ../../portals/unals libtcpnal.a + build_obj_list ../../portals/portals libportals.a +# if libportals is already in our LIBS we don't need to link against it here +elif $(echo "$LIBS" | grep -v -- "-lportals" >/dev/null) ; then + build_cray_portals_obj_list $CRAY_PORTALS_LIBS/libportals.a +fi # create static lib lsupport rm -f $CWD/liblsupport.a $AR -cru $CWD/liblsupport.a $ALL_OBJS $RANLIB $CWD/liblsupport.a -# libsysio components libs -build_obj_list $SYSIO/drivers/native libsysio_native.a -build_obj_list $SYSIO/drivers/sockets libsysio_sockets.a -build_obj_list $SYSIO/src libsysio.a -build_obj_list $SYSIO/dev/stdfd libsysio_stdfd.a -# -#build_sysio_obj_list $SYSIO/lib/libsysio.a -# +# if libsysio is already in our LIBS we don't need to link against it here +if $(echo "$LIBS" | grep -v -- "-lsysio" >/dev/null) ; then + build_sysio_obj_list $SYSIO/lib/libsysio.a +fi # create static lib lustre rm -f $CWD/liblustre.a @@ -83,6 +108,7 @@ $RANLIB $CWD/liblustre.a # create shared lib lustre rm -f $CWD/liblustre.so $LD -shared -o $CWD/liblustre.so -init __liblustre_setup_ -fini __liblustre_cleanup_ \ - $ALL_OBJS -lpthread + $ALL_OBJS -lcap -lpthread -#rm -rf $sysio_tmp +rm -rf $sysio_tmp +rm -rf $cray_tmp diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index d9f3470..cac2df0 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -3,7 +3,7 @@ * * Lustre Light common routines * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -24,206 +24,46 @@ #include #include #include +#include #include +#include #include -#include -#include -#include - +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif + +/* env variables */ +#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT" +#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET" +#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT" +#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE" +#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK" +#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS" +#define ENV_LUSTRE_NAL_NAME "LIBLUSTRE_NAL_NAME" /* both sys/queue.h (libsysio require it) and portals/lists.h have definition * of 'LIST_HEAD'. undef it to suppress warnings */ #undef LIST_HEAD - -#include /* needed for ptpctl.h */ #include /* needed for parse_dump */ -#include +#include "lutil.h" #include "llite_lib.h" -unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); - -ptl_handle_ni_t tcpnal_ni; -struct task_struct *current; - -/* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - switch (nal) - { - case SOCKNAL: - return &tcpnal_ni; - default: - return NULL; - } -} - -inline void -kportal_put_ni (int nal) -{ - return; -} - -struct ldlm_namespace; -struct ldlm_res_id; -struct obd_import; - -void *inter_module_get(char *arg) -{ - if (!strcmp(arg, "tcpnal_ni")) - return &tcpnal_ni; - else if (!strcmp(arg, "ldlm_cli_cancel_unused")) - return ldlm_cli_cancel_unused; - else if (!strcmp(arg, "ldlm_namespace_cleanup")) - return ldlm_namespace_cleanup; - else if (!strcmp(arg, "ldlm_replay_locks")) - return ldlm_replay_locks; - else - return NULL; -} - -/* XXX move to proper place */ -char *portals_nid2str(int nal, ptl_nid_t nid, char *str) -{ - switch(nal){ - case TCPNAL: - /* userspace NAL */ - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid)); - break; - case QSWNAL: - case GMNAL: - case IBNAL: - case SCIMACNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", - (__u32)(nid >> 32), (__u32)nid); - break; - default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", - nal, (long long)nid); - break; - } - return str; -} - -void init_current(char *comm) -{ - current = malloc(sizeof(*current)); - current->fs = malloc(sizeof(*current->fs)); - current->fs->umask = umask(0777); - umask(current->fs->umask); - strncpy(current->comm, comm, sizeof(current->comm)); - current->pid = getpid(); - current->fsuid = 0; - current->fsgid = 0; - current->cap_effective = -1; - memset(¤t->pending, 0, sizeof(current->pending)); -} - -/* FIXME */ -void generate_random_uuid(unsigned char uuid_out[16]) +static int lllib_init(void) { - int *arr = (int*)uuid_out; - int i; - - for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++) - arr[i] = rand(); -} - -ptl_nid_t tcpnal_mynid; + liblustre_set_nal_nid(); -int init_lib_portals() -{ - int rc; - ENTRY; - - PtlInit(); - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); - if (rc != 0) { - CERROR("TCPNAL: PtlNIInit failed: error %d\n", rc); - PtlFini(); - RETURN (rc); - } - PtlNIDebug(tcpnal_ni, ~0); - RETURN(rc); -} - -int -kportal_nal_cmd(struct portals_cfg *pcfg) -{ - /* handle portals command if we want */ - return 0; -} - -extern int class_handle_ioctl(unsigned int cmd, unsigned long arg); - -int lib_ioctl_nalcmd(int dev_id, int opc, void * ptr) -{ - struct portal_ioctl_data *ptldata; - - if (opc == IOC_PORTAL_NAL_CMD) { - ptldata = (struct portal_ioctl_data *) ptr; - - if (ptldata->ioc_nal_cmd == NAL_CMD_REGISTER_MYNID) { - tcpnal_mynid = ptldata->ioc_nid; - printf("mynid: %u.%u.%u.%u\n", - (unsigned)(tcpnal_mynid>>24) & 0xFF, - (unsigned)(tcpnal_mynid>>16) & 0xFF, - (unsigned)(tcpnal_mynid>>8) & 0xFF, - (unsigned)(tcpnal_mynid) & 0xFF); - } - } - - return (0); -} - -int lib_ioctl(int dev_id, int opc, void * ptr) -{ - int rc; - - if (dev_id == OBD_DEV_ID) { - struct obd_ioctl_data *ioc = ptr; - - //XXX hack!!! - ioc->ioc_plen1 = ioc->ioc_inllen1; - ioc->ioc_pbuf1 = ioc->ioc_bulk; - //XXX - - rc = class_handle_ioctl(opc, (unsigned long)ptr); - - printf ("proccssing ioctl cmd: %x, rc %d\n", opc, rc); - - if (rc) - return rc; - } - return (0); -} - -int lllib_init(char *dumpfile) -{ - if (!g_zconf) { - /* this parse only get my nid from config file - * before initialize portals - */ - if (parse_dump(dumpfile, lib_ioctl_nalcmd)) - return -1; - } else { - /* XXX need setup mynid before tcpnal initialize */ - tcpnal_mynid = ((uint64_t)getpid() << 32) | time(0); - printf("LibLustre: TCPNAL NID: %016llx\n", tcpnal_mynid); - } - - init_current("dummy"); - if (init_obdclass() || + if (liblustre_init_current("dummy") || + init_obdclass() || init_lib_portals() || ptlrpc_init() || mdc_init() || @@ -231,20 +71,12 @@ int lllib_init(char *dumpfile) osc_init()) return -1; - if (!g_zconf && parse_dump(dumpfile, lib_ioctl)) - return -1; - return _sysio_fssw_register("llite", &llu_fssw_ops); } - -#if 0 -static void llu_check_request() -{ - liblustre_wait_event(0); -} -#endif -int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) +int liblustre_process_log(struct config_llog_instance *cfg, + char *mdsnid, char *mdsname, char *profile, + int allow_recov) { struct lustre_cfg_bufs bufs; struct lustre_cfg *lcfg; @@ -258,22 +90,30 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) struct llog_ctxt *ctxt; ptl_nid_t nid = 0; int nal, err, rc = 0; + char *nal_name; ENTRY; generate_random_uuid(uuid); class_uuid_unparse(uuid, &mdc_uuid); - if (ptl_parse_nid(&nid, g_zconf_mdsnid)) { - CERROR("Can't parse NID %s\n", g_zconf_mdsnid); + if (ptl_parse_nid(&nid, mdsnid)) { + CERROR("Can't parse NID %s\n", mdsnid); RETURN(-EINVAL); } - nal = ptl_name2nal("tcp"); + nal_name = getenv(ENV_LUSTRE_NAL_NAME); + if (!nal_name) { +#if CRAY_PORTALS + nal_name = "cray_qk_nal"; +#else + nal_name = "tcp"; +#endif + } + nal = ptl_name2nal(nal_name); if (nal <= 0) { - CERROR("Can't parse NAL tcp\n"); + CERROR("Can't parse NAL %s\n", nal_name); RETURN(-EINVAL); } - lustre_cfg_bufs_reset(&bufs, NULL); lustre_cfg_bufs_set_string(&bufs, 1, peer); lcfg = lustre_cfg_new(LCFG_ADD_UUID, &bufs); @@ -294,14 +134,14 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) GOTO(out_del_uuid, err); lustre_cfg_bufs_reset(&bufs, name); - lustre_cfg_bufs_set_string(&bufs, 1, g_zconf_mdsname); + lustre_cfg_bufs_set_string(&bufs, 1, mdsname); lustre_cfg_bufs_set_string(&bufs, 2, peer); lcfg = lustre_cfg_new(LCFG_SETUP, &bufs); err = class_process_config(lcfg); lustre_cfg_free(lcfg); if (err < 0) GOTO(out_detach, err); - + obd = class_name2obd(name); if (obd == NULL) GOTO(out_cleanup, err = -EINVAL); @@ -314,14 +154,14 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) err = obd_connect(&mdc_conn, obd, &mdc_uuid, NULL /*connect_flags*/); if (err) { CERROR("cannot connect to %s: rc = %d\n", - g_zconf_mdsname, err); + mdsname, err); GOTO(out_cleanup, err); } - + exp = class_conn2export(&mdc_conn); - + ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT]; - rc = class_config_parse_llog(ctxt, g_zconf_profile, cfg); + rc = class_config_parse_llog(ctxt, profile, cfg); if (rc) { CERROR("class_config_parse_llog failed: rc = %d\n", rc); } @@ -353,13 +193,8 @@ out_del_uuid: out: if (rc == 0) rc = err; - - RETURN(rc); -} -static void sighandler_USR1(int signum) -{ - /* do nothing */ + RETURN(rc); } /* parse host:/mdsname/profile string */ @@ -375,7 +210,7 @@ int ll_parse_mount_target(const char *target, char **mdsnid, if ((s = strchr(buf, ':'))) { *mdsnid = buf; *s = '\0'; - + while (*++s == '/') ; *mdsname = s; @@ -389,119 +224,161 @@ int ll_parse_mount_target(const char *target, char **mdsnid, return -1; } -/* env variables */ -#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT" -#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET" -#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT" -#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE" +/* + * early liblustre init + * called from C startup in catamount apps, before main() + * + * The following is a skeleton sysio startup sequence, + * as implemented in C startup (skipping error handling). + * In this framework none of these calls need be made here + * or in the apps themselves. The NAMESPACE_STRING specifying + * the initial set of fs ops (creates, mounts, etc.) is passed + * as an environment variable. + * + * _sysio_init(); + * _sysio_incore_init(); + * _sysio_native_init(); + * _sysio_lustre_init(); + * _sysio_boot(NAMESPACE_STRING); + * + * the name _sysio_lustre_init() follows the naming convention + * established in other fs drivers from libsysio: + * _sysio_incore_init(), _sysio_native_init() + * + * _sysio_lustre_init() must be called before _sysio_boot() + * to enable libsysio's processing of namespace init strings containing + * lustre filesystem operations + */ +int _sysio_lustre_init(void) +{ + int err; + char *timeout = NULL; + char *debug_mask = NULL; + char *debug_subsys = NULL; +#ifndef INIT_SYSIO + extern void __liblustre_cleanup_(void); +#endif -extern int _sysio_native_init(); +#if 0 + portal_debug = -1; + portal_subsystem_debug = -1; +#endif -extern unsigned int obd_timeout; + liblustre_init_random(); + + err = lllib_init(); + if (err) { + perror("init llite driver"); + return err; + } + timeout = getenv(ENV_LUSTRE_TIMEOUT); + if (timeout) { + obd_timeout = (unsigned int) strtol(timeout, NULL, 0); + printf("LibLustre: set obd timeout as %u seconds\n", + obd_timeout); + } -/* global variables */ -int g_zconf = 0; /* zeroconf or dumpfile */ -char *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */ -char *g_zconf_mdsnid = NULL; /* mdsnid, for zeroconf */ -char *g_zconf_profile = NULL; /* profile, for zeroconf */ + /* debug masks */ + debug_mask = getenv(ENV_LUSTRE_DEBUG_MASK); + if (debug_mask) + portal_debug = (unsigned int) strtol(debug_mask, NULL, 0); + debug_subsys = getenv(ENV_LUSTRE_DEBUG_SUBSYS); + if (debug_subsys) + portal_subsystem_debug = + (unsigned int) strtol(debug_subsys, NULL, 0); + +#ifndef INIT_SYSIO + (void)atexit(__liblustre_cleanup_); +#endif + return err; +} + +extern int _sysio_native_init(); +extern unsigned int obd_timeout; + +char *lustre_path = NULL; void __liblustre_setup_(void) { - char *lustre_path = NULL; char *target = NULL; - char *timeout = NULL; - char *dumpfile = NULL; char *root_driver = "native"; char *lustre_driver = "llite"; char *root_path = "/"; unsigned mntflgs = 0; - int err; - /* consider tha case of starting multiple liblustre instances - * at a same time on single node. - */ - srand(time(NULL) + getpid()); - - signal(SIGUSR1, sighandler_USR1); - lustre_path = getenv(ENV_LUSTRE_MNTPNT); if (!lustre_path) { lustre_path = "/mnt/lustre"; } + /* mount target */ target = getenv(ENV_LUSTRE_MNTTGT); if (!target) { - dumpfile = getenv(ENV_LUSTRE_DUMPFILE); - if (!dumpfile) { - CERROR("Neither mount target, nor dumpfile\n"); - exit(1); - } - g_zconf = 0; - printf("LibLustre: mount point %s, dumpfile %s\n", - lustre_path, dumpfile); - } else { - if (ll_parse_mount_target(target, - &g_zconf_mdsnid, - &g_zconf_mdsname, - &g_zconf_profile)) { - CERROR("mal-formed target %s \n", target); - exit(1); - } - g_zconf = 1; - printf("LibLustre: mount point %s, target %s\n", - lustre_path, target); + printf("LibLustre: no mount target specified\n"); + exit(1); } + printf("LibLustre: mount point %s, target %s\n", + lustre_path, target); - timeout = getenv(ENV_LUSTRE_TIMEOUT); - if (timeout) { - obd_timeout = (unsigned int) atoi(timeout); - printf("LibLustre: set obd timeout as %u seconds\n", - obd_timeout); - } - if (_sysio_init() != 0) { +#ifdef INIT_SYSIO + /* initialize libsysio & mount rootfs */ + if (_sysio_init()) { perror("init sysio"); exit(1); } - - /* cygwin don't need native driver */ -#ifndef __CYGWIN__ _sysio_native_init(); -#endif err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL); if (err) { - perror(root_driver); + fprintf(stderr, "sysio mount failed: %s\n", strerror(errno)); exit(1); } -#if 1 - portal_debug = 0; - portal_subsystem_debug = 0; -#endif - err = lllib_init(dumpfile); - if (err) { - perror("init llite driver"); + if (_sysio_lustre_init()) exit(1); - } +#endif /* INIT_SYSIO */ - err = mount("/", lustre_path, lustre_driver, mntflgs, NULL); + err = mount(target, lustre_path, lustre_driver, mntflgs, NULL); if (err) { - errno = -err; - perror(lustre_driver); + fprintf(stderr, "Lustre mount failed: %s\n", strerror(errno)); exit(1); } - -#if 0 - __sysio_hook_sys_enter = llu_check_request; - __sysio_hook_sys_leave = NULL; -#endif } void __liblustre_cleanup_(void) { +#ifndef INIT_SYSIO + /* guard against being called multiple times */ + static int cleaned = 0; + + if (cleaned) + return; + cleaned++; +#endif + + /* user app might chdir to a lustre directory, and leave busy pnode + * during finaly libsysio cleanup. here we chdir back to "/". + * but it can't fix the situation that liblustre is mounted + * at "/". + */ + chdir("/"); +#if 0 + umount(lustre_path); +#endif + /* we can't call umount here, because libsysio will not cleanup + * opening files for us. _sysio_shutdown() will cleanup fds at + * first but which will also close the sockets we need for umount + * liblutre. this delima lead to another hack in + * libsysio/src/file_hack.c FIXME + */ +#ifdef INIT_SYSIO _sysio_shutdown(); + cleanup_lib_portals(); PtlFini(); +#else + _sysio_shutdown(); +#endif } diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 4462311..e254ea0 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -19,6 +19,8 @@ struct ll_file_data { struct obd_client_handle fd_mds_och; __u32 fd_flags; + struct lustre_handle fd_cwlockh; + unsigned long fd_gid; }; struct llu_sb_info @@ -36,7 +38,6 @@ struct llu_sb_info }; #define LL_SBI_NOLCK 0x1 -#define LL_SBI_READAHEAD 0x2 #define LLI_F_HAVE_OST_SIZE_LOCK 0 #define LLI_F_HAVE_MDS_SIZE_LOCK 1 @@ -49,15 +50,13 @@ struct llu_inode_info { char *lli_symlink_name; struct semaphore lli_open_sem; __u64 lli_maxbytes; - unsigned long lli_flags; + unsigned long lli_flags; /* for libsysio */ struct file_identifier lli_sysio_fid; struct lookup_intent *lli_it; - /* XXX workaround for libsysio unlink */ - int lli_stale_flag; /* XXX workaround for libsysio readdir */ loff_t lli_dir_pos; @@ -69,50 +68,9 @@ struct llu_inode_info { int lli_open_flags; int lli_open_count; - /* stat FIXME not 64 bit clean */ - dev_t lli_st_dev; - ino_t lli_st_ino; - mode_t lli_st_mode; - nlink_t lli_st_nlink; - uid_t lli_st_uid; - gid_t lli_st_gid; - dev_t lli_st_rdev; - loff_t lli_st_size; - unsigned int lli_st_blksize; - unsigned long lli_st_blocks; - time_t lli_st_atime; - time_t lli_st_mtime; - time_t lli_st_ctime; - /* not for stat, change it later */ - int lli_st_flags; - unsigned long lli_st_generation; -}; - -#define LLU_SYSIO_COOKIE_SIZE(exp, x) \ - (sizeof(struct llu_sysio_cookie) + \ - sizeof(struct ll_async_page) * (x) + \ - sizeof(struct page) * (x) + \ - llap_cookie_size * (x)) - -struct llu_sysio_cookie { - struct obd_io_group *lsc_oig; - struct inode *lsc_inode; - int lsc_maxpages; - int lsc_npages; - struct ll_async_page *lsc_llap; - struct page *lsc_pages; - void *lsc_llap_cookie; - __u64 lsc_rwcount; -}; - -/* XXX why uio.h haven't the definition? */ -#define MAX_IOVEC 32 - -struct llu_sysio_callback_args -{ - int ncookies; - struct llu_sysio_cookie *cookies[MAX_IOVEC]; + int lli_st_flags; + unsigned long lli_st_generation; }; static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs) @@ -125,6 +83,11 @@ static inline struct llu_inode_info *llu_i2info(struct inode *inode) return (struct llu_inode_info*)(inode->i_private); } +static inline struct intnl_stat *llu_i2stat(struct inode *inode) +{ + return &inode->i_stbuf; +} + static inline struct llu_sb_info *llu_i2sbi(struct inode *inode) { return llu_i2info(inode)->lli_sbi; @@ -153,8 +116,8 @@ do { \ OBD_ALLOC(temp, sizeof(*temp)); \ memcpy(temp, it, sizeof(*temp)); \ llu_i2info(inode)->lli_it = temp; \ - CDEBUG(D_DENTRY, "alloc intent %p to inode %p(ino %lu)\n", \ - temp, inode, llu_i2info(inode)->lli_st_ino); \ + CDEBUG(D_DENTRY, "alloc intent %p to inode %p(ino %llu)\n", \ + temp, inode, llu_i2stat(inode)->st_ino); \ } while(0) @@ -164,8 +127,8 @@ do { \ \ LASSERT(it); \ llu_i2info(inode)->lli_it = NULL; \ - CDEBUG(D_DENTRY, "dettach intent %p from inode %p(ino %lu)\n", \ - it, inode, llu_i2info(inode)->lli_st_ino); \ + CDEBUG(D_DENTRY, "dettach intent %p from inode %p(ino %llu)\n", \ + it, inode, llu_i2stat(inode)->st_ino); \ } while(0) /* interpet return codes from intent lookup */ @@ -186,26 +149,24 @@ struct it_cb_data { static inline void ll_i2uctxt(struct ll_uctxt *ctxt, struct inode *i1, struct inode *i2) { - struct llu_inode_info *lli1 = llu_i2info(i1); - struct llu_inode_info *lli2; + struct intnl_stat *st = llu_i2stat(i1); LASSERT(i1); LASSERT(ctxt); - if (in_group_p(lli1->lli_st_gid)) - ctxt->gid1 = lli1->lli_st_gid; + if (in_group_p(st->st_gid)) + ctxt->gid1 = st->st_gid; else ctxt->gid1 = -1; if (i2) { - lli2 = llu_i2info(i2); - if (in_group_p(lli2->lli_st_gid)) - ctxt->gid2 = lli2->lli_st_gid; + st = llu_i2stat(i2); + if (in_group_p(st->st_gid)) + ctxt->gid2 = st->st_gid; else ctxt->gid2 = -1; - } else { - ctxt->gid2 = -1; - } + } else + ctxt->gid2 = 0; } @@ -215,12 +176,6 @@ typedef int (*intent_finish_cb)(struct ptlrpc_request *, int llu_intent_lock(struct inode *parent, struct pnode *pnode, struct lookup_intent *, int flags, intent_finish_cb); -/* FIXME */ -static inline int ll_permission(struct inode *inode, int flag, void * unused) -{ - return 0; -} - static inline __u64 ll_file_maxbytes(struct inode *inode) { return llu_i2info(inode)->lli_maxbytes; @@ -232,16 +187,19 @@ struct mount_option_s char *osc_uuid; }; +#define IS_BAD_PTR(ptr) \ + ((unsigned long)(ptr) == 0 || (unsigned long)(ptr) > -1000UL) + /* llite_lib.c */ void generate_random_uuid(unsigned char uuid_out[16]); -int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov); +int liblustre_process_log(struct config_llog_instance *cfg, + char *mdsnid, + char *mdsname, + char *profile, + int allow_recov); int ll_parse_mount_target(const char *target, char **mdsnid, char **mdsname, char **profile); -extern int g_zconf; -extern char *g_zconf_mdsnid; -extern char *g_zconf_mdsname; -extern char *g_zconf_profile; extern struct mount_option_s mount_option; /* super.c */ @@ -252,6 +210,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); int ll_it_open_error(int phase, struct lookup_intent *it); struct inode *llu_iget(struct filesys *fs, struct lustre_md *md); int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm); +int llu_setattr_raw(struct inode *inode, struct iattr *attr); extern struct fssw_ops llu_fssw_ops; @@ -266,20 +225,15 @@ int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode); int llu_iop_close(struct inode *inode); -int llu_iop_ipreadv(struct inode *ino, struct ioctx *ioctxp); -int llu_iop_ipwritev(struct inode *ino, struct ioctx *ioctxp); +_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off); int llu_vmtruncate(struct inode * inode, loff_t offset); void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid); int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir); /* rw.c */ -int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED); -struct llu_sysio_callback_args* -llu_file_write(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos); -struct llu_sysio_callback_args* -llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos); +int llu_iop_read(struct inode *ino, struct ioctx *ioctxp); +int llu_iop_write(struct inode *ino, struct ioctx *ioctxp); +int llu_iop_iodone(struct ioctx *ioctxp); int llu_glimpse_size(struct inode *inode); int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, diff --git a/lustre/liblustre/lutil.c b/lustre/liblustre/lutil.c index 7ad8aa5..bc4f64e 100644 --- a/lustre/liblustre/lutil.c +++ b/lustre/liblustre/lutil.c @@ -25,26 +25,30 @@ #include #include -#ifndef REDSTORM #include +#ifdef HAVE_NETDB_H #include +#endif #include #include +#ifdef HAVE_NETINET_IN_H #include +#endif #include +#ifdef HAVE_ARPA_INET_H #include -#else -#include +#endif +#ifdef HAVE_CATAMOUNT_DATA_H #include #endif #include "lutil.h" -#ifdef CRAY_PORTALS +#if CRAY_PORTALS void portals_debug_dumplog(void){}; #endif -unsigned int portal_subsystem_debug = ~0 - S_NAL; +unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); unsigned int portal_debug = 0; struct task_struct *current; @@ -68,28 +72,28 @@ void *inter_module_get(char *arg) char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { if (nid == PTL_NID_ANY) { - snprintf(str, PTL_NALFMT_SIZE - 1, "%s", - "PTL_NID_ANY"); + snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY"); return str; } switch(nal){ -#ifndef CRAY_PORTALS +#if !CRAY_PORTALS case TCPNAL: /* userspace NAL */ + case IIBNAL: case OPENIBNAL: case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", + snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u", (__u32)(nid >> 32), HIPQUAD(nid)); break; case QSWNAL: case GMNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", + snprintf(str, PTL_NALFMT_SIZE, "%u:%u", (__u32)(nid >> 32), (__u32)nid); break; #endif default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", + snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx", nal, (long long)nid); break; } @@ -98,35 +102,22 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) char *portals_id2str(int nal, ptl_process_id_t id, char *str) { - switch(nal){ -#ifndef CRAY_PORTALS - case TCPNAL: - /* userspace NAL */ - case OPENIBNAL: - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u,%u", - (__u32)(id.nid >> 32), HIPQUAD((id.nid)) , id.pid); - break; - case QSWNAL: - case GMNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u,%u", - (__u32)(id.nid >> 32), (__u32)id.nid, id.pid); - break; -#endif - default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx,%lx", - nal, (long long)id.nid, (long)id.pid ); - break; - } + int len; + + portals_nid2str(nal, id.nid, str); + len = strlen(str); + snprintf(str + len, PTL_NALFMT_SIZE - len, ",%u", id.pid); return str; } -#ifndef REDSTORM /* * random number generator stuff */ +#ifdef LIBLUSTRE_USE_URANDOM static int _rand_dev_fd = -1; +#endif +#ifdef HAVE_GETHOSTBYNAME static int get_ipv4_addr() { struct utsname myname; @@ -148,25 +139,33 @@ static int get_ipv4_addr() return ip; } +#endif void liblustre_init_random() { int seed; struct timeval tv; +#ifdef LIBLUSTRE_USE_URANDOM _rand_dev_fd = syscall(SYS_open, "/dev/urandom", O_RDONLY); if (_rand_dev_fd >= 0) { - if (syscall(SYS_read, _rand_dev_fd, &seed, sizeof(int)) == - sizeof(int)) { + if (syscall(SYS_read, _rand_dev_fd, + &seed, sizeof(int)) == sizeof(int)) { srand(seed); return; } syscall(SYS_close, _rand_dev_fd); _rand_dev_fd = -1; } +#endif /* LIBLUSTRE_USE_URANDOM */ +#ifdef HAVE_GETHOSTBYNAME + seed = get_ipv4_addr(); +#else + seed = _my_pnid; +#endif gettimeofday(&tv, NULL); - srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(get_ipv4_addr())); + srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(seed)); } void get_random_bytes(void *buf, int size) @@ -174,12 +173,14 @@ void get_random_bytes(void *buf, int size) char *p = buf; LASSERT(size >= 0); +#ifdef LIBLUSTRE_USE_URANDOM if (_rand_dev_fd >= 0) { if (syscall(SYS_read, _rand_dev_fd, buf, size) == size) return; syscall(SYS_close, _rand_dev_fd); _rand_dev_fd = -1; } +#endif while (size--) *p++ = rand(); @@ -187,6 +188,7 @@ void get_random_bytes(void *buf, int size) static void init_capability(int *res) { +#ifdef HAVE_LIBCAP cap_t syscap; cap_flag_value_t capval; int i; @@ -207,10 +209,23 @@ static void init_capability(int *res) } } } +#else + /* + * set fake cap flags to ship to linux server + * from client platforms that have none (eg. catamount) + * full capability for root + * no capability for anybody else + */ +#define FAKE_ROOT_CAP 0x1ffffeff +#define FAKE_USER_CAP 0 + + *res = (current->fsuid == 0) ? FAKE_ROOT_CAP: FAKE_USER_CAP; +#endif } void liblustre_set_nal_nid() { +#ifdef HAVE_GETHOSTBYNAME pid_t pid; uint32_t ip; struct in_addr in; @@ -226,36 +241,7 @@ void liblustre_set_nal_nid() in.s_addr = htonl(ip); printf("LibLustre: TCPNAL NID: %016llx (%s:%u)\n", tcpnal_mynid, inet_ntoa(in), pid); -} - -#else /* REDSTORM */ - -void liblustre_init_random() -{ - struct timeval tv; - UINT32 nodeid; - - gettimeofday(&tv, NULL); - nodeid = _my_pnid; - srand(tv.tv_sec + tv.tv_usec + getpid() + __swab32(nodeid)); -} - -void get_random_bytes(void *buf, int size) -{ - char *p = buf; - LASSERT(size >= 0); - - while (size--) - *p++ = rand(); -} - -static void init_capability(int *res) -{ - *res = 0; -} - -void liblustre_set_nal_nid() -{ +#else pid_t pid; uint32_t ip; @@ -266,10 +252,9 @@ void liblustre_set_nal_nid() tcpnal_mynid = ip | pid; printf("LibLustre: NAL NID: %08x (%u)\n", tcpnal_mynid, pid); +#endif } -#endif /* REDSOTRM */ - int in_group_p(gid_t gid) { int i; diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index 74eddb2..9e69a9e 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -3,7 +3,7 @@ * * Lustre Light name resolution * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -32,11 +32,16 @@ #include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD @@ -70,6 +75,7 @@ static void ll_intent_release(struct lookup_intent *it) EXIT; } +#if 0 /* * remove the stale inode from pnode */ @@ -94,6 +100,7 @@ void unhook_stale_inode(struct pnode *pno) EXIT; return; } +#endif void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode) { @@ -102,14 +109,14 @@ void llu_lookup_finish_locks(struct lookup_intent *it, struct pnode *pnode) if (it && pnode->p_base->pb_ino != NULL) { struct inode *inode = pnode->p_base->pb_ino; - CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%lu)\n", - inode, llu_i2info(inode)->lli_st_ino, + CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%llu/%lu)\n", + inode, llu_i2stat(inode)->st_ino, llu_i2info(inode)->lli_st_generation); mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode); } - /* drop IT_LOOKUP locks */ - if (it->it_op == IT_LOOKUP) + /* drop lookup/getattr locks */ + if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) ll_intent_release(it); } @@ -140,23 +147,25 @@ int llu_mdc_blocking_ast(struct ldlm_lock *lock, case LDLM_CB_CANCELING: { struct inode *inode = llu_inode_from_lock(lock); struct llu_inode_info *lli; + struct intnl_stat *st; /* Invalidate all dentries associated with this inode */ if (inode == NULL) break; lli = llu_i2info(inode); + st = llu_i2stat(inode); clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags); - if (lock->l_resource->lr_name.name[0] != lli->lli_st_ino || + if (lock->l_resource->lr_name.name[0] != st->st_ino || lock->l_resource->lr_name.name[1] != lli->lli_st_generation) { - LDLM_ERROR(lock, "data mismatch with ino %lu/%lu", - lli->lli_st_ino, lli->lli_st_generation); + LDLM_ERROR(lock, "data mismatch with ino %llu/%lu", + st->st_ino, lli->lli_st_generation); } - if (S_ISDIR(lli->lli_st_mode)) { - CDEBUG(D_INODE, "invalidating inode %lu\n", - lli->lli_st_ino); + if (S_ISDIR(st->st_mode)) { + CDEBUG(D_INODE, "invalidating inode %llu\n", + st->st_ino); llu_invalidate_inode_pages(inode); } @@ -215,8 +224,8 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%x\n", - pb->pb_name.name, it ? it->it_op : 0); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,intent=%x\n", + (int)pb->pb_name.len, pb->pb_name.name, it ? it->it_op : 0); /* We don't want to cache negative dentries, so return 0 immediately. * We believe that this is safe, that negative dentries cannot be @@ -226,26 +235,16 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) RETURN(0); } - /* check stale inode */ - if (llu_i2info(pb->pb_ino)->lli_stale_flag) - unhook_stale_inode(pnode); - - /* check again because unhook_stale_inode() might generate - * negative pnode */ - if (pb->pb_ino == NULL) { - CDEBUG(D_INODE, "negative pb\n"); - RETURN(0); - } - /* This is due to bad interaction with libsysio. remove this when we * switched to libbsdio XXX */ { struct llu_inode_info *lli = llu_i2info(pb->pb_ino); + struct intnl_stat *st = llu_i2stat(pb->pb_ino); if (lli->lli_it) { - CDEBUG(D_INODE, "inode %lu still have intent " + CDEBUG(D_INODE, "inode %llu still have intent " "%p(opc 0x%x), release it\n", - lli->lli_st_ino, lli->lli_it, + st->st_ino, lli->lli_it, lli->lli_it->it_op); ll_intent_release(lli->lli_it); OBD_FREE(lli->lli_it, sizeof(*lli->lli_it)); @@ -279,14 +278,19 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) GOTO(out, rc = 0); rc = pnode_revalidate_finish(req, 1, it, pnode); + if (rc != 0) { + ll_intent_release(it); + GOTO(out, rc = 0); + } + rc = 1; /* Note: ll_intent_lock may cause a callback, check this! */ - if (it->it_op & (IT_OPEN | IT_GETATTR)) + if (it->it_op & IT_OPEN) LL_SAVE_INTENT(pb->pb_ino, it); - RETURN(1); + out: - if (req) + if (req && rc == 1) ptlrpc_req_finished(req); if (rc == 0) { LASSERT(pb->pb_ino); @@ -294,9 +298,6 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) pb->pb_ino = NULL; } else { llu_lookup_finish_locks(it, pnode); - llu_i2info(pb->pb_ino)->lli_stale_flag = 0; - if (it->it_op & (IT_OPEN | IT_GETATTR)) - LL_SAVE_INTENT(pb->pb_ino, it); } RETURN(rc); } @@ -311,13 +312,37 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, struct inode *inode = NULL; int rc; + /* libsysio require us generate inode right away if success. + * so if mds created new inode for us we need make sure it + * succeeded. thus for any error we can't delay to the + * llu_file_open() time. */ + if (it_disposition(it, DISP_OPEN_CREATE) && + it_open_error(DISP_OPEN_CREATE, it)) { + CDEBUG(D_INODE, "detect mds create error\n"); + return it_open_error(DISP_OPEN_CREATE, it); + } + if (it_disposition(it, DISP_OPEN_OPEN) && + it_open_error(DISP_OPEN_OPEN, it)) { + CDEBUG(D_INODE, "detect mds open error\n"); + /* undo which did by mdc_intent_lock */ + if (it_disposition(it, DISP_OPEN_CREATE) && + !it_open_error(DISP_OPEN_CREATE, it)) { + LASSERT(request); + LASSERT(atomic_read(&request->rq_refcount) > 1); + CDEBUG(D_INODE, "dec a ref of req %p\n", request); + ptlrpc_req_finished(request); + } + return it_open_error(DISP_OPEN_OPEN, it); + } + /* NB 1 request reference will be taken away by ll_intent_lock() * when I return - * Note: libsysio require the inode must be generated here */ - if ((it->it_op & IT_CREAT) || !it_disposition(it, DISP_LOOKUP_NEG)) { + if (!it_disposition(it, DISP_LOOKUP_NEG) || + (it->it_op & IT_CREAT)) { struct lustre_md md; struct llu_inode_info *lli; + struct intnl_stat *st; ENTRY; rc = mdc_req2lustre_md(request, offset, sbi->ll_osc_exp, &md); @@ -325,23 +350,23 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, RETURN(rc); inode = llu_iget(parent->i_fs, &md); - if (!inode) { + if (!inode || IS_ERR(inode)) { /* free the lsm if we allocated one above */ if (md.lsm != NULL) obd_free_memmd(sbi->ll_osc_exp, &md.lsm); - RETURN(-ENOMEM); + RETURN(inode ? PTR_ERR(inode) : -ENOMEM); } else if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm) { obd_free_memmd(sbi->ll_osc_exp, &md.lsm); } lli = llu_i2info(inode); + st = llu_i2stat(inode); /* If this is a stat, get the authoritative file size */ - if (it->it_op == IT_GETATTR && S_ISREG(lli->lli_st_mode) && + if (it->it_op == IT_GETATTR && S_ISREG(st->st_mode) && lli->lli_smd != NULL) { struct lov_stripe_md *lsm = lli->lli_smd; - struct ost_lvb lvb; ldlm_error_t rc; LASSERT(lsm->lsm_object_id != 0); @@ -360,7 +385,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, } /* intent will be further used in cases of open()/getattr() */ - if (inode && (it->it_op & (IT_OPEN | IT_GETATTR))) + if (inode && (it->it_op & IT_OPEN)) LL_SAVE_INTENT(inode, it); child->p_base->pb_ino = inode; @@ -517,6 +542,8 @@ int llu_iop_lookup(struct pnode *pnode, int rc; ENTRY; + liblustre_wait_event(0); + *inop = NULL; /* the mount root inode have no name, so don't call @@ -550,4 +577,3 @@ int llu_iop_lookup(struct pnode *pnode, RETURN(rc); } - diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index ea99362..41c43d6 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -3,7 +3,7 @@ * * Lustre Light block IO * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -28,22 +28,63 @@ #include #include #include +#include #include #include +#include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD #include "llite_lib.h" +struct llu_io_group +{ + struct obd_io_group *lig_oig; + struct inode *lig_inode; + int lig_maxpages; + int lig_npages; + __u64 lig_rwcount; + struct ll_async_page *lig_llaps; + struct page *lig_pages; + void *lig_llap_cookies; +}; + +#define LLU_IO_GROUP_SIZE(x) \ + (sizeof(struct llu_io_group) + \ + (sizeof(struct ll_async_page) + \ + sizeof(struct page) + \ + llap_cookie_size) * (x)) + +struct llu_io_session +{ + struct inode *lis_inode; + int lis_cmd; + int lis_max_groups; + int lis_ngroups; + struct llu_io_group *lis_groups[0]; +}; +#define LLU_IO_SESSION_SIZE(x) \ + (sizeof(struct llu_io_session) + (x) * 2 * sizeof(void *)) + + +typedef ssize_t llu_file_piov_t(const struct iovec *iovec, int iovlen, + _SYSIO_OFF_T pos, ssize_t len, + void *private); + size_t llap_cookie_size; -static int llu_lock_to_stripe_offset(struct inode *inode,struct ldlm_lock *lock) +static int llu_lock_to_stripe_offset(struct inode *inode, struct ldlm_lock *lock) { struct llu_inode_info *lli = llu_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; @@ -162,7 +203,7 @@ static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp) lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms; LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64, - lli->lli_st_size, stripe, lvb->lvb_size); + llu_i2stat(inode)->st_size, stripe, lvb->lvb_size); iput: I_RELE(inode); out: @@ -175,37 +216,35 @@ static int llu_glimpse_callback(struct ldlm_lock *lock, void *reqp) return rc; } -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); -__u64 lov_merge_blocks(struct lov_stripe_md *lsm); -__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time); - /* NB: lov_merge_size will prefer locally cached writes if they extend the * file (because it prefers KMS over RSS when larger) */ int llu_glimpse_size(struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } }; struct lustre_handle lockh = { 0 }; int rc, flags = LDLM_FL_HAS_INTENT; ENTRY; - CDEBUG(D_DLMTRACE, "Glimpsing inode %lu\n", lli->lli_st_ino); + CDEBUG(D_DLMTRACE, "Glimpsing inode %llu\n", st->st_ino); rc = obd_enqueue(sbi->ll_osc_exp, lli->lli_smd, LDLM_EXTENT, &policy, LCK_PR, &flags, llu_extent_lock_callback, ldlm_completion_ast, llu_glimpse_callback, inode, sizeof(struct ost_lvb), lustre_swab_ost_lvb, &lockh); - if (rc > 0) - RETURN(-EIO); - - lli->lli_st_size = lov_merge_size(lli->lli_smd, 0); - lli->lli_st_blocks = lov_merge_blocks(lli->lli_smd); - lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, lli->lli_st_mtime); + if (rc) { + CERROR("obd_enqueue returned rc %d, returning -EIO\n", rc); + RETURN(rc > 0 ? -EIO : rc); + } - CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n", - lli->lli_st_size, lli->lli_st_blocks); + st->st_size = lov_merge_size(lli->lli_smd, 0); + st->st_blocks = lov_merge_blocks(lli->lli_smd); + //lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime); + CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %llu\n", + st->st_size, st->st_blocks); obd_cancel(sbi->ll_osc_exp, lli->lli_smd, LCK_PR, &lockh); @@ -218,7 +257,7 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, int ast_flags) { struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); int rc; ENTRY; @@ -229,8 +268,8 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, (sbi->ll_flags & LL_SBI_NOLCK)) RETURN(0); - CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", - lli->lli_st_ino, policy->l_extent.start, policy->l_extent.end); + CDEBUG(D_DLMTRACE, "Locking inode %llu, start "LPU64" end "LPU64"\n", + st->st_ino, policy->l_extent.start, policy->l_extent.end); rc = obd_enqueue(sbi->ll_osc_exp, lsm, LDLM_EXTENT, policy, mode, &ast_flags, llu_extent_lock_callback, @@ -241,91 +280,13 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, if (policy->l_extent.start == 0 && policy->l_extent.end == OBD_OBJECT_EOF) - lli->lli_st_size = lov_merge_size(lsm, 1); + st->st_size = lov_merge_size(lsm, 1); - RETURN(rc); -} - -#if 0 -int llu_extent_lock_no_validate(struct ll_file_data *fd, - struct inode *inode, - struct lov_stripe_md *lsm, - int mode, - struct ldlm_extent *extent, - struct lustre_handle *lockh, - int ast_flags) -{ - struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); - int rc; - ENTRY; - - LASSERT(lockh->cookie == 0); - - /* XXX phil: can we do this? won't it screw the file size up? */ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", - lli->lli_st_ino, extent->start, extent->end); - - rc = obd_enqueue(sbi->ll_osc_exp, lsm, NULL, LDLM_EXTENT, extent, - sizeof(extent), mode, &ast_flags, - llu_extent_lock_callback, inode, lockh); + //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime); RETURN(rc); } -/* - * this grabs a lock and manually implements behaviour that makes it look like - * the OST is returning the file size with each lock acquisition. - */ -int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct ldlm_extent *extent, struct lustre_handle *lockh) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct obd_export *exp = llu_i2obdexp(inode); - struct ldlm_extent size_lock; - struct lustre_handle match_lockh = {0}; - int flags, rc, matched; - ENTRY; - - rc = llu_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh, 0); - if (rc != ELDLM_OK) - RETURN(rc); - - if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags)) - RETURN(0); - - rc = llu_inode_getattr(inode, lsm); - if (rc) { - llu_extent_unlock(fd, inode, lsm, mode, lockh); - RETURN(rc); - } - - size_lock.start = lli->lli_st_size; - size_lock.end = OBD_OBJECT_EOF; - - /* XXX I bet we should be checking the lock ignore flags.. */ - /* FIXME use LDLM_FL_TEST_LOCK instead */ - flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED; - matched = obd_match(exp, lsm, LDLM_EXTENT, &size_lock, - sizeof(size_lock), LCK_PR, &flags, inode, - &match_lockh); - - /* hey, alright, we hold a size lock that covers the size we - * just found, its not going to change for a while.. */ - if (matched == 1) { - set_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags); - obd_cancel(exp, lsm, LCK_PR, &match_lockh); - } - - RETURN(0); -} -#endif - int llu_extent_unlock(struct ll_file_data *fd, struct inode *inode, struct lov_stripe_md *lsm, int mode, struct lustre_handle *lockh) @@ -354,14 +315,6 @@ struct ll_async_page { struct inode *llap_inode; }; -static struct ll_async_page *llap_from_cookie(void *cookie) -{ - struct ll_async_page *llap = cookie; - if (llap->llap_magic != LLAP_MAGIC) - return ERR_PTR(-EINVAL); - return llap; -}; - static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa) { struct ll_async_page *llap; @@ -370,12 +323,7 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa) obd_flag valid_flags; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); inode = llap->llap_inode; lsm = llu_i2info(inode)->lli_smd; @@ -394,13 +342,9 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { struct ll_async_page *llap; struct page *page; + ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); llap->llap_queued = 0; page = llap->llap_page; @@ -412,164 +356,43 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) EXIT; } -static struct obd_async_page_ops llu_async_page_ops = { - .ap_make_ready = NULL, - .ap_refresh_count = NULL, - .ap_fill_obdo = llu_ap_fill_obdo, - .ap_completion = llu_ap_completion, -}; - -static -struct llu_sysio_cookie* get_sysio_cookie(struct inode *inode, - struct obd_export *exp, int maxpages) +static void llu_ap_get_ucred(void *data, struct obd_ucred *ouc) { - struct llu_sysio_cookie *cookie; - int rc; - - if (!llap_cookie_size) - llap_cookie_size = obd_prep_async_page(llu_i2obdexp(inode), - NULL, NULL, NULL, 0, - NULL, NULL, NULL); - OBD_ALLOC(cookie, LLU_SYSIO_COOKIE_SIZE(exp, maxpages)); - if (cookie == NULL) - goto out; - - I_REF(inode); - cookie->lsc_inode = inode; - cookie->lsc_maxpages = maxpages; - cookie->lsc_llap = (struct ll_async_page *)(cookie + 1); - cookie->lsc_pages = (struct page *) (cookie->lsc_llap + maxpages); - cookie->lsc_llap_cookie = (void *)(cookie->lsc_pages + maxpages); - - rc = oig_init(&cookie->lsc_oig); - if (rc) { - OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(exp, maxpages)); - cookie = NULL; - } - -out: - return cookie; -} - -static -void put_sysio_cookie(struct llu_sysio_cookie *cookie) -{ - struct lov_stripe_md *lsm = llu_i2info(cookie->lsc_inode)->lli_smd; - struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode); - struct ll_async_page *llap = cookie->lsc_llap; -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - struct page *pages = cookie->lsc_pages; -#endif - int i; - - for (i = 0; i< cookie->lsc_maxpages; i++) { - if (llap[i].llap_cookie) - obd_teardown_async_page(exp, lsm, NULL, - llap[i].llap_cookie); -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - if (pages[i]._managed) { - free(pages[i].addr); - pages[i]._managed = 0; - } -#endif - } - - I_RELE(cookie->lsc_inode); - - oig_release(cookie->lsc_oig); - OBD_FREE(cookie, LLU_SYSIO_COOKIE_SIZE(exp, cookie->lsc_maxpages)); -} - -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE -/* Note: these code should be removed finally, don't need - * more cleanup - */ -static -int prepare_unaligned_write(struct llu_sysio_cookie *cookie) -{ - struct inode *inode = cookie->lsc_inode; - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct obdo oa; - struct page *pages = cookie->lsc_pages; - int i, pgidx[2] = {0, cookie->lsc_npages-1}; - int rc; + struct ll_async_page *llap; + struct ll_uctxt ctxt; ENTRY; - for (i = 0; i < 2; i++) { - struct page *oldpage = &pages[pgidx[i]]; - struct page newpage; - struct brw_page pg; - char *newbuf; - - if (i == 0 && pgidx[0] == pgidx[1]) - continue; - - LASSERT(oldpage->_offset + oldpage->_count <= PAGE_CACHE_SIZE); + llap = LLAP_FROM_COOKIE(data); - if (oldpage->_count == PAGE_CACHE_SIZE) - continue; - - if (oldpage->index << PAGE_CACHE_SHIFT >= - lli->lli_st_size) - continue; - - newbuf = malloc(PAGE_CACHE_SIZE); - if (!newbuf) - return -ENOMEM; + ouc->ouc_fsuid = current->fsuid; + ouc->ouc_fsgid = current->fsgid; + ouc->ouc_cap = current->cap_effective; + ll_i2uctxt(&ctxt, llap->llap_inode, NULL); + ouc->ouc_suppgid1 = ctxt.gid1; - newpage.index = oldpage->index; - newpage.addr = newbuf; - - pg.pg = &newpage; - pg.off = ((obd_off)newpage.index << PAGE_CACHE_SHIFT); - if (pg.off + PAGE_CACHE_SIZE > lli->lli_st_size) - pg.count = lli->lli_st_size % PAGE_CACHE_SIZE; - else - pg.count = PAGE_CACHE_SIZE; - pg.flag = 0; - - oa.o_id = lsm->lsm_object_id; - oa.o_mode = lli->lli_st_mode; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; - - /* issue read */ - rc = obd_brw(OBD_BRW_READ, llu_i2obdexp(inode), &oa, lsm, 1, &pg, NULL); - if (rc) { - free(newbuf); - RETURN(rc); - } - - /* copy page content, and reset page params */ - memcpy(newbuf + oldpage->_offset, - (char*)oldpage->addr + oldpage->_offset, - oldpage->_count); - - oldpage->addr = newbuf; - if ((((obd_off)oldpage->index << PAGE_CACHE_SHIFT) + - oldpage->_offset + oldpage->_count) > lli->lli_st_size) - oldpage->_count += oldpage->_offset; - else - oldpage->_count = PAGE_CACHE_SIZE; - oldpage->_offset = 0; - oldpage->_managed = 1; - } - - RETURN(0); + EXIT; } -#endif -static -int llu_prep_async_io(struct llu_sysio_cookie *cookie, int cmd, - char *buf, loff_t pos, size_t count) +static struct obd_async_page_ops llu_async_page_ops = { + .ap_make_ready = NULL, + .ap_refresh_count = NULL, + .ap_fill_obdo = llu_ap_fill_obdo, + .ap_completion = llu_ap_completion, + .ap_get_ucred = llu_ap_get_ucred, +}; + +static int llu_queue_pio(int cmd, struct llu_io_group *group, + char *buf, size_t count, loff_t pos) { - struct llu_inode_info *lli = llu_i2info(cookie->lsc_inode); + struct llu_inode_info *lli = llu_i2info(group->lig_inode); + struct intnl_stat *st = llu_i2stat(group->lig_inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode); - struct page *pages = cookie->lsc_pages; - struct ll_async_page *llap = cookie->lsc_llap; - void *llap_cookie = cookie->lsc_llap_cookie; - int i, rc, npages = 0; + struct obd_export *exp = llu_i2obdexp(group->lig_inode); + struct page *pages = &group->lig_pages[group->lig_npages],*page = pages; + struct ll_async_page *llap = &group->lig_llaps[group->lig_npages]; + void *llap_cookie = group->lig_llap_cookies + + llap_cookie_size * group->lig_npages; + int i, rc, npages = 0, ret_bytes = 0; ENTRY; if (!exp) @@ -587,350 +410,393 @@ int llu_prep_async_io(struct llu_sysio_cookie *cookie, int cmd, /* prevent read beyond file range */ if ((cmd == OBD_BRW_READ) && - (pos + bytes) >= lli->lli_st_size) { - if (pos >= lli->lli_st_size) + (pos + bytes) >= st->st_size) { + if (pos >= st->st_size) break; - bytes = lli->lli_st_size - pos; + bytes = st->st_size - pos; } /* prepare page for this index */ - pages[npages].index = index; - pages[npages].addr = buf - offset; + page->index = index; + page->addr = buf - offset; - pages[npages]._offset = offset; - pages[npages]._count = bytes; + page->_offset = offset; + page->_count = bytes; + page++; npages++; count -= bytes; pos += bytes; buf += bytes; - cookie->lsc_rwcount += bytes; + group->lig_rwcount += bytes; + ret_bytes += bytes; } while (count); - cookie->lsc_npages = npages; - -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - if (cmd == OBD_BRW_WRITE) { - rc = prepare_unaligned_write(cookie); - if (rc) - RETURN(rc); - } -#endif + group->lig_npages += npages; - for (i = 0; i < npages; i++) { - llap[i].llap_magic = LLAP_MAGIC; - llap[i].llap_cookie = llap_cookie + i * llap_cookie_size; - rc = obd_prep_async_page(exp, lsm, NULL, &pages[i], - (obd_off)pages[i].index << PAGE_SHIFT, + for (i = 0, page = pages; i < npages; + i++, page++, llap++, llap_cookie += llap_cookie_size){ + llap->llap_magic = LLAP_MAGIC; + llap->llap_cookie = llap_cookie; + rc = obd_prep_async_page(exp, lsm, NULL, page, + (obd_off)page->index << PAGE_SHIFT, &llu_async_page_ops, - &llap[i], &llap[i].llap_cookie); + llap, &llap->llap_cookie); if (rc) { - llap[i].llap_cookie = NULL; + LASSERT(rc < 0); + llap->llap_cookie = NULL; RETURN(rc); } - CDEBUG(D_CACHE, "llap %p page %p cookie %p obj off "LPU64"\n", - &llap[i], &pages[i], llap[i].llap_cookie, - (obd_off)pages[i].index << PAGE_SHIFT); - pages[i].private = (unsigned long)&llap[i]; - llap[i].llap_page = &pages[i]; - llap[i].llap_inode = cookie->lsc_inode; - - rc = obd_queue_group_io(exp, lsm, NULL, cookie->lsc_oig, - llap[i].llap_cookie, cmd, - pages[i]._offset, pages[i]._count, 0, + CDEBUG(D_CACHE, "llap %p page %p group %p obj off "LPU64"\n", + llap, page, llap->llap_cookie, + (obd_off)pages->index << PAGE_SHIFT); + page->private = (unsigned long)llap; + llap->llap_page = page; + llap->llap_inode = group->lig_inode; + + rc = obd_queue_group_io(exp, lsm, NULL, group->lig_oig, + llap->llap_cookie, cmd, + page->_offset, page->_count, 0, ASYNC_READY | ASYNC_URGENT | ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC); - if (rc) + if (rc) { + LASSERT(rc < 0); RETURN(rc); + } - llap[i].llap_queued = 1; + llap->llap_queued = 1; } - RETURN(0); + RETURN(ret_bytes); } static -int llu_start_async_io(struct llu_sysio_cookie *cookie) +struct llu_io_group * get_io_group(struct inode *inode, int maxpages) { - struct lov_stripe_md *lsm = llu_i2info(cookie->lsc_inode)->lli_smd; - struct obd_export *exp = llu_i2obdexp(cookie->lsc_inode); + struct llu_io_group *group; + int rc; - return obd_trigger_group_io(exp, lsm, NULL, cookie->lsc_oig); -} + if (!llap_cookie_size) + llap_cookie_size = obd_prep_async_page(llu_i2obdexp(inode), + NULL, NULL, NULL, 0, + NULL, NULL, NULL); -/* - * read/write a continuous buffer for an inode (zero-copy) - */ -struct llu_sysio_cookie* -llu_rw(int cmd, struct inode *inode, char *buf, size_t count, loff_t pos) -{ - struct llu_sysio_cookie *cookie; - int max_pages, rc; - ENTRY; + OBD_ALLOC(group, LLU_IO_GROUP_SIZE(maxpages)); + if (!group) + return ERR_PTR(-ENOMEM); - max_pages = (count >> PAGE_SHIFT) + 2; + I_REF(inode); + group->lig_inode = inode; + group->lig_maxpages = maxpages; + group->lig_llaps = (struct ll_async_page *)(group + 1); + group->lig_pages = (struct page *)(&group->lig_llaps[maxpages]); + group->lig_llap_cookies = (void *)(&group->lig_pages[maxpages]); - cookie = get_sysio_cookie(inode, llu_i2obdexp(inode), max_pages); - if (!cookie) - RETURN(ERR_PTR(-ENOMEM)); + rc = oig_init(&group->lig_oig); + if (rc) { + OBD_FREE(group, LLU_IO_GROUP_SIZE(maxpages)); + return ERR_PTR(rc); + } - rc = llu_prep_async_io(cookie, cmd, buf, pos, count); - if (rc) - GOTO(out_cleanup, rc); + return group; +} - rc = llu_start_async_io(cookie); - if (rc) - GOTO(out_cleanup, rc); +static int max_io_pages(ssize_t len, int iovlen) +{ + return (((len + PAGE_SIZE -1) / PAGE_SIZE) + 2 + iovlen - 1); +} -/* - rc = oig_wait(&oig); - if (rc) { - CERROR("file i/o error!\n"); - rw_count = rc; +static +void put_io_group(struct llu_io_group *group) +{ + struct lov_stripe_md *lsm = llu_i2info(group->lig_inode)->lli_smd; + struct obd_export *exp = llu_i2obdexp(group->lig_inode); + struct ll_async_page *llap = group->lig_llaps; + int i; + + for (i = 0; i < group->lig_npages; i++, llap++) { + if (llap->llap_cookie) + obd_teardown_async_page(exp, lsm, NULL, + llap->llap_cookie); } -*/ - RETURN(cookie); -out_cleanup: - put_sysio_cookie(cookie); - RETURN(ERR_PTR(rc)); + I_RELE(group->lig_inode); + + oig_release(group->lig_oig); + OBD_FREE(group, LLU_IO_GROUP_SIZE(group->lig_maxpages)); } -struct llu_sysio_callback_args* -llu_file_write(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos) +static +ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen, + _SYSIO_OFF_T pos, ssize_t len, + void *private) { + struct llu_io_session *session = (struct llu_io_session *) private; + struct inode *inode = session->lis_inode; struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd = lli->lli_file_data; struct lustre_handle lockh = {0}; struct lov_stripe_md *lsm = lli->lli_smd; struct obd_export *exp = NULL; ldlm_policy_data_t policy; - struct llu_sysio_callback_args *lsca; - struct llu_sysio_cookie *cookie; - ldlm_error_t err; - int iovidx; + struct llu_io_group *iogroup; + int astflag = (lli->lli_open_flags & O_NONBLOCK) ? + LDLM_FL_BLOCK_NOWAIT : 0; + __u64 kms; + int err, is_read, lock_mode, iovidx, ret; ENTRY; - /* XXX consider other types later */ - if (!S_ISREG(lli->lli_st_mode)) - LBUG(); - - LASSERT(iovlen <= MAX_IOVEC); + /* in a large iov read/write we'll be repeatedly called. + * so give a chance to answer cancel ast here + */ + liblustre_wait_event(0); exp = llu_i2obdexp(inode); if (exp == NULL) - RETURN(ERR_PTR(-EINVAL)); + RETURN(-EINVAL); + + if (len == 0 || iovlen == 0) + RETURN(0); - OBD_ALLOC(lsca, sizeof(*lsca)); - if (!lsca) - RETURN(ERR_PTR(-ENOMEM)); + if (pos + len > lli->lli_maxbytes) + RETURN(-ERANGE); + + iogroup = get_io_group(inode, max_io_pages(len, iovlen)); + if (IS_ERR(iogroup)) + RETURN(PTR_ERR(iogroup)); + + is_read = session->lis_cmd == OBD_BRW_READ; + lock_mode = is_read ? LCK_PR : LCK_PW; + + if (!is_read && (lli->lli_open_flags & O_APPEND)) { + policy.l_extent.start = 0; + policy.l_extent.end = OBD_OBJECT_EOF; + } else { + policy.l_extent.start = pos; + policy.l_extent.end = pos + len - 1; + } + + err = llu_extent_lock(fd, inode, lsm, lock_mode, &policy, + &lockh, astflag); + if (err != ELDLM_OK) + GOTO(err_put, err); + + if (is_read) { + kms = lov_merge_size(lsm, 1); + if (policy.l_extent.end > kms) { + /* A glimpse is necessary to determine whether we + * return a short read or some zeroes at the end of + * the buffer */ + if ((err = llu_glimpse_size(inode))) { + llu_extent_unlock(fd, inode, lsm, + lock_mode, &lockh); + GOTO(err_put, err); + } + } else { + st->st_size = kms; + } + } else { + if (lli->lli_open_flags & O_APPEND) + pos = st->st_size; + } - /* FIXME optimize the following extent locking */ for (iovidx = 0; iovidx < iovlen; iovidx++) { - char *buf = (char*)iovec[iovidx].iov_base; + char *buf = (char *) iovec[iovidx].iov_base; size_t count = iovec[iovidx].iov_len; - if (count == 0) + if (!count) continue; + if (len < count) + count = len; + if (IS_BAD_PTR(buf) || IS_BAD_PTR(buf + count)) { + llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh); + GOTO(err_put, err = -EFAULT); + } - if (pos + count > lli->lli_maxbytes) - GOTO(err_out, err = -ERANGE); - - /* FIXME libsysio haven't handle O_APPEND?? */ - policy.l_extent.start = pos; - policy.l_extent.end = pos + count - 1; - - err = llu_extent_lock(fd, inode, lsm, LCK_PW, &policy, - &lockh, 0); - if (err != ELDLM_OK) - GOTO(err_out, err = -ENOLCK); - - CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", - lli->lli_st_ino, count, pos); - - cookie = llu_rw(OBD_BRW_WRITE, inode, buf, count, pos); - if (!IS_ERR(cookie)) { - /* save cookie */ - lsca->cookies[lsca->ncookies++] = cookie; - pos += count; - obd_adjust_kms(exp, lsm, pos, 0); - /* file size grow */ - if (pos > lli->lli_st_size) - lli->lli_st_size = pos; + if (is_read) { + if (pos >= st->st_size) + break; } else { - llu_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); - GOTO(err_out, err = PTR_ERR(cookie)); + if (pos >= lli->lli_maxbytes) { + llu_extent_unlock(fd, inode, lsm, lock_mode, + &lockh); + GOTO(err_put, err = -EFBIG); + } + if (pos + count >= lli->lli_maxbytes) + count = lli->lli_maxbytes - pos; } - /* XXX errors? */ - err = llu_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); - if (err) - CERROR("extent unlock error %d\n", err); + ret = llu_queue_pio(session->lis_cmd, iogroup, buf, count, pos); + if (ret < 0) { + llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh); + GOTO(err_put, err = ret); + } else { + pos += ret; + if (!is_read) { + LASSERT(ret == count); + obd_adjust_kms(exp, lsm, pos, 0); + /* file size grow immediately */ + if (pos > st->st_size) + st->st_size = pos; + } + len -= ret; + if (!len) + break; + } } + LASSERT(len == 0 || is_read); /* libsysio should guarantee this */ - RETURN(lsca); + err = llu_extent_unlock(fd, inode, lsm, lock_mode, &lockh); + if (err) + CERROR("extent unlock error %d\n", err); -err_out: - /* teardown all async stuff */ - while (lsca->ncookies--) { - put_sysio_cookie(lsca->cookies[lsca->ncookies]); - } - OBD_FREE(lsca, sizeof(*lsca)); + err = obd_trigger_group_io(exp, lsm, NULL, iogroup->lig_oig); + if (err) + GOTO(err_put, err); - RETURN(ERR_PTR(err)); + session->lis_groups[session->lis_ngroups++] = iogroup; + RETURN(0); +err_put: + put_io_group(iogroup); + RETURN((ssize_t)err); } -#if 0 -static void llu_update_atime(struct inode *inode) +static +struct llu_io_session *get_io_session(struct inode *ino, int ngroups, int cmd) { - struct llu_inode_info *lli = llu_i2info(inode); - -#ifdef USE_ATIME - struct iattr attr; - - attr.ia_atime = LTIME_S(CURRENT_TIME); - attr.ia_valid = ATTR_ATIME; + struct llu_io_session *session; - if (lli->lli_st_atime == attr.ia_atime) return; - if (IS_RDONLY(inode)) return; - if (IS_NOATIME(inode)) return; + OBD_ALLOC(session, LLU_IO_SESSION_SIZE(ngroups)); + if (!session) + return NULL; - /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ - llu_inode_setattr(inode, &attr, 0); -#else - /* update atime, but don't explicitly write it out just this change */ - inode->i_atime = CURRENT_TIME; -#endif + I_REF(ino); + session->lis_inode = ino; + session->lis_max_groups = ngroups; + session->lis_cmd = cmd; + return session; } -#endif -struct llu_sysio_callback_args* -llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos) +static void put_io_session(struct llu_io_session *session) { - struct llu_inode_info *lli = llu_i2info(inode); - struct ll_file_data *fd = lli->lli_file_data; - struct lov_stripe_md *lsm = lli->lli_smd; - struct lustre_handle lockh = { 0 }; - ldlm_policy_data_t policy; - struct llu_sysio_callback_args *lsca; - struct llu_sysio_cookie *cookie; - __u64 kms; - int iovidx; - - ldlm_error_t err; - ENTRY; + int i; - OBD_ALLOC(lsca, sizeof(*lsca)); - if (!lsca) - RETURN(ERR_PTR(-ENOMEM)); + for (i = 0; i < session->lis_ngroups; i++) { + if (session->lis_groups[i]) { + put_io_group(session->lis_groups[i]); + session->lis_groups[i] = NULL; + } + } - for (iovidx = 0; iovidx < iovlen; iovidx++) { - char *buf = iovec[iovidx].iov_base; - size_t count = iovec[iovidx].iov_len; + I_RELE(session->lis_inode); + OBD_FREE(session, LLU_IO_SESSION_SIZE(session->lis_max_groups)); +} - /* "If nbyte is 0, read() will return 0 and have no other results." - * -- Single Unix Spec */ - if (count == 0) - continue; +static int llu_file_rwx(struct inode *ino, + struct ioctx *ioctx, + int read) +{ + struct llu_io_session *session; + ssize_t cc; + int cmd = read ? OBD_BRW_READ : OBD_BRW_WRITE; + ENTRY; - policy.l_extent.start = pos; - policy.l_extent.end = pos + count - 1; + LASSERT(ioctx->ioctx_xtvlen >= 0); + LASSERT(ioctx->ioctx_iovlen >= 0); - err = llu_extent_lock(fd, inode, lsm, LCK_PR, &policy, &lockh, 0); - if (err != ELDLM_OK) - GOTO(err_out, err = -ENOLCK); + liblustre_wait_event(0); - kms = lov_merge_size(lsm, 1); - if (policy.l_extent.end > kms) { - /* A glimpse is necessary to determine whether we - * return a short read or some zeroes at the end of - * the buffer */ - if (llu_glimpse_size(inode)) { - llu_extent_unlock(fd, inode, lsm,LCK_PR,&lockh); - GOTO(err_out, err = -ENOLCK); - } - } else { - lli->lli_st_size = kms; - } + if (!ioctx->ioctx_xtvlen) + RETURN(0); - CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld, " - "i_size "LPU64"\n", lli->lli_st_ino, count, pos, - lli->lli_st_size); + /* XXX consider other types later */ + if (S_ISDIR(llu_i2stat(ino)->st_mode)) + RETURN(-EISDIR); + if (!S_ISREG(llu_i2stat(ino)->st_mode)) + RETURN(-EOPNOTSUPP); + + session = get_io_session(ino, ioctx->ioctx_xtvlen * 2, cmd); + if (!session) + RETURN(-ENOMEM); + + cc = _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, + ioctx->ioctx_iov, ioctx->ioctx_iovlen, + llu_file_prwv, session); + + if (cc >= 0) { + LASSERT(!ioctx->ioctx_cc); + ioctx->ioctx_private = session; + RETURN(0); + } else { + put_io_session(session); + RETURN(cc); + } +} - if (pos >= lli->lli_st_size) { - llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - break; - } +int llu_iop_read(struct inode *ino, + struct ioctx *ioctx) +{ + return llu_file_rwx(ino, ioctx, 1); +} - cookie = llu_rw(OBD_BRW_READ, inode, buf, count, pos); - if (!IS_ERR(cookie)) { - /* save cookie */ - lsca->cookies[lsca->ncookies++] = cookie; - pos += count; - } else { - llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - GOTO(err_out, err = PTR_ERR(cookie)); - } +int llu_iop_write(struct inode *ino, + struct ioctx *ioctx) +{ + struct iattr iattr; + int rc; - /* XXX errors? */ - err = llu_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - if (err) - CERROR("extent_unlock fail: %d\n", err); - } -#if 0 - if (readed > 0) - llu_update_atime(inode); -#endif - RETURN(lsca); + memset(&iattr, 0, sizeof(iattr)); + iattr.ia_mtime = iattr.ia_atime = CURRENT_TIME; + iattr.ia_valid = ATTR_MTIME | ATTR_ATIME | ATTR_RAW; -err_out: - /* teardown all async stuff */ - while (lsca->ncookies--) { - put_sysio_cookie(lsca->cookies[lsca->ncookies]); + liblustre_wait_event(0); + rc = llu_setattr_raw(ino, &iattr); + if (rc) { + CERROR("failed to set mtime/atime during write: %d", rc); + /* XXX should continue or return error? */ } - OBD_FREE(lsca, sizeof(*lsca)); - RETURN(ERR_PTR(err)); + return llu_file_rwx(ino, ioctx, 0); } -int llu_iop_iodone(struct ioctx *ioctxp) +int llu_iop_iodone(struct ioctx *ioctx) { - struct llu_sysio_callback_args *lsca = ioctxp->ioctx_private; - struct llu_sysio_cookie *cookie; + struct llu_io_session *session; + struct llu_io_group *group; int i, err = 0, rc = 0; ENTRY; - /* write/read(fd, buf, 0) */ - if (!lsca) { - ioctxp->ioctx_cc = 0; - RETURN(1); - } + liblustre_wait_event(0); - LASSERT(!IS_ERR(lsca)); + session = (struct llu_io_session *) ioctx->ioctx_private; + LASSERT(session); + LASSERT(!IS_ERR(session)); - for (i = 0; i < lsca->ncookies; i++) { - cookie = lsca->cookies[i]; - if (cookie) { - err = oig_wait(cookie->lsc_oig); - if (err && !rc) - rc = err; + for (i = 0; i < session->lis_ngroups; i++) { + group = session->lis_groups[i]; + if (group) { + if (!rc) { + err = oig_wait(group->lig_oig); + if (err) + rc = err; + } if (!rc) - ioctxp->ioctx_cc += cookie->lsc_rwcount; - put_sysio_cookie(cookie); + ioctx->ioctx_cc += group->lig_rwcount; + put_io_group(group); + session->lis_groups[i] = NULL; } } if (rc) { LASSERT(rc < 0); - ioctxp->ioctx_cc = -1; - ioctxp->ioctx_errno = -rc; + ioctx->ioctx_cc = -1; + ioctx->ioctx_errno = -rc; } - OBD_FREE(lsca, sizeof(*lsca)); - ioctxp->ioctx_private = NULL; + put_io_session(session); + ioctx->ioctx_private = NULL; RETURN(1); } diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 9972f1a..6476c6e 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -3,7 +3,7 @@ * * Lustre Light Super operations * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -37,37 +37,86 @@ # include #endif +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD #include "llite_lib.h" +#ifndef MAY_EXEC +#define MAY_EXEC 1 +#define MAY_WRITE 2 +#define MAY_READ 4 +#endif + +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) + +static int ll_permission(struct inode *inode, int mask) +{ + struct intnl_stat *st = llu_i2stat(inode); + mode_t mode = st->st_mode; + + if (current->fsuid == st->st_uid) + mode >>= 6; + else if (in_group_p(st->st_gid)) + mode >>= 3; + + if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) + return 0; + + if ((mask & (MAY_READ|MAY_WRITE)) || + (st->st_mode & S_IXUGO)) + if (capable(CAP_DAC_OVERRIDE)) + return 0; + + if (mask == MAY_READ || + (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) { + if (capable(CAP_DAC_READ_SEARCH)) + return 0; + } + + return -EACCES; +} + static void llu_fsop_gone(struct filesys *fs) { struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private; struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp); - struct ll_fid rootfid; + int next = 0; ENTRY; list_del(&sbi->ll_conn_chain); obd_disconnect(sbi->ll_osc_exp); - - /* NULL request to force sync on the MDS, and get the last_committed - * value to flush remaining RPCs from the sending queue on client. - * - * XXX This should be an mdc_sync() call to sync the whole MDS fs, - * which we can call for other reasons as well. - */ - if (!obd->obd_no_recov) - mdc_getstatus(sbi->ll_mdc_exp, &rootfid); - obd_disconnect(sbi->ll_mdc_exp); + while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL){ + struct lustre_cfg_bufs bufs; + struct lustre_cfg *lcfg; + int err; + + lustre_cfg_bufs_reset(&bufs, obd->obd_name); + lcfg = lustre_cfg_new(LCFG_CLEANUP, &bufs); + err = class_process_config(lcfg); + lustre_cfg_free(lcfg); + if (err) + CERROR("cleanup failed: %s\n", obd->obd_name); + + lustre_cfg_bufs_reset(&bufs, obd->obd_name); + lcfg = lustre_cfg_new(LCFG_DETACH, &bufs); + err = class_process_config(lcfg); + if (err) + CERROR("detach failed: %s\n", obd->obd_name); + } + OBD_FREE(sbi, sizeof(*sbi)); EXIT; @@ -79,6 +128,7 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, struct lov_stripe_md *lsm) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { @@ -89,41 +139,45 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; } else { if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) { - CERROR("lsm mismatch for inode %ld\n", - lli->lli_st_ino); + CERROR("lsm mismatch for inode %lld\n", + st->st_ino); LBUG(); } } } if (body->valid & OBD_MD_FLID) - lli->lli_st_ino = body->ino; + st->st_ino = body->ino; if (body->valid & OBD_MD_FLATIME) - LTIME_S(lli->lli_st_atime) = body->atime; + LTIME_S(st->st_atime) = body->atime; if (body->valid & OBD_MD_FLMTIME) - LTIME_S(lli->lli_st_mtime) = body->mtime; + LTIME_S(st->st_mtime) = body->mtime; if (body->valid & OBD_MD_FLCTIME) - LTIME_S(lli->lli_st_ctime) = body->ctime; + LTIME_S(st->st_ctime) = body->ctime; if (body->valid & OBD_MD_FLMODE) - lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT); + st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT); if (body->valid & OBD_MD_FLTYPE) - lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT); + st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT); + if (S_ISREG(st->st_mode)) + st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE); + else + st->st_blksize = 4096; if (body->valid & OBD_MD_FLUID) - lli->lli_st_uid = body->uid; + st->st_uid = body->uid; if (body->valid & OBD_MD_FLGID) - lli->lli_st_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - lli->lli_st_flags = body->flags; + st->st_gid = body->gid; if (body->valid & OBD_MD_FLNLINK) - lli->lli_st_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - lli->lli_st_generation = body->generation; + st->st_nlink = body->nlink; if (body->valid & OBD_MD_FLRDEV) - lli->lli_st_rdev = body->rdev; + st->st_rdev = body->rdev; if (body->valid & OBD_MD_FLSIZE) - lli->lli_st_size = body->size; + st->st_size = body->size; if (body->valid & OBD_MD_FLBLOCKS) - lli->lli_st_blocks = body->blocks; + st->st_blocks = body->blocks; + if (body->valid & OBD_MD_FLFLAGS) + lli->lli_st_flags = body->flags; + if (body->valid & OBD_MD_FLGENER) + lli->lli_st_generation = body->generation; /* fillin fid */ if (body->valid & OBD_MD_FLID) @@ -137,35 +191,36 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) { struct llu_inode_info *lli = llu_i2info(dst); + struct intnl_stat *st = llu_i2stat(dst); valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n", src->o_valid, - LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime), + LTIME_S(st->st_mtime), LTIME_S(st->st_ctime), (long)src->o_mtime, (long)src->o_ctime); if (valid & OBD_MD_FLATIME) - LTIME_S(lli->lli_st_atime) = src->o_atime; + LTIME_S(st->st_atime) = src->o_atime; if (valid & OBD_MD_FLMTIME) - LTIME_S(lli->lli_st_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime)) - LTIME_S(lli->lli_st_ctime) = src->o_ctime; + LTIME_S(st->st_mtime) = src->o_mtime; + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) + LTIME_S(st->st_ctime) = src->o_ctime; if (valid & OBD_MD_FLSIZE) - lli->lli_st_size = src->o_size; + st->st_size = src->o_size; if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - lli->lli_st_blocks = src->o_blocks; + st->st_blocks = src->o_blocks; if (valid & OBD_MD_FLBLKSZ) - lli->lli_st_blksize = src->o_blksize; + st->st_blksize = src->o_blksize; if (valid & OBD_MD_FLTYPE) - lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT) | (src->o_mode & S_IFMT); + st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT); if (valid & OBD_MD_FLMODE) - lli->lli_st_mode = (lli->lli_st_mode & S_IFMT) | (src->o_mode & ~S_IFMT); + st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT); if (valid & OBD_MD_FLUID) - lli->lli_st_uid = src->o_uid; + st->st_uid = src->o_uid; if (valid & OBD_MD_FLGID) - lli->lli_st_gid = src->o_gid; + st->st_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) lli->lli_st_flags = src->o_flags; if (valid & OBD_MD_FLGENER) @@ -178,51 +233,52 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) { struct llu_inode_info *lli = llu_i2info(src); + struct intnl_stat *st = llu_i2stat(src); obd_flag newvalid = 0; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", - valid, LTIME_S(lli->lli_st_mtime), - LTIME_S(lli->lli_st_ctime)); + valid, LTIME_S(st->st_mtime), + LTIME_S(st->st_ctime)); if (valid & OBD_MD_FLATIME) { - dst->o_atime = LTIME_S(lli->lli_st_atime); + dst->o_atime = LTIME_S(st->st_atime); newvalid |= OBD_MD_FLATIME; } if (valid & OBD_MD_FLMTIME) { - dst->o_mtime = LTIME_S(lli->lli_st_mtime); + dst->o_mtime = LTIME_S(st->st_mtime); newvalid |= OBD_MD_FLMTIME; } if (valid & OBD_MD_FLCTIME) { - dst->o_ctime = LTIME_S(lli->lli_st_ctime); + dst->o_ctime = LTIME_S(st->st_ctime); newvalid |= OBD_MD_FLCTIME; } if (valid & OBD_MD_FLSIZE) { - dst->o_size = lli->lli_st_size; + dst->o_size = st->st_size; newvalid |= OBD_MD_FLSIZE; } if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */ - dst->o_blocks = lli->lli_st_blocks; + dst->o_blocks = st->st_blocks; newvalid |= OBD_MD_FLBLOCKS; } if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */ - dst->o_blksize = lli->lli_st_blksize; + dst->o_blksize = st->st_blksize; newvalid |= OBD_MD_FLBLKSZ; } if (valid & OBD_MD_FLTYPE) { - dst->o_mode = (dst->o_mode & S_IALLUGO)|(lli->lli_st_mode & S_IFMT); + dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT); newvalid |= OBD_MD_FLTYPE; } if (valid & OBD_MD_FLMODE) { - dst->o_mode = (dst->o_mode & S_IFMT)|(lli->lli_st_mode & S_IALLUGO); + dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO); newvalid |= OBD_MD_FLMODE; } if (valid & OBD_MD_FLUID) { - dst->o_uid = lli->lli_st_uid; + dst->o_uid = st->st_uid; newvalid |= OBD_MD_FLUID; } if (valid & OBD_MD_FLGID) { - dst->o_gid = lli->lli_st_gid; + dst->o_gid = st->st_gid; newvalid |= OBD_MD_FLGID; } if (valid & OBD_MD_FLFLAGS) { @@ -286,6 +342,16 @@ static struct inode* llu_new_inode(struct filesys *fs, { struct inode *inode; struct llu_inode_info *lli; + struct intnl_stat st = { + st_dev: 0, +#ifndef AUTOMOUNT_FILE_NAME + st_mode: fid->f_type & S_IFMT, +#else + st_mode: fid->f_type /* all of the bits! */ +#endif + st_uid: geteuid(), + st_gid: getegid(), + }; OBD_ALLOC(lli, sizeof(*lli)); if (!lli) @@ -306,13 +372,7 @@ static struct inode* llu_new_inode(struct filesys *fs, /* file identifier is needed by functions like _sysio_i_find() */ inode = _sysio_i_new(fs, &lli->lli_sysio_fid, -#ifndef AUTOMOUNT_FILE_NAME - fid->f_type & S_IFMT, -#else - fid->f_type, /* all of the bits! */ -#endif - 0, 0, - &llu_inode_ops, lli); + &st, 0, &llu_inode_ops, lli); if (!inode) OBD_FREE(lli, sizeof(*lli)); @@ -333,7 +393,7 @@ static int llu_have_md_lock(struct inode *inode) LASSERT(inode); obddev = sbi->ll_mdc_exp->exp_obd; - res_id.name[0] = lli->lli_st_ino; + res_id.name[0] = llu_i2stat(inode)->st_ino; res_id.name[1] = lli->lli_st_generation; CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); @@ -356,7 +416,6 @@ static int llu_have_md_lock(struct inode *inode) static int llu_inode_revalidate(struct inode *inode) { - struct llu_inode_info *lli = llu_i2info(inode); struct lov_stripe_md *lsm = NULL; ENTRY; @@ -375,14 +434,14 @@ static int llu_inode_revalidate(struct inode *inode) /* Why don't we update all valid MDS fields here, if we're * doing an RPC anyways? -phil */ - if (S_ISREG(lli->lli_st_mode)) { + if (S_ISREG(llu_i2stat(inode)->st_mode)) { ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL); valid |= OBD_MD_FLEASIZE; } ll_inode2fid(&fid, inode); rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); if (rc) { - CERROR("failure %d inode %lu\n", rc, lli->lli_st_ino); + CERROR("failure %d inode %llu\n", rc, llu_i2stat(inode)->st_ino); RETURN(-abs(rc)); } rc = mdc_req2lustre_md(req, 0, sbi->ll_osc_exp, &md); @@ -423,21 +482,7 @@ static int llu_inode_revalidate(struct inode *inode) static void copy_stat_buf(struct inode *ino, struct intnl_stat *b) { - struct llu_inode_info *lli = llu_i2info(ino); - - b->st_dev = lli->lli_st_dev; - b->st_ino = lli->lli_st_ino; - b->st_mode = lli->lli_st_mode; - b->st_nlink = lli->lli_st_nlink; - b->st_uid = lli->lli_st_uid; - b->st_gid = lli->lli_st_gid; - b->st_rdev = lli->lli_st_rdev; - b->st_size = lli->lli_st_size; - b->st_blksize = lli->lli_st_blksize; - b->st_blocks = lli->lli_st_blocks; - b->st_atime = lli->lli_st_atime; - b->st_mtime = lli->lli_st_mtime; - b->st_ctime = lli->lli_st_ctime; + *b = *llu_i2stat(ino); } static int llu_iop_getattr(struct pnode *pno, @@ -447,6 +492,8 @@ static int llu_iop_getattr(struct pnode *pno, int rc; ENTRY; + liblustre_wait_event(0); + if (!ino) { LASSERT(pno); LASSERT(pno->p_base->pb_ino); @@ -461,14 +508,7 @@ static int llu_iop_getattr(struct pnode *pno, rc = llu_inode_revalidate(ino); if (!rc) { copy_stat_buf(ino, b); - - if (llu_i2info(ino)->lli_it) { - struct lookup_intent *it; - - LL_GET_INTENT(ino, it); - it->it_op_release(it); - OBD_FREE(it, sizeof(*it)); - } + LASSERT(!llu_i2info(ino)->lli_it); } RETURN(rc); @@ -480,7 +520,8 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) lock->l_ast_data = NULL; if (lock->l_req_mode != lock->l_granted_mode) - LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); } + LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); + } return LDLM_ITER_CONTINUE; } @@ -492,8 +533,8 @@ void llu_clear_inode(struct inode *inode) struct llu_sb_info *sbi = llu_i2sbi(inode); ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu(%p)\n", lli->lli_st_ino, - lli->lli_st_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n", + llu_i2stat(inode)->st_ino, lli->lli_st_generation, inode); ll_inode2fid(&fid, inode); clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags)); @@ -522,6 +563,7 @@ void llu_iop_gone(struct inode *inode) struct llu_inode_info *lli = llu_i2info(inode); ENTRY; + liblustre_wait_event(0); llu_clear_inode(inode); OBD_FREE(lli, sizeof(*lli)); @@ -531,7 +573,7 @@ void llu_iop_gone(struct inode *inode) static int inode_setattr(struct inode * inode, struct iattr * attr) { unsigned int ia_valid = attr->ia_valid; - struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); int error = 0; if (ia_valid & ATTR_SIZE) { @@ -541,19 +583,19 @@ static int inode_setattr(struct inode * inode, struct iattr * attr) } if (ia_valid & ATTR_UID) - lli->lli_st_uid = attr->ia_uid; + st->st_uid = attr->ia_uid; if (ia_valid & ATTR_GID) - lli->lli_st_gid = attr->ia_gid; + st->st_gid = attr->ia_gid; if (ia_valid & ATTR_ATIME) - lli->lli_st_atime = attr->ia_atime; + st->st_atime = attr->ia_atime; if (ia_valid & ATTR_MTIME) - lli->lli_st_mtime = attr->ia_mtime; + st->st_mtime = attr->ia_mtime; if (ia_valid & ATTR_CTIME) - lli->lli_st_ctime = attr->ia_ctime; + st->st_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { - lli->lli_st_mode = attr->ia_mode; - if (!in_group_p(lli->lli_st_gid) && !capable(CAP_FSETID)) - lli->lli_st_mode &= ~S_ISGID; + st->st_mode = attr->ia_mode; + if (!in_group_p(st->st_gid) && !capable(CAP_FSETID)) + st->st_mode &= ~S_ISGID; } /* mark_inode_dirty(inode); */ out: @@ -577,14 +619,14 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) { struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd; struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; int ia_valid = attr->ia_valid; int rc = 0; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", st->st_ino); if (ia_valid & ATTR_SIZE) { if (attr->ia_size > ll_file_maxbytes(inode)) { @@ -639,10 +681,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) ptlrpc_req_finished(request); RETURN(rc); } + + /* We call inode_setattr to adjust timestamps, but we first + * clear ATTR_SIZE to avoid invoking vmtruncate. + * + * NB: ATTR_SIZE will only be set at this point if the size + * resides on the MDS, ie, this file has no objects. */ + attr->ia_valid &= ~ATTR_SIZE; + inode_setattr(inode, attr); llu_update_inode(inode, md.body, md.lsm); ptlrpc_req_finished(request); - if (!md.lsm || !S_ISREG(lli->lli_st_mode)) { + if (!md.lsm || !S_ISREG(st->st_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); RETURN(0); } @@ -654,12 +704,12 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) { /* from sys_utime() */ if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) { - if (current->fsuid != lli->lli_st_uid && - (rc = ll_permission(inode, 0/*MAY_WRITE*/, NULL)) != 0) + if (current->fsuid != st->st_uid && + (rc = ll_permission(inode, MAY_WRITE)) != 0) RETURN(rc); } else { /* from inode_change_ok() */ - if (current->fsuid != lli->lli_st_uid && + if (current->fsuid != st->st_uid && !capable(CAP_FOWNER)) RETURN(-EPERM); } @@ -689,9 +739,6 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } rc = llu_vmtruncate(inode, attr->ia_size); - if (rc == 0) - set_bit(LLI_F_HAVE_OST_SIZE_LOCK, - &llu_i2info(inode)->lli_flags); /* unlock now as we don't mind others file lockers racing with * the mds updates below? */ @@ -704,8 +751,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { struct obdo oa; - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - lli->lli_st_ino, LTIME_S(attr->ia_mtime)); + CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n", + st->st_ino, LTIME_S(attr->ia_mtime)); oa.o_id = lsm->lsm_object_id; oa.o_valid = OBD_MD_FLID; obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | @@ -728,6 +775,11 @@ static int llu_iop_setattr(struct pnode *pno, struct iattr iattr; ENTRY; + liblustre_wait_event(0); + + LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME | + SETATTR_UID | SETATTR_GID | + SETATTR_LEN | SETATTR_MODE))); memset(&iattr, 0, sizeof(iattr)); if (mask & SETATTR_MODE) { @@ -755,7 +807,8 @@ static int llu_iop_setattr(struct pnode *pno, iattr.ia_valid |= ATTR_SIZE; } - iattr.ia_valid |= ATTR_RAW; + iattr.ia_valid |= ATTR_RAW | ATTR_CTIME; + iattr.ia_ctime = CURRENT_TIME; RETURN(llu_setattr_raw(ino, &iattr)); } @@ -774,7 +827,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) int err = -EMLINK; ENTRY; - if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX) + if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX) RETURN(err); llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); @@ -793,7 +846,8 @@ static int llu_readlink_internal(struct inode *inode, struct llu_sb_info *sbi = llu_i2sbi(inode); struct ll_fid fid; struct mds_body *body; - int rc, symlen = lli->lli_st_size + 1; + struct intnl_stat *st = llu_i2stat(inode); + int rc, symlen = st->st_size + 1; ENTRY; *request = NULL; @@ -808,7 +862,7 @@ static int llu_readlink_internal(struct inode *inode, rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_LINKNAME, symlen, request); if (rc) { - CERROR("inode %lu: rc = %d\n", lli->lli_st_ino, rc); + CERROR("inode %llu: rc = %d\n", st->st_ino, rc); RETURN(rc); } @@ -823,8 +877,8 @@ static int llu_readlink_internal(struct inode *inode, LASSERT (symlen != 0); if (body->eadatasize != symlen) { - CERROR ("inode %lu: symlink length %d not expected %d\n", - lli->lli_st_ino, body->eadatasize - 1, symlen - 1); + CERROR ("inode %llu: symlink length %d not expected %d\n", + st->st_ino, body->eadatasize - 1, symlen - 1); GOTO (failed, rc = -EPROTO); } @@ -832,8 +886,8 @@ static int llu_readlink_internal(struct inode *inode, if (*symname == NULL || strnlen (*symname, symlen) != symlen - 1) { /* not full/NULL terminated */ - CERROR ("inode %lu: symlink not NULL terminated string" - "of length %d\n", lli->lli_st_ino, symlen - 1); + CERROR ("inode %llu: symlink not NULL terminated string" + "of length %d\n", st->st_ino, symlen - 1); GOTO (failed, rc = -EPROTO); } @@ -863,6 +917,7 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize) LASSERT(symname); strncpy(data, symname, bufsize); + rc = strlen(symname); ptlrpc_req_finished(request); out: @@ -880,10 +935,11 @@ static int llu_iop_mknod_raw(struct pnode *pno, int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n", - pno->p_base->pb_name.name, llu_i2info(dir)->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n", + (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name, + llu_i2stat(dir)->st_ino); - if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX) + if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX) RETURN(err); mode &= ~current->fs->umask; @@ -927,13 +983,18 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) LASSERT(src); LASSERT(dir); + liblustre_wait_event(0); llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0); rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request); ptlrpc_req_finished(request); + liblustre_wait_event(0); RETURN(rc); } +/* + * libsysio will clear the inode immediately after return + */ static int llu_iop_unlink_raw(struct pnode *pno) { struct inode *dir = pno->p_base->pb_parent->pb_ino; @@ -948,30 +1009,21 @@ static int llu_iop_unlink_raw(struct pnode *pno) LASSERT(target); + liblustre_wait_event(0); llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); - if (!rc) { + if (!rc) rc = llu_objects_destroy(request, dir); - - llu_i2info(target)->lli_stale_flag = 1; - unhook_stale_inode(pno); - } - ptlrpc_req_finished(request); + liblustre_wait_event(0); + RETURN(rc); } -/* FIXME - * following cases need to be considered later: - * - rename an opened file/dir - * - an opened file be removed in rename - * - rename to remove and hardlink (?opened) - */ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) { struct inode *src = old->p_parent->p_base->pb_ino; struct inode *tgt = new->p_parent->p_base->pb_ino; - struct inode *tgtinode = new->p_base->pb_ino; const char *oldname = old->p_base->pb_name.name; int oldnamelen = old->p_base->pb_name.len; const char *newname = new->p_base->pb_name.name; @@ -990,11 +1042,6 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) &request); if (!rc) { rc = llu_objects_destroy(request, src); - - if (tgtinode) { - llu_i2info(tgtinode)->lli_stale_flag = 1; - unhook_stale_inode(new); - } } ptlrpc_req_finished(request); @@ -1088,6 +1135,8 @@ static int llu_iop_statvfs(struct pnode *pno, int rc; ENTRY; + liblustre_wait_event(0); + #ifndef __CYGWIN__ LASSERT(pno->p_base->pb_ino); rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs); @@ -1119,14 +1168,14 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) const char *name = qstr->name; int len = qstr->len; struct ptlrpc_request *request = NULL; - struct llu_inode_info *lli = llu_i2info(dir); + struct intnl_stat *st = llu_i2stat(dir); struct mdc_op_data op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n", - name, lli->lli_st_ino, lli->lli_st_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", + len, name, st->st_ino, llu_i2info(dir)->lli_st_generation, dir); - if (lli->lli_st_nlink >= EXT2_LINK_MAX) + if (st->st_nlink >= EXT2_LINK_MAX) RETURN(err); mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; @@ -1145,35 +1194,346 @@ static int llu_iop_rmdir_raw(struct pnode *pno) int len = qstr->len; struct ptlrpc_request *request = NULL; struct mdc_op_data op_data; - struct llu_inode_info *lli = llu_i2info(dir); int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n", - name, lli->lli_st_ino, lli->lli_st_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name, + llu_i2stat(dir)->st_ino, llu_i2info(dir)->lli_st_generation,dir); llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR); rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); ptlrpc_req_finished(request); - /* libsysio: remove the pnode right away */ - if (!rc) { - llu_i2info(pno->p_base->pb_ino)->lli_stale_flag = 1; - unhook_stale_inode(pno); + RETURN(rc); +} + +#ifdef O_DIRECT +#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT) +#else +#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC) +#endif +#define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC) + +#if 0 +/* refer to ll_file_flock() for details */ +static int llu_file_flock(struct inode *ino, + int cmd, + struct file_lock *file_lock) +{ + struct obd_device *obddev; + struct llu_inode_info *lli = llu_i2info(ino); + struct intnl_stat *st = llu_i2stat(ino); + struct ldlm_res_id res_id = + { .name = {st->st_ino, + lli->lli_st_generation, LDLM_FLOCK} }; + struct lustre_handle lockh = {0}; + ldlm_policy_data_t flock; + ldlm_mode_t mode = 0; + int flags = 0; + int rc; + + CDEBUG(D_VFSTRACE, "VFS Op:inode="LPU64" file_lock=%p\n", + st->st_ino, file_lock); + + flock.l_flock.pid = file_lock->fl_pid; + flock.l_flock.start = file_lock->fl_start; + flock.l_flock.end = file_lock->fl_end; + + switch (file_lock->fl_type) { + case F_RDLCK: + mode = LCK_PR; + break; + case F_UNLCK: + mode = LCK_NL; + break; + case F_WRLCK: + mode = LCK_PW; + break; + default: + CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type); + LBUG(); + } + + switch (cmd) { + case F_SETLKW: +#ifdef F_SETLKW64 +#if F_SETLKW64 != F_SETLKW + case F_SETLKW64: +#endif +#endif + flags = 0; + break; + case F_SETLK: +#ifdef F_SETLK64 +#if F_SETLK64 != F_SETLK + case F_SETLK64: +#endif +#endif + flags = LDLM_FL_BLOCK_NOWAIT; + break; + case F_GETLK: +#ifdef F_GETLK64 +#if F_GETLK64 != F_GETLK + case F_GETLK64: +#endif +#endif + flags = LDLM_FL_TEST_LOCK; + file_lock->fl_type = mode; + break; + default: + CERROR("unknown fcntl cmd: %d\n", cmd); + LBUG(); } + CDEBUG(D_DLMTRACE, "inode="LPU64", pid="LPU64", flags=%#x, mode=%u, " + "start="LPU64", end="LPU64"\n", st->st_ino, flock.l_flock.pid, + flags, mode, flock.l_flock.start, flock.l_flock.end); + + obddev = llu_i2mdcexp(ino)->exp_obd; + rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, obddev->obd_namespace, + res_id, LDLM_FLOCK, &flock, mode, &flags, + NULL, ldlm_flock_completion_ast, NULL, file_lock, + NULL, 0, NULL, &lockh); RETURN(rc); } -static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap) +static int assign_type(struct file_lock *fl, int type) { - CERROR("liblustre did not support fcntl\n"); - return -ENOSYS; + switch (type) { + case F_RDLCK: + case F_WRLCK: + case F_UNLCK: + fl->fl_type = type; + return 0; + default: + return -EINVAL; + } +} + +static int flock_to_posix_lock(struct inode *ino, + struct file_lock *fl, + struct flock *l) +{ + switch (l->l_whence) { + /* XXX: only SEEK_SET is supported in lustre */ + case SEEK_SET: + fl->fl_start = 0; + break; + default: + return -EINVAL; + } + + fl->fl_end = l->l_len - 1; + if (l->l_len < 0) + return -EINVAL; + if (l->l_len == 0) + fl->fl_end = OFFSET_MAX; + + fl->fl_pid = getpid(); + fl->fl_flags = FL_POSIX; + fl->fl_notify = NULL; + fl->fl_insert = NULL; + fl->fl_remove = NULL; + /* XXX: these fields can't be filled with suitable values, + but I think lustre doesn't use them. + */ + fl->fl_owner = NULL; + fl->fl_file = NULL; + + return assign_type(fl, l->l_type); } +static int llu_fcntl_getlk(struct inode *ino, struct flock *flock) +{ + struct file_lock fl; + int error; + + error = EINVAL; + if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK)) + goto out; + + error = flock_to_posix_lock(ino, &fl, flock); + if (error) + goto out; + + error = llu_file_flock(ino, F_GETLK, &fl); + if (error) + goto out; + + flock->l_type = F_UNLCK; + if (fl.fl_type != F_UNLCK) { + flock->l_pid = fl.fl_pid; + flock->l_start = fl.fl_start; + flock->l_len = fl.fl_end == OFFSET_MAX ? 0: + fl.fl_end - fl.fl_start + 1; + flock->l_whence = SEEK_SET; + flock->l_type = fl.fl_type; + } + +out: + return error; +} + +static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock) +{ + struct file_lock fl; + int flags = llu_i2info(ino)->lli_open_flags + 1; + int error; + + error = flock_to_posix_lock(ino, &fl, flock); + if (error) + goto out; + if (cmd == F_SETLKW) + fl.fl_flags |= FL_SLEEP; + + error = -EBADF; + switch (flock->l_type) { + case F_RDLCK: + if (!(flags & FMODE_READ)) + goto out; + break; + case F_WRLCK: + if (!(flags & FMODE_WRITE)) + goto out; + break; + case F_UNLCK: + break; + default: + error = -EINVAL; + goto out; + } + + error = llu_file_flock(ino, cmd, &fl); + if (error) + goto out; + +out: + return error; +} +#endif + +static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) +{ + struct llu_inode_info *lli = llu_i2info(ino); + long flags; + struct flock *flock; + long err; + + switch (cmd) { + case F_GETFL: + *rtn = lli->lli_open_flags; + return 0; + case F_SETFL: + flags = va_arg(ap, long); + flags &= FCNTL_FLMASK; + if (flags & FCNTL_FLMASK_INVALID) { + CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, " + "and O_DIRECT on file descriptor\n"); + *rtn = -EINVAL; + return EINVAL; + } + lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) | + (lli->lli_open_flags & ~FCNTL_FLMASK); + *rtn = 0; + return 0; +#if 0 + case F_GETLK: + flock = va_arg(ap, struct flock *); + err = llu_fcntl_getlk(ino, flock); + *rtn = err? -1: 0; + return err; + case F_SETLK: + case F_SETLKW: + flock = va_arg(ap, struct flock *); + err = llu_fcntl_setlk(ino, cmd, flock); + *rtn = err? -1: 0; + return err; +#endif + } + + CERROR("unsupported fcntl cmd %x\n", cmd); + *rtn = -ENOSYS; + return ENOSYS; +} + +#if 0 +static int llu_get_grouplock(struct inode *inode, unsigned long arg) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + ldlm_policy_data_t policy = { .l_extent = { .start = 0, + .end = OBD_OBJECT_EOF}}; + struct lustre_handle lockh = { 0 }; + struct lov_stripe_md *lsm = lli->lli_smd; + ldlm_error_t err; + int flags = 0; + ENTRY; + + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + RETURN(-EINVAL); + } + + policy.l_extent.gid = arg; + if (lli->lli_open_flags & O_NONBLOCK) + flags = LDLM_FL_BLOCK_NOWAIT; + + err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, + flags); + if (err) + RETURN(err); + + fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK; + fd->fd_gid = arg; + memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh)); + + RETURN(0); +} + +static int llu_put_grouplock(struct inode *inode, unsigned long arg) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + struct lov_stripe_md *lsm = lli->lli_smd; + ldlm_error_t err; + ENTRY; + + if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) + RETURN(-EINVAL); + + if (fd->fd_gid != arg) + RETURN(-EINVAL); + + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + + err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh); + if (err) + RETURN(err); + + fd->fd_gid = 0; + memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh)); + + RETURN(0); +} +#endif + static int llu_iop_ioctl(struct inode *ino, unsigned long int request, va_list ap) { - CERROR("liblustre did not support ioctl\n"); + unsigned long arg; + + liblustre_wait_event(0); + + switch (request) { +#if 0 + case LL_IOC_GROUP_LOCK: + arg = va_arg(ap, unsigned long); + return llu_get_grouplock(ino, arg); + case LL_IOC_GROUP_UNLOCK: + arg = va_arg(ap, unsigned long); + return llu_put_grouplock(ino, arg); +#endif + } + + CERROR("did not support ioctl cmd %lx\n", request); return -ENOSYS; } @@ -1182,11 +1542,13 @@ static int llu_iop_ioctl(struct inode *ino, unsigned long int request, */ static int llu_iop_sync(struct inode *inode) { + liblustre_wait_event(0); return 0; } static int llu_iop_datasync(struct inode *inode) { + liblustre_wait_event(0); return 0; } @@ -1203,8 +1565,11 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) if ((md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) != - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) - CERROR("invalide fields!\n"); + (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) { + CERROR("bad md body valid mask 0x%x\n", md->body->valid); + LBUG(); + return ERR_PTR(-EPERM); + } /* try to find existing inode */ fid.id = md->body->ino; @@ -1215,9 +1580,10 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) if (inode) { struct llu_inode_info *lli = llu_i2info(inode); - if (lli->lli_stale_flag || - lli->lli_st_generation != md->body->generation) + if (inode->i_zombie || + lli->lli_st_generation != md->body->generation) { I_RELE(inode); + } else { llu_update_inode(inode, md->body, md->lsm); return inode; @@ -1253,12 +1619,25 @@ llu_fsswop_mount(const char *source, struct lustre_handle osc_conn = {0, }; struct lustre_md md; class_uuid_t uuid; + struct config_llog_instance cfg; struct lustre_profile *lprof; + char *zconf_mdsnid, *zconf_mdsname, *zconf_profile; char *osc = NULL, *mdc = NULL; - int err = -EINVAL; + int async = 1, err = -EINVAL; ENTRY; + if (ll_parse_mount_target(source, + &zconf_mdsnid, + &zconf_mdsname, + &zconf_profile)) { + CERROR("mal-formed target %s\n", source); + RETURN(err); + } + if (!zconf_mdsnid || !zconf_mdsname || !zconf_profile) { + printf("Liblustre: invalid target %s\n", source); + RETURN(err); + } /* allocate & initialize sbi */ OBD_ALLOC(sbi, sizeof(*sbi)); if (!sbi) @@ -1268,61 +1647,39 @@ llu_fsswop_mount(const char *source, generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - /* zeroconf */ - if (g_zconf) { - struct config_llog_instance cfg; - int len; - - if (!g_zconf_mdsname) { - CERROR("no mds name\n"); - GOTO(out_free, err = -EINVAL); - } - - /* generate a string unique to this super, let's try - the address of the super itself.*/ - len = (sizeof(sbi) * 2) + 1; - OBD_ALLOC(sbi->ll_instance, len); - if (sbi->ll_instance == NULL) - GOTO(out_free, err = -ENOMEM); - sprintf(sbi->ll_instance, "%p", sbi); - - cfg.cfg_instance = sbi->ll_instance; - cfg.cfg_uuid = sbi->ll_sb_uuid; - err = liblustre_process_log(&cfg, 1); - if (err < 0) { - CERROR("Unable to process log: %s\n", g_zconf_profile); - - GOTO(out_free, err); - } - - lprof = class_get_profile(g_zconf_profile); - if (lprof == NULL) { - CERROR("No profile found: %s\n", g_zconf_profile); - GOTO(out_free, err = -EINVAL); - } - if (osc) - OBD_FREE(osc, strlen(osc) + 1); - OBD_ALLOC(osc, strlen(lprof->lp_osc) + - strlen(sbi->ll_instance) + 2); - sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance); - - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + - strlen(sbi->ll_instance) + 2); - sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance); - } else { - /* setup from dump_file */ - if (list_empty(&lustre_profile_list)) { - CERROR("no profile\n"); - GOTO(out_free, err = -EINVAL); - } + /* generate a string unique to this super, let's try + the address of the super itself.*/ + OBD_ALLOC(sbi->ll_instance, sizeof(sbi) * 2 + 1); + if (sbi->ll_instance == NULL) + GOTO(out_free, err = -ENOMEM); + sprintf(sbi->ll_instance, "%p", sbi); + + /* retrive & parse config log */ + cfg.cfg_instance = sbi->ll_instance; + cfg.cfg_uuid = sbi->ll_sb_uuid; + err = liblustre_process_log(&cfg, + zconf_mdsnid, zconf_mdsname, zconf_profile, 1); + if (err < 0) { + CERROR("Unable to process log: %s\n", zconf_profile); + GOTO(out_free, err); + } - lprof = list_entry(lustre_profile_list.next, - struct lustre_profile, lp_list); - osc = lprof->lp_osc; - mdc = lprof->lp_mdc; + lprof = class_get_profile(zconf_profile); + if (lprof == NULL) { + CERROR("No profile found: %s\n", zconf_profile); + GOTO(out_free, err = -EINVAL); } + if (osc) + OBD_FREE(osc, strlen(osc) + 1); + OBD_ALLOC(osc, strlen(lprof->lp_osc) + + strlen(sbi->ll_instance) + 2); + sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance); + + if (mdc) + OBD_FREE(mdc, strlen(mdc) + 1); + OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + + strlen(sbi->ll_instance) + 2); + sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance); if (!osc) { CERROR("no osc\n"); @@ -1344,6 +1701,8 @@ llu_fsswop_mount(const char *source, CERROR("MDC %s: not setup or attached\n", mdc); GOTO(out_free, err = -EINVAL); } + obd_set_info(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async); /* setup mdc */ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, NULL /* ocd */); @@ -1367,6 +1726,8 @@ llu_fsswop_mount(const char *source, CERROR("OSC %s: not setup or attached\n", osc); GOTO(out_mdc, err = -EINVAL); } + obd_set_info(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async); err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, NULL /* ocd */); if (err) { @@ -1402,7 +1763,7 @@ llu_fsswop_mount(const char *source, LASSERT(sbi->ll_rootino != 0); root = llu_iget(fs, &md); - if (root == NULL) { + if (!root || IS_ERR(root)) { CERROR("fail to generate root inode\n"); GOTO(out_request, err = -EBADF); } @@ -1424,7 +1785,7 @@ llu_fsswop_mount(const char *source, ptlrpc_req_finished(request); - printf("LibLustre: namespace mounted successfully!\n"); + CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source); return 0; @@ -1459,8 +1820,9 @@ static struct inode_ops llu_inode_ops = { inop_link: llu_iop_link_raw, inop_unlink: llu_iop_unlink_raw, inop_rename: llu_iop_rename_raw, - inop_ipreadv: llu_iop_ipreadv, - inop_ipwritev: llu_iop_ipwritev, + inop_pos: llu_iop_pos, + inop_read: llu_iop_read, + inop_write: llu_iop_write, inop_iodone: llu_iop_iodone, inop_fcntl: llu_iop_fcntl, inop_sync: llu_iop_sync, @@ -1472,5 +1834,3 @@ static struct inode_ops llu_inode_ops = { #endif inop_gone: llu_iop_gone, }; - -#warning "time_after() defined in liblustre.h need to be rewrite in userspace" diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am index 0a9a1c0..616fea4 100644 --- a/lustre/liblustre/tests/Makefile.am +++ b/lustre/liblustre/tests/Makefile.am @@ -2,47 +2,53 @@ AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals AM_CFLAGS = $(LLCFLAGS) -LIBS = $(LIBEFENCE) $(LIBREADLINE) +AM_LIBS = $(LIBEFENCE) $(LIBREADLINE) -LLIB_EXEC= ../liblustre.a -lpthread +LLIB_EXEC= $(top_builddir)/lustre/liblustre/liblustre.a $(CAP_LIBS) $(PTHREAD_LIBS) if LIBLUSTRE noinst_LIBRARIES = libtestcommon.a -def_tests = echo_test sanity recovery_small replay_single replay_ost_single + +if LIBLUSTRE_TESTS +noinst_PROGRAMS = sanity recovery_small replay_single replay_ost_single + +if TESTS +noinst_PROGRAMS += echo_test +endif # TESTS if MPITESTS -noinst_PROGRAMS = $(def_tests) test_lock_cancel -else -noinst_PROGRAMS = $(def_tests) -endif +noinst_PROGRAMS += test_lock_cancel +endif # MPITESTS + +endif # LIBLUSTRE_TESTS endif # LIBLUSTRE libtestcommon_a_SOURCES = test_common.c test_common.h -echo_test_SOURCES = echo_test.c $(top_srcdir)/lustre/utils/parser.c $(top_srcdir)/lustre/utils/obd.c $(top_srcdir)/lustre/utils/lustre_cfg.c +echo_test_SOURCES = echo_test.c $(top_srcdir)/lustre/utils/parser.c $(top_srcdir)/lustre/utils/obd.c $(top_srcdir)/lustre/utils/lustre_cfg.c echo_test_CFLAGS = $(LL_CFLAGS) -echo_test_LDADD = ../liblsupport.a $(LIBREADLINE) -lpthread +echo_test_LDADD = $(top_builddir)/lustre/liblustre/liblsupport.a $(LIBREADLINE) $(CAP_LIBS) $(PTHREAD_LIBS) echo_test_DEPENDENCIES=$(top_builddir)/lustre/liblustre/liblsupport.a sanity_SOURCES = sanity.c sanity_CFLAGS = $(LL_CFLAGS) -sanity_LDADD := ./libtestcommon.a $(LLIB_EXEC) -sanity_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a ./libtestcommon.a +sanity_LDADD := libtestcommon.a $(LLIB_EXEC) +sanity_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a recovery_small_SOURCES = recovery_small.c recovery_small_CFLAGS = $(LL_CFLAGS) -recovery_small_LDADD := ./libtestcommon.a $(LLIB_EXEC) -recovery_small_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a +recovery_small_LDADD := libtestcommon.a $(LLIB_EXEC) +recovery_small_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a replay_single_SOURCES = replay_single.c replay_single_CFLAGS = $(LL_CFLAGS) -replay_single_LDADD := ./libtestcommon.a $(LLIB_EXEC) -replay_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a +replay_single_LDADD := libtestcommon.a $(LLIB_EXEC) +replay_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a replay_ost_single_SOURCES = replay_ost_single.c replay_ost_single_CFLAGS = $(LL_CFLAGS) -replay_ost_single_LDADD := ./libtestcommon.a $(LLIB_EXEC) -replay_ost_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a +replay_ost_single_LDADD := libtestcommon.a $(LLIB_EXEC) +replay_ost_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a if MPITESTS test_lock_cancel_SOURCES = test_lock_cancel.c diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 1b70246..12816f1 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -1,67 +1,37 @@ -#include -#include -#include -#include - -#include /* needed for ptpctl.h */ -#include /* needed for parse_dump */ - +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light user test program + * + * Copyright (c) 2002-2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ #include #include #include -#include #include #define LIBLUSTRE_TEST 1 #include "../utils/lctl.c" -struct ldlm_namespace; -struct ldlm_res_id; -struct obd_import; - -unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); - -void *inter_module_get(char *arg) -{ - if (!strcmp(arg, "tcpnal_ni")) - return &tcpnal_ni; - else if (!strcmp(arg, "ldlm_cli_cancel_unused")) - return ldlm_cli_cancel_unused; - else if (!strcmp(arg, "ldlm_namespace_cleanup")) - return ldlm_namespace_cleanup; - else if (!strcmp(arg, "ldlm_replay_locks")) - return ldlm_replay_locks; - else - return NULL; -} - -/* XXX move to proper place */ -char *portals_nid2str(int nal, ptl_nid_t nid, char *str) -{ - switch(nal){ - case TCPNAL: - /* userspace NAL */ - case SOCKNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u.%u.%u.%u", - (__u32)(nid >> 32), HIPQUAD(nid)); - break; - case QSWNAL: - case GMNAL: - case IBNAL: - case SCIMACNAL: - snprintf(str, PTL_NALFMT_SIZE - 1, "%u:%u", - (__u32)(nid >> 32), (__u32)nid); - break; - default: - snprintf(str, PTL_NALFMT_SIZE - 1, "?%d? %llx", - nal, (long long)nid); - break; - } - return str; -} +#include "../lutil.h" -ptl_handle_ni_t tcpnal_ni; +extern int class_handle_ioctl(unsigned int cmd, unsigned long arg); struct pingcli_args { ptl_nid_t mynid; @@ -71,78 +41,7 @@ struct pingcli_args { int size; }; -struct task_struct *current; - -/* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - switch (nal) - { - case SOCKNAL: - return &tcpnal_ni; - default: - return NULL; - } -} - -inline void -kportal_put_ni (int nal) -{ - return; -} - -int -kportal_nal_cmd(struct portals_cfg *pcfg) -{ -#if 0 - __u32 nal = pcfg->pcfg_nal; - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, - pcfg->pcfg_command); - rc = nal_cmd[nal].nch_handler(pcfg, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -#else - CERROR("empty function!!!\n"); - return 0; -#endif -} - -int init_current(int argc, char **argv) -{ - current = malloc(sizeof(*current)); - strncpy(current->comm, argv[0], sizeof(current->comm)); - current->pid = getpid(); - return 0; -} - -ptl_nid_t tcpnal_mynid; - -int init_lib_portals() -{ - int rc; - - PtlInit(); - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - PtlFini(); - RETURN (rc); - } - PtlNIDebug(tcpnal_ni, ~0); - return rc; -} - -extern int class_handle_ioctl(unsigned int cmd, unsigned long arg); - -int liblustre_ioctl(int dev_id, int opc, void *ptr) +static int liblustre_ioctl(int dev_id, unsigned int opc, void *ptr) { int rc = -EINVAL; @@ -160,15 +59,6 @@ int liblustre_ioctl(int dev_id, int opc, void *ptr) return rc; } -static void generate_random_uuid(unsigned char uuid_out[16]) -{ - int *arr = (int*)uuid_out; - int i; - - for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++) - arr[i] = rand(); -} - static char *echo_server_nid = NULL; static char *echo_server_ostname = "obd1"; static char *osc_dev_name = "OSC_DEV_NAME"; @@ -348,18 +238,15 @@ int main(int argc, char **argv) return 1; } - srand(time(NULL)); - - tcpnal_mynid = rand(); -#if 1 portal_debug = 0; portal_subsystem_debug = 0; -#endif - if (init_current(argc, argv) || + liblustre_init_random(); + liblustre_set_nal_nid(); + + if (liblustre_init_current(argv[0]) || init_obdclass() || init_lib_portals() || ptlrpc_init() || - ldlm_init() || mdc_init() || lov_init() || osc_init() || diff --git a/lustre/liblustre/tests/recovery_small.c b/lustre/liblustre/tests/recovery_small.c index 5aed06c..6cd9ba4 100644 --- a/lustre/liblustre/tests/recovery_small.c +++ b/lustre/liblustre/tests/recovery_small.c @@ -39,6 +39,8 @@ #include "test_common.h" +#define MAX_STRING_SIZE 2048 + static struct { const char *name; unsigned long code; @@ -52,6 +54,7 @@ static struct { static int drop_index = 0; static char mds_server[1024] = {0, }; +static char ssh_cmd[MAX_STRING_SIZE] = {0,}; int do_stat(const char *name, struct stat *buf) { @@ -121,14 +124,14 @@ void cleanup_dir(const char *path) #define FAIL() \ do { \ - char cmd[1024]; \ + char cmd[MAX_STRING_SIZE]; \ int rc; \ \ if (drop_arr[drop_index].name) { \ printf("server drops next %s\n", drop_arr[drop_index].name); \ sprintf(cmd, \ - "ssh %s \"echo %lu > /proc/sys/lustre/fail_loc\"", \ - mds_server, drop_arr[drop_index].code); \ + "%s %s \"echo %lu > /proc/sys/lustre/fail_loc\"", \ + ssh_cmd, mds_server, drop_arr[drop_index].code); \ if (system(cmd)) { \ printf("error excuting remote command: %d\n", rc); \ exit(rc); \ @@ -141,8 +144,8 @@ void cleanup_dir(const char *path) char cmd[1024]; \ \ if (drop_arr[drop_index].name) { \ - sprintf(cmd, "ssh %s \"echo 0 > /proc/sys/lustre/fail_loc\"", \ - mds_server); \ + sprintf(cmd, "%s %s \"echo 0 > /proc/sys/lustre/fail_loc\"", \ + ssh_cmd, mds_server); \ system(cmd); \ } \ } while (0) @@ -313,6 +316,7 @@ int main(int argc, char * argv[]) static struct option long_opts[] = { {"target", 1, 0, 0}, {"dumpfile", 1, 0, 0}, + {"ssh", 1, 0, 0}, {0, 0, 0, 0} }; @@ -329,12 +333,14 @@ int main(int argc, char * argv[]) setenv(ENV_LUSTRE_MNTTGT, optarg, 1); } else if (!strcmp(long_opts[opt_index].name, "dumpfile")) { setenv(ENV_LUSTRE_DUMPFILE, optarg, 1); + } else if (!strcmp(long_opts[opt_index].name, "ssh")) { + safe_strncpy(ssh_cmd, optarg, MAX_STRING_SIZE); } else usage(argv[0]); break; } case 's': - strcpy(mds_server, optarg); + safe_strncpy(mds_server, optarg, MAX_STRING_SIZE); break; default: usage(argv[0]); @@ -347,13 +353,18 @@ int main(int argc, char * argv[]) if (strlen(mds_server) == 0) usage(argv[0]); - sprintf(cmd, "ssh %s cat /dev/null", mds_server); + /* default to using ssh */ + if (!strlen(ssh_cmd)) { + safe_strncpy(ssh_cmd, "ssh", MAX_STRING_SIZE); + } + + sprintf(cmd, "%s %s cat /dev/null", ssh_cmd, mds_server); if (system(cmd)) { - printf("can't access server node: %s\n", mds_server); + printf("Can't access server node: %s using method: %s\n", mds_server, ssh_cmd); exit(-1); } - setenv(ENV_LUSTRE_TIMEOUT, "10", 1); + setenv(ENV_LUSTRE_TIMEOUT, "5", 1); __liblustre_setup_(); @@ -362,7 +373,9 @@ int main(int argc, char * argv[]) t2(); t3(); t4(); +#if 0 t5(); +#endif t6(); t7(); diff --git a/lustre/liblustre/tests/replay_single.c b/lustre/liblustre/tests/replay_single.c index 6645056..9628354 100644 --- a/lustre/liblustre/tests/replay_single.c +++ b/lustre/liblustre/tests/replay_single.c @@ -39,13 +39,14 @@ #include "test_common.h" +#define MAX_STRING_SIZE 2048 - -static char mds_server[1024] = {0,}; -static char barrier_script[1024] = {0,}; -static char failover_script[1024] = {0,}; -static char barrier_cmd[1024] = {0,}; -static char failover_cmd[1024] = {0,}; +static char mds_server[MAX_STRING_SIZE] = {0,}; +static char barrier_script[MAX_STRING_SIZE] = {0,}; +static char failover_script[MAX_STRING_SIZE] = {0,}; +static char barrier_cmd[MAX_STRING_SIZE] = {0,}; +static char failover_cmd[MAX_STRING_SIZE] = {0,}; +static char ssh_cmd[MAX_STRING_SIZE] = {0,}; static void replay_barrier() { @@ -91,9 +92,11 @@ static void mds_failover() void t0() { + char *path="/mnt/lustre/f0"; ENTRY("empty replay"); replay_barrier(); mds_failover(); + t_check_stat_fail("/mnt/lustre/f0"); LEAVE(); } @@ -315,19 +318,19 @@ extern void __liblustre_cleanup_(void); void usage(const char *cmd) { printf("Usage: \t%s --target mdsnid:/mdsname/profile -s mds_hostname " - "-b \"barrier cmd\" -f \"failover cmd\"\n", cmd); + "-b \"barrier cmd\" -f \"failover cmd\" [--rsh \"rsh_cmd\"]\n", cmd); printf(" \t%s --dumpfile dumpfile -s mds_hostname -b \"barrier cmd\" " - "-f \"failover cmd\"\n", cmd); + "-f \"failover cmd\" [--rsh \"rsh_cmd\"]\n", cmd); exit(-1); } void test_ssh() { - char cmd[1024]; + char cmd[MAX_STRING_SIZE]; - sprintf(cmd, "ssh %s cat /dev/null", mds_server); + sprintf(cmd, "%s %s cat /dev/null", ssh_cmd, mds_server); if (system(cmd)) { - printf("ssh can't access server node: %s\n", mds_server); + printf("Can't access server node: %s using method: %s\n", mds_server, ssh_cmd); exit(-1); } } @@ -338,6 +341,7 @@ int main(int argc, char * const argv[]) static struct option long_opts[] = { {"target", 1, 0, 0}, {"dumpfile", 1, 0, 0}, + {"ssh", 1, 0, 0}, {0, 0, 0, 0} }; @@ -354,18 +358,20 @@ int main(int argc, char * const argv[]) setenv(ENV_LUSTRE_MNTTGT, optarg, 1); } else if (!strcmp(long_opts[opt_index].name, "dumpfile")) { setenv(ENV_LUSTRE_DUMPFILE, optarg, 1); + } else if (!strcmp(long_opts[opt_index].name, "ssh")) { + safe_strncpy(ssh_cmd, optarg, MAX_STRING_SIZE); } else usage(argv[0]); break; } case 's': - strcpy(mds_server, optarg); + safe_strncpy(mds_server, optarg, MAX_STRING_SIZE); break; case 'b': - strcpy(barrier_script, optarg); + safe_strncpy(barrier_script, optarg, MAX_STRING_SIZE); break; case 'f': - strcpy(failover_script, optarg); + safe_strncpy(failover_script, optarg, MAX_STRING_SIZE); break; default: usage(argv[0]); @@ -378,11 +384,18 @@ int main(int argc, char * const argv[]) !strlen(failover_script)) usage(argv[0]); + /* default to using ssh */ + if (!strlen(ssh_cmd)) { + safe_strncpy(ssh_cmd, "ssh", MAX_STRING_SIZE); + } + test_ssh(); /* prepare remote command */ - sprintf(barrier_cmd, "ssh %s \"%s\"", mds_server, barrier_script); - sprintf(failover_cmd, "ssh %s \"%s\"", mds_server, failover_script); + sprintf(barrier_cmd, "%s %s \"%s\"", + ssh_cmd, mds_server, barrier_script); + sprintf(failover_cmd, "%s %s \"%s\"", + ssh_cmd, mds_server, failover_script); setenv(ENV_LUSTRE_TIMEOUT, "10", 1); diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 944ae9c..acea41e 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -33,17 +33,20 @@ #include #include #include - -#include -#include +#include +#include +#include +#include #include "test_common.h" +extern char *lustre_path; + #define ENTRY(str) \ do { \ char buf[100]; \ int len; \ - sprintf(buf, "===== START: %s ", (str)); \ + sprintf(buf, "===== START %s: %s ", __FUNCTION__, (str)); \ len = strlen(buf); \ if (len < 79) { \ memset(buf+len, '=', 100-len); \ @@ -55,15 +58,27 @@ #define LEAVE() \ do { \ - printf("----- END TEST successfully ---"); \ - printf("-----------------------------"); \ - printf("-------------------\n"); \ + char buf[100]; \ + int len; \ + sprintf(buf, "===== END TEST %s: successfully ", \ + __FUNCTION__); \ + len = strlen(buf); \ + if (len < 79) { \ + memset(buf+len, '=', 100-len); \ + buf[79] = '\n'; \ + buf[80] = 0; \ + } \ + printf("%s", buf); \ } while (0) +#define MAX_PATH_LENGTH 4096 + void t1() { - char *path="/mnt/lustre/test_t1"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("create/delete"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t1", lustre_path); t_touch(path); t_unlink(path); @@ -72,8 +87,10 @@ void t1() void t2() { - char *path="/mnt/lustre/test_t2"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("mkdir/rmdir"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t2", lustre_path); t_mkdir(path); t_rmdir(path); @@ -82,8 +99,10 @@ void t2() void t3() { - char *path="/mnt/lustre/test_t3"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("regular stat"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t3", lustre_path); t_touch(path); t_check_stat(path, NULL); @@ -93,8 +112,10 @@ void t3() void t4() { - char *path="/mnt/lustre/test_t4"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("dir stat"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t4", lustre_path); t_mkdir(path); t_check_stat(path, NULL); @@ -102,115 +123,54 @@ void t4() LEAVE(); } -#define PAGE_SIZE (4096) -#define _npages (2048) - -static int _buffer[_npages][PAGE_SIZE/sizeof(int)]; - -/* pos: i/o start from - * xfer: npages per transfer - */ -static void pages_io(int xfer, loff_t pos) +void t6() { - char *path="/mnt/lustre/test_t5"; - int check_sum[_npages] = {0,}; - int fd, rc, i, j; - - memset(_buffer, 0, sizeof(_buffer)); - - /* create sample data */ - for (i = 0; i < _npages; i++) { - for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { - _buffer[i][j] = rand(); - } - } + char path[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; - /* compute checksum */ - for (i = 0; i < _npages; i++) { - for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { - check_sum[i] += _buffer[i][j]; - } - } + ENTRY("symlink"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t6", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6_link", lustre_path); t_touch(path); - - fd = t_open(path); - - /* write */ - lseek(fd, pos, SEEK_SET); - for (i = 0; i < _npages; i += xfer) { - rc = write(fd, _buffer[i], PAGE_SIZE * xfer); - if (rc != PAGE_SIZE * xfer) { - printf("write error %d (i = %d)\n", rc, i); - exit(1); - } - } - printf("succefully write %d pages(%d per xfer)\n", _npages, xfer); - memset(_buffer, 0, sizeof(_buffer)); - - /* read */ - lseek(fd, pos, SEEK_SET); - for (i = 0; i < _npages; i += xfer) { - rc = read(fd, _buffer[i], PAGE_SIZE * xfer); - if (rc != PAGE_SIZE * xfer) { - printf("read error %d (i = %d)\n", rc, i); - exit(1); - } - } - printf("succefully read %d pages(%d per xfer)\n", _npages, xfer); - - /* compute checksum */ - for (i = 0; i < _npages; i++) { - int sum = 0; - for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { - sum += _buffer[i][j]; - } - if (sum != check_sum[i]) { - printf("chunk %d checksum error: expected 0x%x, get 0x%x\n", - i, check_sum[i], sum); - } - } - printf("checksum verified OK!\n"); - - t_close(fd); + t_symlink(path, path2); + t_check_stat(path2, NULL); + t_unlink(path2); t_unlink(path); + LEAVE(); } -void t5() +void t6b() { - char text[256]; - loff_t off_array[] = {1, 4, 17, 255, 258, 4095, 4097, 8191, 1024*1024*1024}; - int np = 1, i; - loff_t offset = 0; + char path[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + char cwd[MAX_PATH_LENGTH] = ""; + char *tmp; + int fd; - while (np <= _npages) { - sprintf(text, "pages_io: %d per transfer, offset %lld", - np, offset); - ENTRY(text); - pages_io(np, offset); - LEAVE(); - np += np; - } + ENTRY("symlink + chdir and open"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t6b", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6b_link", lustre_path); - for (i = 0; i < sizeof(off_array)/sizeof(loff_t); i++) { - offset = off_array[i]; - sprintf(text, "pages_io: 16 per transfer, offset %lld", - offset); - ENTRY(text); - pages_io(16, offset); + t_mkdir(path); + t_symlink(path, path2); + t_check_stat(path2, NULL); + + tmp = getcwd(cwd, MAX_PATH_LENGTH); + if (tmp == NULL) { + fprintf(stderr, "current path too long to fit in " + "MAX_PATH_LENGTH?\n"); LEAVE(); + return; } -} + t_chdir(path2); + t_chdir(cwd); + t_rmdir(path); + t_touch(path); -void t6() -{ - char *path="/mnt/lustre/test_t6"; - char *path2="/mnt/lustre/test_t6_link"; - ENTRY("symlink"); + fd = t_open(path2); + t_close(fd); - t_touch(path); - t_symlink(path, path2); - t_check_stat(path2, NULL); t_unlink(path2); t_unlink(path); LEAVE(); @@ -218,19 +178,32 @@ void t6() void t7() { - char *path="/mnt/lustre/test_t7"; + char path[MAX_PATH_LENGTH] = ""; + int rc; + ENTRY("mknod"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t7", lustre_path); - t_mknod(path, S_IFCHR | 0644, 5, 4); - t_check_stat(path, NULL); - t_unlink(path); + if (geteuid() != 0) { + rc = mknod(path, S_IFCHR | 0644, (5<<8 | 4)); + if (rc != -1 || errno != EPERM) { + printf("mknod shouldn't success: rc %d, errno %d\n", + rc, errno); + } + } else { + t_mknod(path, S_IFCHR | 0644, 5, 4); + t_check_stat(path, NULL); + t_unlink(path); + } LEAVE(); } void t8() { - char *path="/mnt/lustre/test_t8"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("chmod"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t8", lustre_path); t_touch(path); t_chmod_raw(path, 0700); @@ -241,9 +214,12 @@ void t8() void t9() { - char *path="/mnt/lustre/test_t9"; - char *path2="/mnt/lustre/test_t9_link"; + char path[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + ENTRY("hard link"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t9", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t9_link", lustre_path); t_touch(path); t_link(path, path2); @@ -256,14 +232,22 @@ void t9() void t10() { - char *dir1="/mnt/lustre/test_t10_dir1"; - char *dir2="/mnt/lustre/test_t10_dir2"; - char *path1="/mnt/lustre/test_t10_reg1"; - char *path2="/mnt/lustre/test_t10_reg2"; - char *rename1="/mnt/lustre/test_t10_dir1/rename1"; - char *rename2="/mnt/lustre/test_t10_dir2/rename2"; - char *rename3="/mnt/lustre/test_t10_dir2/rename3"; + char dir1[MAX_PATH_LENGTH] = ""; + char dir2[MAX_PATH_LENGTH] = ""; + char path1[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + char rename1[MAX_PATH_LENGTH] = ""; + char rename2[MAX_PATH_LENGTH] = ""; + char rename3[MAX_PATH_LENGTH] = ""; + ENTRY("rename"); + snprintf(dir1, MAX_PATH_LENGTH, "%s/test_t10_dir1", lustre_path); + snprintf(dir2, MAX_PATH_LENGTH, "%s/test_t10_dir2", lustre_path); + snprintf(path1, MAX_PATH_LENGTH, "%s/test_t10_reg1", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t10_reg2", lustre_path); + snprintf(rename1, MAX_PATH_LENGTH, "%s/test_t10_dir1/rename1", lustre_path); + snprintf(rename2, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename2", lustre_path); + snprintf(rename3, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename3", lustre_path); t_mkdir(dir1); t_mkdir(dir2); @@ -281,12 +265,12 @@ void t10() void t11() { - char *base="/mnt/lustre"; - char path[4096], path2[4096]; + char *base=lustre_path; + char path[MAX_PATH_LENGTH], path2[MAX_PATH_LENGTH]; int i, j, level = 5, nreg = 5; ENTRY("deep tree"); - strcpy(path, base); + safe_strncpy(path, base, MAX_PATH_LENGTH); for (i = 0; i < level; i++) { for (j = 0; j < nreg; j++) { @@ -299,7 +283,7 @@ void t11() } for (i = level; i > 0; i--) { - strcpy(path, base); + safe_strncpy(path, base, MAX_PATH_LENGTH); for (j = 1; j < i; j++) strcat(path, "/dir"); @@ -317,10 +301,11 @@ void t11() void t12() { - char *dir="/mnt/lustre/test_t12_dir"; + char dir[MAX_PATH_LENGTH] = ""; char buf[1024*128]; int fd; ENTRY("empty directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t12_dir", lustre_path); t_mkdir(dir); fd = t_opendir(dir); @@ -332,13 +317,14 @@ void t12() void t13() { - char *dir="/mnt/lustre/test_t13_dir/"; + char dir[MAX_PATH_LENGTH] = ""; char name[1024]; char buf[1024]; const int nfiles = 20; char *prefix = "test13_filename_prefix_"; int fd, i; ENTRY("multiple entries directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t13_dir/", lustre_path); t_mkdir(dir); printf("Creating %d files...\n", nfiles); @@ -360,13 +346,16 @@ void t13() void t14() { - char *dir="/mnt/lustre/test_t14_dir/"; + char dir[MAX_PATH_LENGTH] = ""; char name[1024]; char buf[1024]; const int nfiles = 256; char *prefix = "test14_filename_long_prefix_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA___"; - int fd, i; + struct dirent64 *ent; + int fd, i, rc, pos, index; + loff_t base = 0; ENTRY(">1 block(4k) directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t14_dir/", lustre_path); t_mkdir(dir); printf("Creating %d files...\n", nfiles); @@ -375,7 +364,35 @@ void t14() t_touch(name); } fd = t_opendir(dir); - t_ls(fd, buf, sizeof(buf)); + printf("Listing...\n"); + index = 0; + while ((rc = getdirentries64(fd, buf, 1024, &base)) > 0) { + pos = 0; + while (pos < rc) { + char *item; + + ent = (struct dirent64 *) ((char*) buf + pos); + item = (char *) ent->d_name; + if (!strcmp(item, ".") || !strcmp(item, "..")) + goto iter; + if (strstr(item, prefix) != item) { + printf("found bad name %s\n", item); + exit(-1); + } + printf("[%03d]: %s\n", + index++, item + strlen(prefix)); +iter: + pos += ent->d_reclen; + } + } + if (rc < 0) { + printf("getdents error %d\n", rc); + exit(-1); + } + if (index != nfiles) { + printf("get %d files != %d\n", index, nfiles); + exit(-1); + } t_close(fd); printf("Cleanup...\n"); for (i = 0; i < nfiles; i++) { @@ -388,9 +405,10 @@ void t14() void t15() { - char *file = "/mnt/lustre/test_t15_file"; + char file[MAX_PATH_LENGTH] = ""; int fd; ENTRY("open-stat-close"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t15_file", lustre_path); t_touch(file); fd = t_open(file); @@ -400,6 +418,431 @@ void t15() LEAVE(); } +void t16() +{ + char file[MAX_PATH_LENGTH] = ""; + ENTRY("small-write-read"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t16_file", lustre_path); + + t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaa"); + t_grep(file, "aaaaaaaaaaaaaaaaaaaaaa"); + t_unlink(file); + LEAVE(); +} + +void t17() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + ENTRY("open-unlink without close"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t17_file", lustre_path); + + fd = open(file, O_WRONLY | O_CREAT, 0666); + if (fd < 0) { + printf("failed to create file: %s\n", strerror(errno)); + exit(-1); + } + t_unlink(file); + LEAVE(); +} + +void t18() +{ + char file[MAX_PATH_LENGTH] = ""; + char buf[128]; + int fd, i; + struct stat statbuf[3]; + ENTRY("write should change mtime/atime"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t18_file", lustre_path); + + for (i = 0; i < 3; i++) { + fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + if (write(fd, buf, sizeof(buf)) != sizeof(buf)) { + printf("error write file\n"); + exit(-1); + } + close(fd); + if(stat(file, &statbuf[i]) != 0) { + printf("Error stat\n"); + exit(1); + } + printf("atime %lu, mtime %lu\n", + statbuf[i].st_atime, statbuf[i].st_mtime); + sleep(2); + } + + for (i = 1; i < 3; i++) { + if ((statbuf[i].st_atime <= statbuf[i-1].st_atime) || + (statbuf[i].st_mtime <= statbuf[i-1].st_mtime)) { + printf("time error\n"); + exit(-1); + } + } + t_unlink(file); + LEAVE(); +} + +void t18b() +{ + char file[MAX_PATH_LENGTH] = ""; + char buf[128]; + int fd, i; + struct stat statbuf[3]; + ENTRY("utime should change mtime/atime/ctime"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t23_file", lustre_path); + t_touch(file); + + for (i = 0; i < 3; i++) { + t_utime(file, NULL); + if(stat(file, &statbuf[i]) != 0) { + printf("Error stat\n"); + exit(1); + } + printf("atime %lu, mtime %lu, ctime %lu\n", + statbuf[i].st_atime, statbuf[i].st_mtime, + statbuf[i].st_ctime); + sleep(2); + } + + for (i = 1; i < 3; i++) { + if ((statbuf[i].st_atime <= statbuf[i-1].st_atime) || + (statbuf[i].st_mtime <= statbuf[i-1].st_mtime) || + (statbuf[i].st_ctime <= statbuf[i-1].st_ctime)) { + printf("time error\n"); + exit(-1); + } + } + t_unlink(file); + LEAVE(); +} + +void t19() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + struct stat statbuf; + ENTRY("open(O_TRUNC) should trancate file to 0-length"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t19_file", lustre_path); + + t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + + fd = open(file, O_RDWR|O_CREAT|O_TRUNC, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + close(fd); + if(stat(file, &statbuf) != 0) { + printf("Error stat\n"); + exit(1); + } + if (statbuf.st_size != 0) { + printf("size %ld is not zero\n", statbuf.st_size); + exit(-1); + } + t_unlink(file); + LEAVE(); +} + +void t20() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + struct iovec iov[2]; + char buf[100]; + ssize_t ret; + ENTRY("trap app's general bad pointer for file i/o"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t20_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + + ret = write(fd, NULL, 20); + if (ret != -1 || errno != EFAULT) { + printf("write 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + ret = write(fd, (void *)-1, 20); + if (ret != -1 || errno != EFAULT) { + printf("write 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 10; + iov[1].iov_base = (void *)-1; + iov[1].iov_len = 10; + ret = writev(fd, iov, 2); + if (ret != -1 || errno != EFAULT) { + printf("writev 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 0; + iov[1].iov_base = buf; + iov[1].iov_len = sizeof(buf); + ret = writev(fd, iov, 2); + if (ret != sizeof(buf)) { + printf("write 3 ret %ld, error %d\n", ret, errno); + exit(1); + } + lseek(fd, 0, SEEK_SET); + + ret = read(fd, NULL, 20); + if (ret != -1 || errno != EFAULT) { + printf("read 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + ret = read(fd, (void *)-1, 20); + if (ret != -1 || errno != EFAULT) { + printf("read 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 10; + iov[1].iov_base = (void *)-1; + iov[1].iov_len = 10; + ret = readv(fd, iov, 2); + if (ret != -1 || errno != EFAULT) { + printf("readv 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 0; + iov[1].iov_base = buf; + iov[1].iov_len = sizeof(buf); + ret = readv(fd, iov, 2); + if (ret != sizeof(buf)) { + printf("read 3 ret %ld, error %d\n", ret, errno); + exit(1); + } + + close(fd); + t_unlink(file); + LEAVE(); +} + +void t21() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd, ret; + struct flock lock = { + .l_type = F_RDLCK, + .l_whence = SEEK_SET, + }; + + ENTRY("basic fcntl support"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t21_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT, (mode_t)0666); + if (fd < 0) { + printf("error open file: %m\n", file); + exit(-1); + } + + t_fcntl(fd, F_SETFL, O_APPEND); + if (!(ret = t_fcntl(fd, F_GETFL)) & O_APPEND) { + printf("error get flag: ret %x\n", ret); + exit(-1); + } + + t_fcntl(fd, F_SETLK, &lock); + t_fcntl(fd, F_GETLK, &lock); + lock.l_type = F_WRLCK; + t_fcntl(fd, F_SETLKW, &lock); + t_fcntl(fd, F_GETLK, &lock); + lock.l_type = F_UNLCK; + t_fcntl(fd, F_SETLK, &lock); + + close(fd); + t_unlink(file); + LEAVE(); +} + +void t22() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + char *str = "1234567890"; + char buf[100]; + ssize_t ret; + ENTRY("make sure O_APPEND take effect"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t22_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + + lseek(fd, 100, SEEK_SET); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + printf("write 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + + lseek(fd, 0, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret != strlen(str)) { + printf("read 1 got %ld\n", ret); + exit(1); + } + + if (memcmp(buf, str, strlen(str))) { + printf("read 1 data err\n"); + exit(1); + } + + if (fcntl(fd, F_SETFL, 0)) { + printf("fcntl err: %s\n", strerror(errno)); + exit(1); + } + + lseek(fd, 100, SEEK_SET); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + printf("write 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + + lseek(fd, 100, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret != strlen(str)) { + printf("read 2 got %ld\n", ret); + exit(1); + } + + if (memcmp(buf, str, strlen(str))) { + printf("read 2 data err\n"); + exit(1); + } + + close(fd); + t_unlink(file); + LEAVE(); +} + +#define PAGE_SIZE (4096) +#define _npages (2048) + +static int _buffer[_npages][PAGE_SIZE/sizeof(int)]; + +/* pos: i/o start from + * xfer: npages per transfer + */ +static void pages_io(int xfer, loff_t pos) +{ + char path[MAX_PATH_LENGTH] = ""; + + int check_sum[_npages] = {0,}; + int fd, rc, i, j, data_error = 0; + struct timeval tw1, tw2, tr1, tr2; + double tw, tr; + + snprintf(path, MAX_PATH_LENGTH, "%s/test_t50", lustre_path); + memset(_buffer, 0, sizeof(_buffer)); + + /* create sample data */ + for (i = 0; i < _npages; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { + _buffer[i][j] = rand(); + } + } + + /* compute checksum */ + for (i = 0; i < _npages; i++) { + for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { + check_sum[i] += _buffer[i][j]; + } + } + + t_touch(path); + + fd = t_open(path); + + /* write */ + lseek(fd, pos, SEEK_SET); + gettimeofday(&tw1, NULL); + for (i = 0; i < _npages; i += xfer) { + rc = write(fd, _buffer[i], PAGE_SIZE * xfer); + if (rc != PAGE_SIZE * xfer) { + printf("write error %d (i = %d)\n", rc, i); + exit(1); + } + } + gettimeofday(&tw2, NULL); + + memset(_buffer, 0, sizeof(_buffer)); + + /* read */ + lseek(fd, pos, SEEK_SET); + gettimeofday(&tr1, NULL); + for (i = 0; i < _npages; i += xfer) { + rc = read(fd, _buffer[i], PAGE_SIZE * xfer); + if (rc != PAGE_SIZE * xfer) { + printf("read error %d (i = %d)\n", rc, i); + exit(1); + } + } + gettimeofday(&tr2, NULL); + + /* compute checksum */ + for (i = 0; i < _npages; i++) { + int sum = 0; + for (j = 0; j < PAGE_SIZE/sizeof(int); j++) { + sum += _buffer[i][j]; + } + if (sum != check_sum[i]) { + data_error = 1; + printf("chunk %d checksum error: expected 0x%x, get 0x%x\n", + i, check_sum[i], sum); + } + } + + t_close(fd); + t_unlink(path); + tw = (tw2.tv_sec - tw1.tv_sec) * 1000000 + (tw2.tv_usec - tw1.tv_usec); + tr = (tr2.tv_sec - tr1.tv_sec) * 1000000 + (tr2.tv_usec - tr1.tv_usec); + printf(" (R:%.3fM/s, W:%.3fM/s)\n", + (_npages * PAGE_SIZE) / (tw / 1000000.0) / (1024 * 1024), + (_npages * PAGE_SIZE) / (tr / 1000000.0) / (1024 * 1024)); + + if (data_error) + exit(1); +} + +void t50() +{ + loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191, + 1024*1024*1024*1024ULL}; + int np = 1, i; + loff_t offset = 0; + + ENTRY("4k aligned i/o sanity"); + while (np <= _npages) { + printf("%3d per xfer(total %d)...\t", np, _npages); + pages_io(np, offset); + np += np; + } + LEAVE(); + + ENTRY("4k un-aligned i/o sanity"); + for (i = 0; i < sizeof(off_array)/sizeof(loff_t); i++) { + offset = off_array[i]; + printf("16 per xfer(total %d), offset %10lld...\t", + _npages, offset); + pages_io(16, offset); + } + LEAVE(); +} + extern void __liblustre_setup_(void); extern void __liblustre_cleanup_(void); @@ -446,13 +889,12 @@ int main(int argc, char * const argv[]) __liblustre_setup_(); -#ifndef __CYGWIN__ t1(); t2(); t3(); t4(); - t5(); t6(); + t6b(); t7(); t8(); t9(); @@ -462,7 +904,15 @@ int main(int argc, char * const argv[]) t13(); t14(); t15(); -#endif + t16(); + t17(); + t18(); + t18b(); + t19(); + t20(); + t21(); + t22(); + t50(); printf("liblustre is about shutdown\n"); __liblustre_cleanup_(); diff --git a/lustre/liblustre/tests/test_common.c b/lustre/liblustre/tests/test_common.c index a87f0fa..03d005d 100644 --- a/lustre/liblustre/tests/test_common.c +++ b/lustre/liblustre/tests/test_common.c @@ -1,3 +1,7 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + #include #include #include @@ -7,6 +11,8 @@ #include #include #include +#include +#include #include "test_common.h" @@ -89,7 +95,7 @@ void t_mkdir(const char *path) { int rc; - rc = mkdir(path, 00644); + rc = mkdir(path, 00755); if (rc < 0) { printf("mkdir(%s) error: %s\n", path, strerror(errno)); EXIT(1); @@ -181,6 +187,27 @@ int t_open(const char *path) return fd; } +int t_chdir(const char *path) +{ + int rc = chdir(path); + if (rc < 0) { + printf("chdir(%s) error: %s\n", path, strerror(errno)); + EXIT_RET(rc); + } + return rc; +} + +int t_utime(const char *path, const struct utimbuf *buf) +{ + int rc = utime(path, buf); + if (rc < 0) { + printf("utime(%s, %p) error: %s\n", path, buf, + strerror(errno)); + EXIT_RET(rc); + } + return rc; +} + int t_opendir(const char *path) { int fd; @@ -209,6 +236,8 @@ int t_check_stat(const char *name, struct stat *buf) struct stat stat; int rc; + memset(&stat, 0, sizeof(stat)); + rc = lstat(name, &stat); if (rc) { printf("error %d stat %s\n", rc, name); @@ -216,6 +245,10 @@ int t_check_stat(const char *name, struct stat *buf) } if (buf) memcpy(buf, &stat, sizeof(*buf)); + if (stat.st_blksize == 0) { + printf("error: blksize is 0\n"); + EXIT_RET(-EINVAL); + } return 0; } @@ -313,3 +346,74 @@ void t_ls(int fd, char *buf, int size) EXIT(-1); } } + +int t_fcntl(int fd, int cmd, ...) +{ + va_list ap; + long arg; + struct flock *lock; + int rc = -1; + + va_start(ap, cmd); + switch (cmd) { + case F_GETFL: + va_end(ap); + rc = fcntl(fd, cmd); + if (rc == -1) { + printf("fcntl GETFL failed: %s\n", + strerror(errno)); + EXIT(1); + } + break; + case F_SETFL: + arg = va_arg(ap, long); + va_end(ap); + rc = fcntl(fd, cmd, arg); + if (rc == -1) { + printf("fcntl SETFL %ld failed: %s\n", + arg, strerror(errno)); + EXIT(1); + } + break; + case F_GETLK: + case F_SETLK: + case F_SETLKW: + lock = va_arg(ap, struct flock *); + va_end(ap); + rc = fcntl(fd, cmd, lock); + if (rc == -1) { + printf("fcntl cmd %d failed: %s\n", + cmd, strerror(errno)); + EXIT(1); + } + break; + case F_DUPFD: + arg = va_arg(ap, long); + va_end(ap); + rc = fcntl(fd, cmd, arg); + if (rc == -1) { + printf("fcntl F_DUPFD %d failed: %s\n", + (int)arg, strerror(errno)); + EXIT(1); + } + break; + default: + va_end(ap); + printf("fcntl cmd %d not supported\n", cmd); + EXIT(1); + } + return rc; +} + +char *safe_strncpy(char *dst, char *src, int max_size) +{ + int src_size; + src_size=strlen(src); + if (src_size >= max_size) { + src_size=max_size-1; + } + memcpy(dst, src, src_size); + dst[src_size]=0; + + return(dst); +} diff --git a/lustre/liblustre/tests/test_common.h b/lustre/liblustre/tests/test_common.h index c3687b9..5949a42 100644 --- a/lustre/liblustre/tests/test_common.h +++ b/lustre/liblustre/tests/test_common.h @@ -8,6 +8,8 @@ extern int exit_on_err; +#include /* for utimbuf */ + void t_touch(const char *path); void t_create(const char *path); void t_link(const char *src, const char *dst); @@ -21,6 +23,8 @@ void t_chmod(const char *path, const char *format, ...); void t_rename(const char *oldpath, const char *newpath); int t_open_readonly(const char *path); int t_open(const char *path); +int t_chdir(const char *path); +int t_utime(const char *path, const struct utimbuf *buf); int t_opendir(const char *path); void t_close(int fd); int t_check_stat(const char *name, struct stat *buf); @@ -29,5 +33,8 @@ void t_echo_create(const char *path, const char *str); void t_grep(const char *path, char *str); void t_grep_v(const char *path, char *str); void t_ls(int fd, char *buf, int size); +int t_fcntl(int fd, int cmd, ...); + +char *safe_strncpy(char *dst, char *src, int max_size); #endif diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 3ecbb54..a109d1a 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -36,6 +36,8 @@ #else #include #include +#include +#include #include #endif #include diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 5b13389..25ee67f 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -25,6 +25,10 @@ #define DEBUG_SUBSYSTEM S_LOG +#ifndef __KERNEL__ +#include +#endif + #include static void print_llogd_body(struct llogd_body *d) diff --git a/lustre/osc/autoMakefile.am b/lustre/osc/autoMakefile.am index af0649d..be3ff47 100644 --- a/lustre/osc/autoMakefile.am +++ b/lustre/osc/autoMakefile.am @@ -5,7 +5,7 @@ if LIBLUSTRE noinst_LIBRARIES = libosc.a -libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_internal.h +libosc_a_SOURCES = osc_request.c osc_lib.c osc_create.c osc_quota.c osc_internal.h libosc_a_CPPFLAGS = $(LLCPPFLAGS) libosc_a_CFLAGS = $(LLCFLAGS) endif diff --git a/lustre/osc/osc_quota.c b/lustre/osc/osc_quota.c index 2ace56a..a12158e 100644 --- a/lustre/osc/osc_quota.c +++ b/lustre/osc/osc_quota.c @@ -26,7 +26,6 @@ #ifdef __KERNEL__ # include -# include # include # include # include @@ -38,8 +37,11 @@ # else # include # endif +#else +# include #endif +#include #include "osc_internal.h" struct osc_quota_info { diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 01588c7..3342251 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -730,8 +730,10 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, cksum = crc32_le(cksum, ptr + off, count); kunmap(pga->pg); +#ifdef __KERNEL__ LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n", off, cksum); +#endif nob -= pga->count; pg_count--; @@ -2258,9 +2260,8 @@ out: RETURN(rc); } -#ifdef __KERNEL__ /* Note: caller will lock/unlock, and set uptodate on the pages */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#if defined(__KERNEL__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_count page_count, struct brw_page *pga) @@ -2533,7 +2534,6 @@ static int sanosc_brw(int cmd, struct obd_export *exp, struct obdo *oa, RETURN(0); } #endif -#endif static void osc_set_data_with_check(struct lustre_handle *lockh, void *data, int flags) diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index 4075a1e..2262e48 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -15,7 +15,7 @@ LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ COMMON_SOURCES = client.c recover.c connection.c niobuf.c pack_generic.c \ events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c \ - llog_client.c llog_server.c import.c ptlrpcd.c \ + llog_client.c llog_server.c import.c ptlrpcd.c pers.c \ ptlrpc_internal.h $(LDLM_COMM_SOURCES) if LIBLUSTRE diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 3eed4f4..0194ade 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -590,6 +590,7 @@ static int signal_completed_replay(struct obd_import *imp) RETURN(0); } +#ifdef __KERNEL__ static int ptlrpc_invalidate_import_thread(void *data) { struct obd_import *imp = data; @@ -618,6 +619,7 @@ static int ptlrpc_invalidate_import_thread(void *data) RETURN(0); } +#endif int ptlrpc_import_recovery_state_machine(struct obd_import *imp) { @@ -636,11 +638,17 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); +#ifdef __KERNEL__ rc = kernel_thread(ptlrpc_invalidate_import_thread, imp, CLONE_VM | CLONE_FILES); if (rc < 0) CERROR("error starting invalidate thread: %d\n", rc); RETURN(rc); +#else + ptlrpc_invalidate_import(imp); + + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); +#endif } if (imp->imp_state == LUSTRE_IMP_REPLAY) { diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index ebb685f..a20932f 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -753,42 +753,6 @@ int llog_log_swabbed(struct llog_log_hdr *hdr) return -1; } -void lustre_swab_llogd_body (struct llogd_body *d) -{ - __swab64s (&d->lgd_logid.lgl_oid); - __swab64s (&d->lgd_logid.lgl_ogr); - __swab32s (&d->lgd_logid.lgl_ogen); - __swab32s (&d->lgd_ctxt_idx); - __swab32s (&d->lgd_llh_flags); - __swab32s (&d->lgd_index); - __swab32s (&d->lgd_saved_index); - __swab32s (&d->lgd_len); - __swab64s (&d->lgd_cur_offset); -} - -void lustre_swab_llog_hdr (struct llog_log_hdr *h) -{ - __swab32s (&h->llh_hdr.lrh_index); - __swab32s (&h->llh_hdr.lrh_len); - __swab32s (&h->llh_hdr.lrh_type); - __swab64s (&h->llh_timestamp); - __swab32s (&h->llh_count); - __swab32s (&h->llh_bitmap_offset); - __swab32s (&h->llh_flags); - __swab32s (&h->llh_tail.lrt_index); - __swab32s (&h->llh_tail.lrt_len); -} - -void lustre_swab_llogd_conn_body (struct llogd_conn_body *d) -{ - __swab64s (&d->lgdc_gen.mnt_cnt); - __swab64s (&d->lgdc_gen.conn_cnt); - __swab64s (&d->lgdc_logid.lgl_oid); - __swab64s (&d->lgdc_logid.lgl_ogr); - __swab32s (&d->lgdc_logid.lgl_ogen); - __swab32s (&d->lgdc_ctxt_idx); -} - void lustre_swab_qdata(struct qunit_data *d) { __swab32s (&d->qd_id); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 1702e0b..9ad9b7d 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -726,6 +726,7 @@ liblustre_check_services (void *arg) RETURN(did_something); } +#define ptlrpc_stop_all_threads(s) do {} while (0) #else /* __KERNEL__ */