From: yury Date: Wed, 5 May 2004 11:40:01 +0000 (+0000) Subject: landing smfs. X-Git-Tag: v1_7_100~2315 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=cd9c585e8c7bdd6cfd802be64ef277dfd466be17 landing smfs. --- diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h index 5a185d7..958889a 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/linux/kp30.h @@ -694,11 +694,11 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); # endif #endif -#ifndef LP_POISON +/*#ifndef LP_POISON # define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) # define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) # define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) -#endif +#endif*/ #if defined(__x86_64__) # define LPU64 "%Lu" @@ -706,18 +706,33 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); # define LPX64 "%#Lx" # define LPSZ "%lu" # define LPSSZ "%ld" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#endif #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) # define LPU64 "%Lu" # define LPD64 "%Ld" # define LPX64 "%#Lx" # define LPSZ "%u" # define LPSSZ "%d" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) # define LPU64 "%lu" # define LPD64 "%ld" # define LPX64 "%#lx" # define LPSZ "%lu" # define LPSSZ "%ld" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#endif #endif #ifndef LPU64 # error "No word size defined" diff --git a/lnet/include/linux/libcfs.h b/lnet/include/linux/libcfs.h index 5b1b8a5..1d27768 100644 --- a/lnet/include/linux/libcfs.h +++ b/lnet/include/linux/libcfs.h @@ -79,6 +79,7 @@ extern unsigned int 
portal_cerror; #define S_PTLROUTER 0x00100000 #define S_COBD 0x00200000 #define S_IBNAL 0x00400000 +#define S_SM 0x00800000 /* If you change these values, please keep portals/utils/debug.c * up to date! */ diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index ae2da55..b6bfec0 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -538,6 +538,7 @@ static struct mod_paths { {"mds", "lustre/mds"}, {"mdc", "lustre/mdc"}, {"llite", "lustre/llite"}, + {"smfs", "lustre/smfs"}, {"obdecho", "lustre/obdecho"}, {"ldlm", "lustre/ldlm"}, {"obdfilter", "lustre/obdfilter"}, @@ -546,6 +547,7 @@ static struct mod_paths { {"fsfilt_ext3", "lustre/lvfs"}, {"fsfilt_extN", "lustre/lvfs"}, {"fsfilt_reiserfs", "lustre/lvfs"}, + {"fsfilt_smfs", "lustre/lvfs"}, {"mds_ext2", "lustre/mds"}, {"mds_ext3", "lustre/mds"}, {"mds_extN", "lustre/mds"}, diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index cb75fe5..b170c5c 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -8,4 +8,4 @@ EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \ lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \ lustre_export.h lustre_log.h obd_echo.h obd_ptlbd.h obd_trace.h \ lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \ - lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h + lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h lustre_smfs.h diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 72f3817..ae8b544 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001 Cluster File Systems, Inc. + * Copyright (C) 2001-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. 
* @@ -28,6 +28,7 @@ #ifdef __KERNEL__ #include +#include #include typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, @@ -58,10 +59,23 @@ struct fsfilt_operations { int (* fs_set_md)(struct inode *inode, void *handle, void *md, int size); int (* fs_get_md)(struct inode *inode, void *md, int size); + + /* this method is needed to make IO operation fsfilt nature depend. */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + int (* fs_send_bio)(struct inode *inode, struct bio *bio); +#else + int (* fs_send_bio)(struct inode *inode, struct kiobuf *bio); +#endif + + /* methods for getting page from backing fs and putting page there + * during IO. Used on OST. */ + int (* fs_putpage)(struct inode *inode, struct page *page); + struct page *(* fs_getpage)(struct inode *inode, long int index); + ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, loff_t *offset); - int (* fs_add_journal_cb)(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func, + int (* fs_add_journal_cb)(struct obd_device *obd, struct super_block *sb, + __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, void *cb_data); int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); int (* fs_sync)(struct super_block *sb); @@ -74,6 +88,12 @@ struct fsfilt_operations { int force_sync); int (* fs_read_record)(struct file *, void *, int size, loff_t *); int (* fs_setup)(struct super_block *sb); + + int (* fs_set_xattr)(struct inode *inode, void *handle, char *name, + void *buffer, int buffer_size); + int (* fs_get_xattr)(struct inode *inode, char *name, + void *buffer, int buffer_size); + int (* fs_get_op_len)(int, struct fsfilt_objinfo *, int); }; @@ -92,15 +112,46 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define FSFILT_OP_SETATTR 8 #define FSFILT_OP_LINK 9 #define FSFILT_OP_CANCEL_UNLINK 10 - -static inline void *fsfilt_start_log(struct obd_device *obd, - struct inode *inode, int op, - struct obd_trans_info *oti, int logs) 
+#define FSFILT_OP_NOOP 15 + +/* XXX BUG 3188 -- must return to one set of opcodes */ +#define KML_UNLINK 0x11 +#define KML_RMDIR 0x12 +#define KML_RENAME 0x13 +#define KML_CREATE 0x14 +#define KML_MKDIR 0x15 +#define KML_SYMLINK 0x16 +#define KML_MKNOD 0x17 +#define KML_LINK 0x19 + +#define CACHE_UNLINK 0x21 +#define CACHE_RMDIR 0x22 +#define CACHE_RENAME 0x23 +#define CACHE_CREATE 0x24 +#define CACHE_MKDIR 0x25 +#define CACHE_SYMLINK 0x26 +#define CACHE_MKNOD 0x27 +#define CACHE_LINK 0x29 +#define CACHE_NOOP 0x2f + +#define KML_CACHE_UNLINK 0x31 +#define KML_CACHE_RMDIR 0x32 +#define KML_CACHE_RENAME 0x33 +#define KML_CACHE_CREATE 0x34 +#define KML_CACHE_MKDIR 0x35 +#define KML_CACHE_SYMLINK 0x36 +#define KML_CACHE_MKNOD 0x37 +#define KML_CACHE_LINK 0x39 +#define KML_CACHE_NOOP 0x3f + +static inline void * +fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode, + int op, struct obd_trans_info *oti, int logs) { unsigned long now = jiffies; void *parent_handle = oti ? oti->oti_handle : NULL; - void *handle = obd->obd_fsops->fs_start(inode, op, parent_handle, logs); - CDEBUG(D_INFO, "started handle %p (%p)\n", handle, parent_handle); + void *handle = ops->fs_start(inode, op, parent_handle, logs); + CDEBUG(D_HA, "started handle %p (%p)\n", handle, parent_handle); if (oti != NULL) { if (parent_handle == NULL) { @@ -116,17 +167,60 @@ static inline void *fsfilt_start_log(struct obd_device *obd, return handle; } -static inline void *fsfilt_start(struct obd_device *obd, struct inode *inode, - int op, struct obd_trans_info *oti) +static inline void * +fsfilt_start_log(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti, int logs) +{ + return fsfilt_start_ops(obd->obd_fsops, inode, op, oti, logs); +} + +static inline void * +fsfilt_start(struct obd_device *obd, struct inode *inode, + int op, struct obd_trans_info *oti) +{ + return fsfilt_start_ops(obd->obd_fsops, inode, op, oti, 0); +} + +static inline void * 
+llog_fsfilt_start(struct llog_ctxt *ctxt, struct inode *inode, + int op, struct obd_trans_info *oti) +{ + return fsfilt_start_ops(ctxt->loc_fsops, inode, op, oti, 1); +} + +static inline int +fsfilt_commit_ops(struct fsfilt_operations *ops, struct inode *inode, + void *handle, int force_sync) +{ + unsigned long now = jiffies; + int rc = ops->fs_commit(inode, handle, force_sync); + CDEBUG(D_HA, "committing handle %p\n", handle); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + + return rc; +} + +static inline int +fsfilt_commit(struct obd_device *obd, struct inode *inode, + void *handle, int force_sync) +{ + return fsfilt_commit_ops(obd->obd_fsops, inode, handle, force_sync); +} + +static inline int +llog_fsfilt_commit(struct llog_ctxt *ctxt, struct inode *inode, + void *handle, int force_sync) { - return fsfilt_start_log(obd, inode, op, oti, 0); + return fsfilt_commit_ops(ctxt->loc_fsops, inode, handle, force_sync); } -static inline void *fsfilt_brw_start_log(struct obd_device *obd, - int objcount, - struct fsfilt_objinfo *fso, - int niocount, struct niobuf_local *nb, - struct obd_trans_info *oti, int logs) +static inline void * +fsfilt_brw_start_log(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_local *nb, struct obd_trans_info *oti, + int logs) { unsigned long now = jiffies; void *parent_handle = oti ? 
oti->oti_handle : NULL; @@ -149,30 +243,17 @@ static inline void *fsfilt_brw_start_log(struct obd_device *obd, return handle; } -static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, - struct fsfilt_objinfo *fso, int niocount, - struct niobuf_local *nb, - struct obd_trans_info *oti) +static inline void * +fsfilt_brw_start(struct obd_device *obd, int objcount, + struct fsfilt_objinfo *fso, int niocount, + struct niobuf_local *nb, struct obd_trans_info *oti) { return fsfilt_brw_start_log(obd, objcount, fso, niocount, nb, oti, 0); } -static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, - void *handle, int force_sync) -{ - unsigned long now = jiffies; - int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); - CDEBUG(D_INFO, "committing handle %p\n", handle); - - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); - - return rc; -} - -static inline int fsfilt_commit_async(struct obd_device *obd, - struct inode *inode, void *handle, - void **wait_handle) +static inline int +fsfilt_commit_async(struct obd_device *obd, struct inode *inode, + void *handle, void **wait_handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle); @@ -184,8 +265,8 @@ static inline int fsfilt_commit_async(struct obd_device *obd, return rc; } -static inline int fsfilt_commit_wait(struct obd_device *obd, - struct inode *inode, void *handle) +static inline int +fsfilt_commit_wait(struct obd_device *obd, struct inode *inode, void *handle) { unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_wait(inode, handle); @@ -195,8 +276,9 @@ static inline int fsfilt_commit_wait(struct obd_device *obd, return rc; } -static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, - void *handle, struct iattr *iattr,int do_trunc) +static inline int +fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, + void *handle, 
struct iattr *iattr, int do_trunc) { unsigned long now = jiffies; int rc; @@ -206,43 +288,120 @@ static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, return rc; } -static inline int fsfilt_iocontrol(struct obd_device *obd, struct inode *inode, - struct file *file, unsigned int cmd, - unsigned long arg) +static inline int +fsfilt_iocontrol(struct obd_device *obd, struct inode *inode, + struct file *file, unsigned int cmd, + unsigned long arg) { return obd->obd_fsops->fs_iocontrol(inode, file, cmd, arg); } -static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, - void *handle, void *md, int size) +static inline int fsfilt_setup(struct obd_device *obd, + struct super_block *fs) +{ + if (obd->obd_fsops->fs_setup) + return obd->obd_fsops->fs_setup(fs); + + return 0; +} + +static inline int +fsfilt_set_md(struct obd_device *obd, struct inode *inode, + void *handle, void *md, int size) { return obd->obd_fsops->fs_set_md(inode, handle, md, size); } -static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode, - void *md, int size) +static inline int +fsfilt_get_md(struct obd_device *obd, struct inode *inode, + void *md, int size) { return obd->obd_fsops->fs_get_md(inode, md, size); } -static inline ssize_t fsfilt_readpage(struct obd_device *obd, - struct file *file, char *buf, - size_t count, loff_t *offset) +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static inline int +fsfilt_send_bio(struct obd_device *obd, struct inode *inode, + struct bio *bio) +#else +static inline int +fsfilt_send_bio(struct obd_device *obd, struct inode *inode, + struct kiobuf *bio) +#endif +{ + return obd->obd_fsops->fs_send_bio(inode, bio); +} + +static inline int +fsfilt_putpage(struct obd_device *obd, struct inode *inode, + struct page *page) +{ + int rc = 0; + struct filter_obd *filter; + unsigned long now = jiffies; + + LASSERT(obd != NULL); + LASSERT(inode != NULL); + LASSERT(page != NULL); + + filter = &obd->u.filter; + 
+ if (!obd->obd_fsops->fs_putpage) + return -ENOSYS; + + CDEBUG(D_INFO, "putpage %lx\n", page->index); + + rc = obd->obd_fsops->fs_putpage(inode, page); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long putpage time %lus\n", (jiffies - now) / HZ); + + return rc; +} + +static inline struct page * +fsfilt_getpage(struct obd_device *obd, struct inode *inode, + unsigned long index) +{ + struct page *page; + unsigned long now = jiffies; + + LASSERT(obd != NULL); + LASSERT(inode != NULL); + + if (!obd->obd_fsops->fs_getpage) + return ERR_PTR(-ENOSYS); + + CDEBUG(D_INFO, "getpage %lx\n", index); + + page = obd->obd_fsops->fs_getpage(inode, index); + + if (time_after(jiffies, now + 15 * HZ)) + CERROR("long getpage time %lus\n", (jiffies - now) / HZ); + + return page; +} + +static inline ssize_t +fsfilt_readpage(struct obd_device *obd, struct file *file, char *buf, + size_t count, loff_t *offset) { return obd->obd_fsops->fs_readpage(file, buf, count, offset); } -static inline int fsfilt_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func, - void *cb_data) +static inline int +fsfilt_add_journal_cb(struct obd_device *obd, struct super_block *sb, + __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, + void *cb_data) { - return obd->obd_fsops->fs_add_journal_cb(obd, last_rcvd, - handle, cb_func, cb_data); + return obd->obd_fsops->fs_add_journal_cb(obd, sb, last_rcvd, handle, + cb_func, cb_data); } /* very similar to obd_statfs(), but caller already holds obd_osfs_lock */ -static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb, - unsigned long max_age) +static inline int +fsfilt_statfs(struct obd_device *obd, struct super_block *sb, + unsigned long max_age) { int rc = 0; @@ -258,48 +417,58 @@ static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb, return rc; } -static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb) +static inline int +fsfilt_sync(struct 
obd_device *obd, struct super_block *sb) { return obd->obd_fsops->fs_sync(sb); } -static inline int fsfilt_map_inode_page(struct obd_device *obd, - struct inode *inode, struct page *page, - unsigned long *blocks, int *created, - int create) +static inline int +fsfilt_map_inode_page(struct obd_device *obd, struct inode *inode, + struct page *page, unsigned long *blocks, + int *created, int create) { - return obd->obd_fsops->fs_map_inode_page(inode, page, blocks, created, - create); + return obd->obd_fsops->fs_map_inode_page(inode, page, blocks, + created, create); } -static inline int fs_prep_san_write(struct obd_device *obd, - struct inode *inode, - long *blocks, - int nblocks, - loff_t newsize) +static inline int +fs_prep_san_write(struct obd_device *obd, struct inode *inode, + long *blocks, int nblocks, loff_t newsize) { return obd->obd_fsops->fs_prep_san_write(inode, blocks, nblocks, newsize); } -static inline int fsfilt_read_record(struct obd_device *obd, struct file *file, - void *buf, loff_t size, loff_t *offs) +static inline int +fsfilt_read_record(struct obd_device *obd, struct file *file, + void *buf, loff_t size, loff_t *offs) { return obd->obd_fsops->fs_read_record(file, buf, size, offs); } -static inline int fsfilt_write_record(struct obd_device *obd, struct file *file, - void *buf, loff_t size, loff_t *offs, - int force_sync) +static inline int +fsfilt_write_record(struct obd_device *obd, struct file *file, + void *buf, loff_t size, loff_t *offs, int force_sync) { - return obd->obd_fsops->fs_write_record(file, buf, size,offs,force_sync); + return obd->obd_fsops->fs_write_record(file, buf, size, offs, + force_sync); } -static inline int fsfilt_setup(struct obd_device *obd, struct super_block *fs) +static inline int +llog_fsfilt_write_record(struct llog_ctxt *ctxt, struct file *file, + void *buf, loff_t size, loff_t *offs, + int force_sync) { - if (obd->obd_fsops->fs_setup) - return obd->obd_fsops->fs_setup(fs); - return 0; + return 
ctxt->loc_fsops->fs_write_record(file, buf, size, offs, + force_sync); +} + +static inline int +llog_fsfilt_read_record(struct llog_ctxt *ctxt, struct file *file, + void *buf, loff_t size, loff_t *offs) +{ + return ctxt->loc_fsops->fs_read_record(file, buf, size, offs); } #endif /* __KERNEL__ */ diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index ef2ad2d..7479634 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -54,7 +54,7 @@ # include # include # include /* for strncpy, below */ -# include /* to check for FMODE_EXEC, lest we redefine */ +# include /* to check for FMODE_EXEC, dev_t, lest we redefine */ #else #ifdef __CYGWIN__ # include @@ -487,19 +487,22 @@ typedef enum { MDS_DONE_WRITING = 45, MDS_LAST_OPC } mds_cmd_t; + #define MDS_FIRST_OPC MDS_GETATTR /* * Do not exceed 63 */ -#define REINT_SETATTR 1 -#define REINT_CREATE 2 -#define REINT_LINK 3 -#define REINT_UNLINK 4 -#define REINT_RENAME 5 -#define REINT_OPEN 6 -#define REINT_MAX 6 +#define REINT_SETATTR 1 +#define REINT_CREATE 2 +#define REINT_LINK 3 +#define REINT_UNLINK 4 +#define REINT_RENAME 5 +#define REINT_OPEN 6 +#define REINT_CLOSE 7 +#define REINT_WRITE 8 +#define REINT_MAX 8 /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 1 @@ -571,12 +574,6 @@ struct mds_body { extern void lustre_swab_mds_body (struct mds_body *b); -/* MDS update records */ - -//struct mds_update_record_hdr { -// __u32 ur_opcode; -//}; - struct mds_rec_setattr { __u32 sa_opcode; __u32 sa_fsuid; @@ -898,6 +895,8 @@ typedef enum { LLOG_GEN_REC = 0x10640000, LLOG_HDR_MAGIC = 0x10645539, LLOG_LOGID_MAGIC = 0x1064553b, + SMFS_UPDATE_REC = 0x10650000, + CACHE_LRU_REC = 0x10660000, } llog_op_type; /* Log record header - stored in little endian order. 
@@ -959,13 +958,48 @@ struct llog_size_change_rec { struct llog_gen { __u64 mnt_cnt; __u64 conn_cnt; -} __attribute__((packed)); +}; struct llog_gen_rec { struct llog_rec_hdr lgr_hdr; struct llog_gen lgr_gen; struct llog_rec_tail lgr_tail; +} __attribute__((packed)); + +struct llog_lru_rec { + struct llog_rec_hdr llr_hdr; + struct ll_fid llr_cfid; + struct ll_fid llr_pfid; + struct llog_rec_tail llr_tail; +} __attribute__((packed)); + +/* got from mds_update_record. FIXME: maybe some attribute in reint_record and + update_record will be changed later. */ +/* XXX BUG 3188 -- must return to one set of structures. */ + +struct update_record { + __u32 ur_opcode; + __u32 ur_fsuid; + __u32 ur_fsgid; + dev_t ur_rdev; + struct iattr ur_iattr; + struct iattr ur_pattr; + __u32 ur_flags; + __u32 ur_len; }; +struct reint_record { + struct update_record u_rec; + char *rec_data1; + int rec1_size; + char *rec_data2; + int rec2_size; +}; +struct llog_smfs_rec { + struct llog_rec_hdr lsr_hdr; + struct update_record lsr_rec; + struct llog_rec_tail lsr_tail; +}; + /* On-disk header structure of each log object, stored in little endian order */ #define LLOG_CHUNK_SIZE 8192 #define LLOG_HEADER_SIZE (96) @@ -986,7 +1020,7 @@ struct llog_log_hdr { __u32 llh_size; __u32 llh_flags; __u32 llh_cat_idx; - /* for a catlog the first plain slot is next to it */ + /* for a catalog the first plain slot is next to it */ struct obd_uuid llh_tgtuuid; __u32 llh_reserved[LLOG_HEADER_SIZE/sizeof(__u32) - 23]; __u32 llh_bitmap[LLOG_BITMAP_BYTES/sizeof(__u32)]; @@ -1010,6 +1044,7 @@ enum llogd_rpc_ops { LLOG_ORIGIN_HANDLE_CLOSE = 505, LLOG_ORIGIN_CONNECT = 506, LLOG_CATINFO = 507, /* for lfs catinfo */ + LLOG_ORIGIN_HANDLE_PREV_BLOCK = 508, }; struct llogd_body { diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 0bb5f0b..b4a59d3 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -40,8 +40,17 @@ #include #include -#define 
LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -#define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#if BITS_PER_LONG > 32 && !defined(__x86_64__) +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#endif +#else +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif +#endif #ifndef LPU64 /* x86_64 has 64bit longs and defines u64 as long long */ diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 1d0ff9f..36ec2d6 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -40,6 +40,7 @@ #define LOG_NAME_LIMIT(logname, name) \ snprintf(logname, sizeof(logname), "LOGS/%s", name) +#define LLOG_EEMPTY 4711 struct plain_handle_data { struct list_head phd_entry; @@ -67,18 +68,18 @@ struct llog_handle { } u; }; -#define LLOG_EEMPTY 4711 - /* llog.c - general API */ typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); +struct llog_handle *llog_alloc_handle(void); +void llog_free_handle(struct llog_handle *handle); +int llog_cancel_rec(struct llog_handle *loghandle, int index); int llog_init_handle(struct llog_handle *handle, int flags, struct obd_uuid *uuid); +int llog_close(struct llog_handle *cathandle); int llog_process(struct llog_handle *loghandle, llog_cb_t cb, void *data, void *catdata); -extern struct llog_handle *llog_alloc_handle(void); -extern void llog_free_handle(struct llog_handle *handle); -extern int llog_close(struct llog_handle *cathandle); -extern int llog_cancel_rec(struct llog_handle *loghandle, int index); +int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata); /* llog_cat.c - catalog api */ struct llog_process_data { @@ -89,37 +90,45 @@ struct llog_process_data { struct llog_process_cat_data { int first_idx; int last_idx; - /* to process catlog across zero record */ + /* to process catalog across 
zero record */ }; +int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, + struct llog_logid *logid); int llog_cat_put(struct llog_handle *cathandle); int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, struct llog_cookie *reccookie, void *buf); int llog_cat_cancel_records(struct llog_handle *cathandle, int count, struct llog_cookie *cookies); int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); -/* llog_obd.c */ -int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, - int count, struct llog_logid *logid,struct llog_operations *op); -int llog_cleanup(struct llog_ctxt *); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); -int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies); -int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); +int llog_catalog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *reccookie, int, void *data); +int llog_catalog_cancel(struct llog_ctxt *ctxt, int count, struct llog_cookie *, + int flags, void *data); +int llog_catalog_setup(struct llog_ctxt **res, char *name, struct lvfs_run_ctxt *, + struct fsfilt_operations *fsops, struct dentry *logs_de, + struct dentry *objects_de); +int llog_catalog_cleanup(struct llog_ctxt *ctxt); +int llog_cat_half_bottom(struct llog_cookie *, struct llog_handle *); + +/* llog_lvfs.c */ +int llog_get_cat_list(struct lvfs_run_ctxt *, struct fsfilt_operations *, + char *name, int count, struct llog_catid *idarray); +int llog_put_cat_list(struct lvfs_run_ctxt *, struct fsfilt_operations *, + char *name, int count, struct llog_catid *idarray); +extern struct 
llog_operations llog_lvfs_ops; +/* llog_obd.c - obd llog api */ +int obd_llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, + int count, struct llog_logid *logid,struct llog_operations *op); +int obd_llog_cleanup(struct llog_ctxt *); int llog_obd_origin_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, int count, struct llog_logid *logid); -int llog_obd_origin_cleanup(struct llog_ctxt *ctxt); -int llog_obd_origin_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); - -int llog_cat_initialize(struct obd_device *obd, int count); +int obd_llog_cat_initialize(struct obd_device *obd, int count, char *name); int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, int count, struct llog_catid *logid); @@ -127,7 +136,7 @@ int obd_llog_finish(struct obd_device *obd, int count); /* llog_ioctl.c */ int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data); -int llog_catlog_list(struct obd_device *obd, int count, +int llog_catalog_list(struct obd_device *obd, int count, struct obd_ioctl_data *data); /* llog_net.c */ @@ -139,60 +148,65 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count, int llog_handle_connect(struct ptlrpc_request *req); /* recov_thread.c */ -int llog_obd_repl_cancel(struct llog_ctxt *ctxt, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags); +int llog_obd_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data); + int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); int llog_repl_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); struct llog_operations { + int (*lop_setup)(struct obd_device *obd, int ctxt_idx, + struct obd_device *disk_obd, int count, + struct llog_logid *logid); + int (*lop_cleanup)(struct llog_ctxt *ctxt); + int 
(*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, + struct llog_logid *logid, char *name); + int (*lop_destroy)(struct llog_handle *handle); + int (*lop_close)(struct llog_handle *handle); + + int (*lop_read_header)(struct llog_handle *handle); + int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data); + int (*lop_cancel)(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data); int (*lop_write_rec)(struct llog_handle *loghandle, struct llog_rec_hdr *rec, struct llog_cookie *logcookies, int numcookies, void *, int idx); - int (*lop_destroy)(struct llog_handle *handle); int (*lop_next_block)(struct llog_handle *h, int *curr_idx, int next_idx, __u64 *offset, void *buf, int len); - int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, - struct llog_logid *logid, char *name); - int (*lop_close)(struct llog_handle *handle); - int (*lop_read_header)(struct llog_handle *handle); + int (*lop_prev_block)(struct llog_handle *h, + int prev_idx, void *buf, int len); - int (*lop_setup)(struct obd_device *obd, int ctxt_idx, - struct obd_device *disk_obd, int count, - struct llog_logid *logid); + /* XXX add 2 more: commit callbacks and llog recovery functions */ int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); - int (*lop_cleanup)(struct llog_ctxt *ctxt); - int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); - int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); int (*lop_connect)(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, struct llog_gen *gen, struct obd_uuid *uuid); - /* XXX add 2 more: commit callbacks and llog recovery functions */ }; -/* llog_lvfs.c */ -extern struct llog_operations llog_lvfs_ops; -int llog_get_cat_list(struct obd_device *obd, 
struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray); - struct llog_ctxt { - int loc_idx; /* my index the obd array of ctxt's */ - struct llog_gen loc_gen; + /* needed for lvfs based log */ + struct llog_handle *loc_handle; + struct llog_operations *loc_logops; + struct fsfilt_operations *loc_fsops; + struct dentry *loc_logs_dir; + struct dentry *loc_objects_dir; + struct lvfs_run_ctxt *loc_lvfs_ctxt; + struct obd_device *loc_obd; /* points back to the containing obd*/ + struct llog_gen loc_gen; + int loc_idx; /* my index the obd array of ctxt's */ + struct obd_export *loc_exp; struct obd_import *loc_imp; /* to use in RPC's: can be backward pointing import */ - struct llog_operations *loc_logops; - struct llog_handle *loc_handle; struct llog_canceld_ctxt *loc_llcd; struct semaphore loc_sem; /* protects loc_llcd */ - void *llog_proc_cb; + void *loc_proc_cb; /* cb for recovery */ }; static inline void llog_gen_init(struct llog_ctxt *ctxt) @@ -219,10 +233,10 @@ static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b) #define LLOG_GEN_INC(gen) ((gen).conn_cnt) ++ #define LLOG_PROC_BREAK 0x0001 -static inline int llog_obd2ops(struct llog_ctxt *ctxt, +static inline int llog_ctxt2ops(struct llog_ctxt *ctxt, struct llog_operations **lop) { - if (ctxt == NULL) + if (ctxt == NULL) return -ENOTCONN; *lop = ctxt->loc_logops; @@ -238,7 +252,7 @@ static inline int llog_handle2ops(struct llog_handle *loghandle, if (loghandle == NULL) return -EINVAL; - return llog_obd2ops(loghandle->lgh_ctxt, lop); + return llog_ctxt2ops(loghandle->lgh_ctxt, lop); } static inline int llog_data_len(int len) @@ -255,29 +269,36 @@ static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, return obd->obd_llog_ctxt[index]; } -static inline int llog_write_rec(struct llog_handle *handle, - struct llog_rec_hdr *rec, - struct llog_cookie *logcookies, - int numcookies, void *buf, int idx) +static inline int llog_create(struct llog_ctxt *ctxt, struct 
llog_handle **res, + struct llog_logid *logid, char *name) { struct llog_operations *lop; - int rc, buflen; + int rc; ENTRY; - rc = llog_handle2ops(handle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); - if (lop->lop_write_rec == NULL) + if (lop->lop_create == NULL) RETURN(-EOPNOTSUPP); - if (buf) - buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) - + sizeof(struct llog_rec_tail); - else - buflen = rec->lrh_len; - LASSERT(size_round(buflen) == buflen); + rc = lop->lop_create(ctxt, res, logid, name); + RETURN(rc); +} - rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); +static inline int llog_destroy(struct llog_handle *handle) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_destroy == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_destroy(handle); RETURN(rc); } @@ -297,44 +318,69 @@ static inline int llog_read_header(struct llog_handle *handle) RETURN(rc); } -static inline int llog_destroy(struct llog_handle *handle) +static inline int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_handle2ops(handle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); - if (lop->lop_destroy == NULL) + if (lop->lop_add == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_destroy(handle); + rc = lop->lop_add(ctxt, rec, buf, logcookies, numcookies, data); RETURN(rc); } -#if 0 -static inline int llog_cancel(struct obd_export *exp, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags) +static inline int llog_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_handle2ops(loghandle, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); if (lop->lop_cancel == NULL) RETURN(-EOPNOTSUPP); - rc 
= lop->lop_cancel(exp, lsm, count, cookies, flags); + rc = lop->lop_cancel(ctxt, count, cookies, flags, data); + RETURN(rc); +} + +static inline int llog_write_rec(struct llog_handle *handle, + struct llog_rec_hdr *rec, + struct llog_cookie *logcookies, + int numcookies, void *buf, int idx) +{ + struct llog_operations *lop; + int rc, buflen; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_write_rec == NULL) + RETURN(-EOPNOTSUPP); + + if (buf) + buflen = le32_to_cpu(rec->lrh_len) + sizeof(struct llog_rec_hdr) + + sizeof(struct llog_rec_tail); + else + buflen = le32_to_cpu(rec->lrh_len); + LASSERT(size_round(buflen) == buflen); + + rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); RETURN(rc); } -#endif -static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, +static inline int llog_next_block(struct llog_handle *loghandle, int *curr_idx, + int next_idx, __u64 *curr_offset, void *buf, int len) { struct llog_operations *lop; @@ -347,25 +393,25 @@ static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, if (lop->lop_next_block == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_next_block(loghandle, cur_idx, next_idx, cur_offset, buf, + rc = lop->lop_next_block(loghandle, curr_idx, next_idx, curr_offset, buf, len); RETURN(rc); } -static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) +static inline int llog_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) { struct llog_operations *lop; int rc; ENTRY; - rc = llog_obd2ops(ctxt, &lop); + rc = llog_handle2ops(loghandle, &lop); if (rc) RETURN(rc); - if (lop->lop_create == NULL) + if (lop->lop_prev_block == NULL) RETURN(-EOPNOTSUPP); - rc = lop->lop_create(ctxt, res, logid, name); + rc = lop->lop_prev_block(loghandle, prev_idx, buf, len); RETURN(rc); } @@ -377,7 +423,7 @@ static inline int 
llog_connect(struct llog_ctxt *ctxt, int count, int rc; ENTRY; - rc = llog_obd2ops(ctxt, &lop); + rc = llog_ctxt2ops(ctxt, &lop); if (rc) RETURN(rc); if (lop->lop_connect == NULL) @@ -387,4 +433,20 @@ static inline int llog_connect(struct llog_ctxt *ctxt, int count, RETURN(rc); } +static inline int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_ctxt2ops(ctxt, &lop); + if (rc) + RETURN(rc); + if (lop->lop_sync == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_sync(ctxt, exp); + RETURN(rc); +} + #endif diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index c602bb6..20e2771 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -84,18 +84,18 @@ struct mds_update_record { int ur_cookielen; struct llog_cookie *ur_logcookies; struct iattr ur_iattr; - struct obd_ucred ur_uc; + struct lvfs_ucred ur_uc; __u64 ur_rdev; __u32 ur_mode; __u64 ur_time; __u32 ur_flags; }; -#define ur_fsuid ur_uc.ouc_fsuid -#define ur_fsgid ur_uc.ouc_fsgid -#define ur_cap ur_uc.ouc_cap -#define ur_suppgid1 ur_uc.ouc_suppgid1 -#define ur_suppgid2 ur_uc.ouc_suppgid2 +#define ur_fsuid ur_uc.luc_fsuid +#define ur_fsgid ur_uc.luc_fsgid +#define ur_cap ur_uc.luc_cap +#define ur_suppgid1 ur_uc.luc_suppgid1 +#define ur_suppgid2 ur_uc.luc_suppgid2 /* i_attr_flags holds the open count in the inode in 2.4 */ //XXX Alex implement on 2.4 with i_attr_flags and find soln for 2.5 please diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 3c75a8b..8abb4e4 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -713,6 +713,7 @@ void ptlrpc_lprocfs_unregister_obd(struct obd_device *obddev); /* ptlrpc/llog_server.c */ int llog_origin_handle_create(struct ptlrpc_request *req); +int llog_origin_handle_prev_block(struct ptlrpc_request *req); int llog_origin_handle_next_block(struct ptlrpc_request *req); 
int llog_origin_handle_read_header(struct ptlrpc_request *req); int llog_origin_handle_close(struct ptlrpc_request *req); diff --git a/lustre/include/linux/lustre_smfs.h b/lustre/include/linux/lustre_smfs.h new file mode 100644 index 0000000..2cb6774 --- /dev/null +++ b/lustre/include/linux/lustre_smfs.h @@ -0,0 +1,274 @@ +#ifndef __LUSTRE_SMFS_H +#define __LUSTRE_SMFS_H + +struct smfs_inode_info { + struct inode *smi_inode; + __u32 smi_flags; +}; + +struct journal_operations { + void *(*tr_start)(struct inode *, int op); + void (*tr_commit)(void *handle); +}; + +struct sm_operations { + /* operations on the file store */ + struct super_operations sm_sb_ops; + struct inode_operations sm_dir_iops; + struct inode_operations sm_file_iops; + struct inode_operations sm_sym_iops; + struct file_operations sm_dir_fops; + struct file_operations sm_file_fops; + struct file_operations sm_sym_fops; + struct dentry_operations sm_dentry_ops; + struct journal_operations sm_journal_ops; +}; + +struct smfs_super_info { + struct super_block *smsi_sb; + struct vfsmount *smsi_mnt; /* mount the cache kern with + * kern_do_mount (like MDS) */ + struct fsfilt_operations *sm_cache_fsfilt; /* fsfilt operations */ + struct fsfilt_operations *sm_fsfilt; /* fsfilt operations */ + struct sm_operations *sm_ops; /* cache ops for set cache + * inode ops */ + + struct lvfs_run_ctxt *smsi_ctxt; + struct llog_ctxt *smsi_rec_log; /* smfs kml llog */ + struct dentry *smsi_logs_dir; + struct dentry *smsi_objects_dir; + struct dentry *smsi_delete_dir; /* for delete inode dir */ + char *cache_fs_type; /* cache file system type */ + char *fs_type; /* file system type */ + __u32 flags; /* flags */ + __u32 ops_check; +}; + +#define SMFS_FILE_TYPE "smfs" +#define SMFS_FILE_MAGIC 0x19760218 + +struct smfs_file_info { + struct file *c_file; + int magic; +}; + +struct smfs_record_extents { + size_t sre_count; + loff_t sre_off; +}; + +#define I2SMI(inode) ((struct smfs_inode_info *) (&(inode->u.generic_ip))) 
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define S2SMI(sb) ((struct smfs_super_info *) (&(sb->u.generic_sbp))) +#define S2CSB(sb) (((struct smfs_super_info *) (&(sb->u.generic_sbp)))->smsi_sb) +#else +#define S2SMI(sb) ((struct smfs_super_info *) (sb->s_fs_info)) +#define S2CSB(sb) (((struct smfs_super_info *) (sb->s_fs_info))->smsi_sb) +#endif + +#define I2CI(inode) (((struct smfs_inode_info*) (&(inode->u.generic_ip)))->smi_inode) + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define I2CSB(inode) ((struct smfs_super_info *) (&(inode->i_sb->u.generic_sbp))) +#else +#define I2CSB(inode) ((struct smfs_super_info *) (inode->i_sb->s_fs_info)) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define I2FOPS(inode) (((struct smfs_super_info *) \ + (&(inode->i_sb->u.generic_sbp)))->sm_cache_fsfilt) +#else +#define I2FOPS(inode) (((struct smfs_super_info *) \ + (inode->i_sb->s_fs_info))->sm_cache_fsfilt) +#endif + +#define F2SMFI(file) ((struct smfs_file_info *)((file->private_data))) +#define F2CF(file) (((struct smfs_file_info *) ((file->private_data)))->c_file) + +#define SM_DO_REC 0x1 +#define SM_INIT_REC 0x2 +#define SM_CACHE_HOOK 0x4 + +#define SMFS_DO_REC(smfs_info) (smfs_info->flags & SM_DO_REC) +#define SMFS_SET_REC(smfs_info) (smfs_info->flags |= SM_DO_REC) +#define SMFS_CLEAN_REC(smfs_info) (smfs_info->flags &= ~SM_DO_REC) + +#define SMFS_INIT_REC(smfs_info) (smfs_info->flags & SM_INIT_REC) +#define SMFS_SET_INIT_REC(smfs_info) (smfs_info->flags |= SM_INIT_REC) +#define SMFS_CLEAN_INIT_REC(smfs_info) (smfs_info->flags &= ~SM_INIT_REC) + +#define SMFS_SET_INODE_REC(inode) (I2SMI(inode)->smi_flags |= SM_DO_REC) +#define SMFS_DO_INODE_REC(inode) (I2SMI(inode)->smi_flags & SM_DO_REC) +#define SMFS_CLEAN_INODE_REC(inode) (I2SMI(inode)->smi_flags &= ~SM_DO_REC) + +#define SMFS_CACHE_HOOK(smfs_info) (smfs_info->flags & SM_CACHE_HOOK) +#define SMFS_SET_CACHE_HOOK(smfs_info) (smfs_info->flags |= SM_CACHE_HOOK) +#define 
SMFS_CLEAN_CACHE_HOOK(smfs_info) (smfs_info->flags &= ~SM_CACHE_HOOK) + +#define SMFS_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags & SM_CACHE_HOOK) +#define SMFS_SET_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags |= SM_CACHE_HOOK) +#define SMFS_CLEAN_INODE_CACHE_HOOK(inode) (I2SMI(inode)->smi_flags &= ~SM_CACHE_HOOK) + +#define LVFS_SMFS_BACK_ATTR "lvfs_back_attr" + + +#define REC_COUNT_BIT 0 +#define REC_COUNT_MASK 0x01 /*0001*/ +#define REC_OP_BIT 1 +#define REC_OP_MASK 0x06 /*0110*/ +#define REC_WRITE_KML_BIT 3 +#define REC_WRITE_KML_MASK 0x08 /*1000*/ +#define REC_DEC_LINK_BIT 4 +#define REC_DEC_LINK_MASK 0x10 /*10000, distinct from unlink*/ + + +#define SET_REC_COUNT_FLAGS(flag, count_flag) \ + (flag |= count_flag << REC_COUNT_BIT) +#define GET_REC_COUNT_FLAGS(flag) \ + ((flag & REC_COUNT_MASK) >> REC_COUNT_BIT) + +#define SET_REC_OP_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_OP_BIT) +#define GET_REC_OP_FLAGS(flag) \ + ((flag & REC_OP_MASK) >> REC_OP_BIT) + +#define SET_REC_WRITE_KML_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_WRITE_KML_BIT) +#define GET_REC_WRITE_KML_FLAGS(flag) \ + ((flag & REC_WRITE_KML_MASK) >> REC_WRITE_KML_BIT) + +#define SET_REC_DEC_LINK_FLAGS(flag, op_flag) \ + (flag |= op_flag << REC_DEC_LINK_BIT) +#define GET_REC_DEC_LINK_FLAGS(flag) \ + ((flag & REC_DEC_LINK_MASK) >> REC_DEC_LINK_BIT) + +#define SMFS_REC_ALL 0x1 +#define SMFS_REC_BY_COUNT 0x0 + +#define SMFS_REINT_REC 0x1 +#define SMFS_UNDO_REC 0x2 + +#define SMFS_WRITE_KML 0x1 + +#define SMFS_DEC_LINK 0x1 + +#define SMFS_DO_REINT_REC(flag) \ + (GET_REC_OP_FLAGS(flag) == SMFS_REINT_REC) +#define SMFS_DO_UNDO_REC(flag) \ + (GET_REC_OP_FLAGS(flag) == SMFS_UNDO_REC) +#define SMFS_DO_REC_ALL(flag) \ + (GET_REC_COUNT_FLAGS(flag) == SMFS_REC_ALL) +#define SMFS_DO_REC_BY_COUNT(flag) \ + (GET_REC_COUNT_FLAGS(flag) == SMFS_REC_BY_COUNT) +#define SMFS_DO_WRITE_KML(flag) \ + (GET_REC_WRITE_KML_FLAGS(flag) == SMFS_WRITE_KML) +#define SMFS_DO_DEC_LINK(flag) \ +
(GET_REC_DEC_LINK_FLAGS(flag) == SMFS_DEC_LINK) + +static inline void duplicate_inode(struct inode *dst_inode, + struct inode *src_inode) +{ + dst_inode->i_mode = src_inode->i_mode; + dst_inode->i_uid = src_inode->i_uid; + dst_inode->i_gid = src_inode->i_gid; + dst_inode->i_nlink = src_inode->i_nlink; + dst_inode->i_size = src_inode->i_size; + dst_inode->i_atime = src_inode->i_atime; + dst_inode->i_ctime = src_inode->i_ctime; + dst_inode->i_mtime = src_inode->i_mtime; + dst_inode->i_blksize = src_inode->i_blksize; + dst_inode->i_version = src_inode->i_version; + dst_inode->i_state = src_inode->i_state; + dst_inode->i_generation = src_inode->i_generation; + + /* Copy i_rdev as well, so that creating special files works. */ + dst_inode->i_rdev = src_inode->i_rdev; +} + +static inline void post_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) { + duplicate_inode(inode, cache_inode); + /* Clear I_LOCK on the cache inode here, + * otherwise we will have no other chance + * to do it. + */ + cache_inode->i_state &=~I_LOCK; + inode->i_blocks = cache_inode->i_blocks; + } +} + +static inline void pre_smfs_inode(struct inode *inode, + struct inode *cache_inode) +{ + if (inode && cache_inode) + duplicate_inode(cache_inode, inode); +} + +/* set up a file handle for the cache file: copy pos, mode, flags, owner, vfsmnt and readahead state */ +static inline void duplicate_file(struct file *dst_file, + struct file *src_file) +{ + dst_file->f_pos = src_file->f_pos; + dst_file->f_mode = src_file->f_mode; + dst_file->f_flags = src_file->f_flags; + dst_file->f_owner = src_file->f_owner; + dst_file->f_vfsmnt = src_file->f_vfsmnt; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + dst_file->f_reada = src_file->f_reada; + dst_file->f_ramax = src_file->f_ramax; + dst_file->f_raend = src_file->f_raend; + dst_file->f_ralen = src_file->f_ralen; + dst_file->f_rawin = src_file->f_rawin; +#else + dst_file->f_ra = src_file->f_ra; +#endif +} + +static inline void duplicate_sb(struct super_block *dst_sb, + struct super_block
*src_sb) +{ + dst_sb->s_blocksize = src_sb->s_blocksize; + dst_sb->s_magic = src_sb->s_magic; + dst_sb->s_blocksize_bits = src_sb->s_blocksize_bits; + dst_sb->s_maxbytes = src_sb->s_maxbytes; +} + +static inline void d_unalloc(struct dentry *dentry) +{ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + list_del(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_hash); +#else + hlist_del_init(&dentry->d_hash); +#endif + dput(dentry); /* this will free the dentry memory */ +} + +static inline struct dentry *pre_smfs_dentry(struct dentry *parent_dentry, + struct inode *cache_inode, + struct dentry *dentry) +{ + struct dentry *cache_dentry = NULL; + + cache_dentry = d_alloc(parent_dentry, &dentry->d_name); + if (!cache_dentry) + RETURN(NULL); + if (!parent_dentry) + cache_dentry->d_parent = cache_dentry; + if (cache_inode) + d_add(cache_dentry, cache_inode); + RETURN(cache_dentry); +} + +static inline void post_smfs_dentry(struct dentry *cache_dentry) +{ + if (!cache_dentry) + return; + if (cache_dentry->d_inode) + igrab(cache_dentry->d_inode); + d_unalloc(cache_dentry); +} +#endif /* __LUSTRE_SMFS_H */ diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index b18769f..f51cf73 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -14,12 +14,12 @@ #endif /* simple.c */ -struct obd_ucred { - __u32 ouc_fsuid; - __u32 ouc_fsgid; - __u32 ouc_cap; - __u32 ouc_suppgid1; - __u32 ouc_suppgid2; +struct lvfs_ucred { + __u32 luc_fsuid; + __u32 luc_fsgid; + __u32 luc_cap; + __u32 luc_suppgid1; + __u32 luc_suppgid2; }; struct lvfs_callback_ops { @@ -28,11 +28,11 @@ struct lvfs_callback_ops { #define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA #define OBD_CTXT_DEBUG /* development-only debugging */ -struct obd_run_ctxt { +struct lvfs_run_ctxt { struct vfsmount *pwdmnt; struct dentry *pwd; mm_segment_t fs; - struct obd_ucred ouc; + struct lvfs_ucred luc; int ngroups; struct lvfs_callback_ops cb_ops; #ifdef OBD_CTXT_DEBUG @@ -47,15 +47,16 @@ struct obd_run_ctxt
{ #endif /* lvfs_common.c */ -struct dentry *lvfs_fid2dentry(struct obd_run_ctxt *, __u64, __u32, __u64 ,void *data); +struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data); -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); #ifdef __KERNEL__ - +int lvfs_reint(struct super_block *sb, void *r_rec); +int lvfs_undo(struct super_block *sb, void *r_rec); struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix); struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix); int lustre_fread(struct file *file, void *buf, int len, loff_t *off); diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index 71fc431..5e125a0 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -20,8 +20,8 @@ #define l_filp_open filp_open -struct obd_run_ctxt; -struct l_file *l_dentry_open(struct obd_run_ctxt *, struct l_dentry *, +struct lvfs_run_ctxt; +struct l_file *l_dentry_open(struct lvfs_run_ctxt *, struct l_dentry *, int flags); struct l_linux_dirent { diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index b3ccd51..8635862 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -32,6 +32,7 @@ # include #endif +#include #include #include #include @@ -484,8 +485,9 @@ struct obd_device { spinlock_t obd_osfs_lock; struct obd_statfs obd_osfs; unsigned long obd_osfs_age; - struct obd_run_ctxt obd_ctxt; + struct lvfs_run_ctxt obd_lvfs_ctxt; struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; + struct obd_device *obd_observer; struct obd_export *obd_self_export; diff --git 
a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 941d9b5..cc508c3 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -600,7 +600,7 @@ obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr) { LASSERT(exp->exp_obd); - return lvfs_fid2dentry(&exp->exp_obd->obd_ctxt, id_ino, gen, gr, + return lvfs_fid2dentry(&exp->exp_obd->obd_lvfs_ctxt, id_ino, gen, gr, exp->exp_obd); } diff --git a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-iopen.patch b/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-iopen.patch deleted file mode 100644 index b3b75e3..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-iopen.patch +++ /dev/null @@ -1,1287 +0,0 @@ -diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/mm.h linux-2.4.20/include/linux/mm.h ---- linux-2.4.20.orig/include/linux/mm.h 2004-02-10 11:43:10.000000000 +0200 -+++ linux-2.4.20/include/linux/mm.h 2004-03-01 13:44:45.000000000 +0200 -@@ -468,7 +468,8 @@ extern void clear_page_tables(struct mm_ - extern int fail_writepage(struct page *); - struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); - struct file *shmem_file_setup(char * name, loff_t size); --int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr); -+struct page *shmem_getpage_locked(struct inode *inode, unsigned long idx); -+struct page *shmem_getpage_unlocked(struct inode *inode, unsigned long idx); - extern void shmem_lock(struct file * file, int lock); - extern int shmem_zero_setup(struct vm_area_struct *); - -diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4.20/include/linux/shmem_fs.h ---- linux-2.4.20.orig/include/linux/shmem_fs.h 2004-02-10 18:39:17.000000000 +0200 -+++ linux-2.4.20/include/linux/shmem_fs.h 2004-02-23 12:40:28.000000000 +0200 -@@ -7,6 +7,9 @@ - - #define SHMEM_NR_DIRECT 16 - -+#define SHMEM_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ -+#define 
SHMEM_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ -+ - /* - * A swap entry has to fit into a "unsigned long", as - * the entry is hidden in the "index" field of the -@@ -38,6 +41,9 @@ struct shmem_inode_info { - }; - - struct shmem_sb_info { -+ struct dentry *iopen; -+ unsigned long options; -+ unsigned long root_ino; - unsigned long max_blocks; /* How many blocks are allowed */ - unsigned long free_blocks; /* How many are left for allocation */ - unsigned long max_inodes; /* How many inodes are allowed */ -@@ -59,11 +65,9 @@ shmem_xattr_find(struct inode *inode, co - extern ssize_t - shmem_xattr_set(struct inode *inode, const char *name, - const void *value, u16 valuelen, int flags); -- - extern ssize_t - shmem_xattr_get(struct inode *inode, const char *name, - void *value, size_t valuelen); -- - extern int - shmem_xattr_delete(struct inode *inode, struct shmem_xattr *xattr); - -diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c ---- linux-2.4.20.orig/mm/shmem.c 2004-02-10 18:44:05.000000000 +0200 -+++ linux-2.4.20/mm/shmem.c 2004-03-01 14:37:21.000000000 +0200 -@@ -36,29 +36,45 @@ - #define TMPFS_MAGIC 0x01021994 - - #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -+ - #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) - --#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1)) -+#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + ENTRIES_PER_PAGE * \ -+ (ENTRIES_PER_PAGE/2) * (ENTRIES_PER_PAGE+1)) -+ - #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) - #define VM_ACCT(size) (((size) + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT) - - /* Pretend that each entry is of this size in directory's i_size */ --#define BOGO_DIRENT_SIZE 20 -- -+#define BOGO_DIRENT_SIZE (20) - #define SHMEM_SB(sb) (&sb->u.shmem_sb) - -+#define SHMEM_IOPEN_INO 2 -+#define SHMEM_IOPEN_NAME_LEN 32 -+ -+#define ASSERT(cond) \ -+do { \ -+ if (!(cond)) { \ -+ printk 
(KERN_EMERG \ -+ "Assertion failure in %s() at %s:%d: \"%s\"\n", \ -+ __FUNCTION__, __FILE__, __LINE__, # cond); \ -+ BUG(); \ -+ } \ -+} while (0) -+ - static struct super_operations shmem_ops; -+static struct vm_operations_struct shmem_vm_ops; - static struct address_space_operations shmem_aops; - static struct file_operations shmem_file_operations; - static struct inode_operations shmem_inode_operations; - static struct inode_operations shmem_dir_inode_operations; --static struct vm_operations_struct shmem_vm_ops; - - LIST_HEAD (shmem_inodes); - static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED; - atomic_t shmem_nrpages = ATOMIC_INIT(0); /* Not used right now */ - --static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long); -+struct page *shmem_getpage_locked(struct inode *inode, unsigned long idx); -+struct page *shmem_getpage_unlocked(struct inode *inode, unsigned long idx); - - #ifdef CONFIG_TMPFS - static struct inode_operations shmem_symlink_inode_operations; -@@ -327,7 +343,7 @@ shmem_getxattr(struct dentry *dentry, co - * inode attributes list.*/ - static int - shmem_setxattr(struct dentry *dentry, const char *name, -- void *value, size_t valuelen, int flags) -+ const void *value, size_t valuelen, int flags) - { - int error; - struct inode *inode = dentry->d_inode; -@@ -404,8 +420,8 @@ shmem_listxattr(struct dentry *dentry, c - * @inode: inode to recalc - * @swap: additional swap pages freed externally - * -- * We have to calculate the free blocks since the mm can drop pages -- * behind our back -+ * We have to calculate the free blocks since the mm can drop pages behind our -+ * back - * - * But we know that normally - * inodes->i_blocks/BLOCKS_PER_PAGE == -@@ -441,24 +457,23 @@ static void shmem_recalc_inode(struct in - * @page: optional page to add to the structure. 
Has to be preset to - * all zeros - * -- * If there is no space allocated yet it will return -ENOMEM when -- * page == 0 else it will use the page for the needed block. -+ * If there is no space allocated yet it will return -ENOMEM when page == 0 else -+ * it will use the page for the needed block. - * - * returns -EFBIG if the index is too big. - * - * - * The swap vector is organized the following way: - * -- * There are SHMEM_NR_DIRECT entries directly stored in the -- * shmem_inode_info structure. So small files do not need an addional -- * allocation. -- * -- * For pages with index > SHMEM_NR_DIRECT there is the pointer -- * i_indirect which points to a page which holds in the first half -- * doubly indirect blocks, in the second half triple indirect blocks: -+ * There are SHMEM_NR_DIRECT entries directly stored in the shmem_inode_info -+ * structure. So small files do not need an addional allocation. -+ * -+ * For pages with index > SHMEM_NR_DIRECT there is the pointer i_indirect which -+ * points to a page which holds in the first half doubly indirect blocks, in the -+ * second half triple indirect blocks: - * -- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the -- * following layout (for SHMEM_NR_DIRECT == 16): -+ * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the following -+ * layout (for SHMEM_NR_DIRECT == 16): - * - * i_indirect -> dir --> 16-19 - * | +-> 20-23 -@@ -473,7 +488,9 @@ static void shmem_recalc_inode(struct in - * +-> 48-51 - * +-> 52-55 - */ --static swp_entry_t * shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, unsigned long page) -+static swp_entry_t * -+shmem_swp_entry (struct shmem_inode_info *info, unsigned long index, -+ unsigned long page) - { - unsigned long offset; - void **dir; -@@ -520,7 +537,8 @@ static swp_entry_t * shmem_swp_entry (st - * @info: info structure for the inode - * @index: index of the page to find - */ --static inline swp_entry_t * shmem_alloc_entry (struct 
shmem_inode_info *info, unsigned long index) -+static inline swp_entry_t * -+shmem_alloc_entry(struct shmem_inode_info *info, unsigned long index) - { - unsigned long page = 0; - swp_entry_t * res; -@@ -545,7 +563,8 @@ static inline swp_entry_t * shmem_alloc_ - * @dir: pointer to the directory - * @count: number of entries to scan - */ --static int shmem_free_swp(swp_entry_t *dir, unsigned int count) -+static int -+shmem_free_swp(swp_entry_t *dir, unsigned int count) - { - swp_entry_t *ptr, entry; - int freed = 0; -@@ -573,7 +592,9 @@ static int shmem_free_swp(swp_entry_t *d - */ - - static inline unsigned long --shmem_truncate_direct(swp_entry_t *** dir, unsigned long start, unsigned long len) { -+shmem_truncate_direct(swp_entry_t ***dir, unsigned long start, -+ unsigned long len) -+{ - swp_entry_t **last, **ptr; - unsigned long off, freed = 0; - -@@ -639,7 +660,8 @@ shmem_truncate_indirect(struct shmem_ino - BUG(); - - baseidx = max & ~(ENTRIES_PER_PAGE*ENTRIES_PER_PAGE-1); -- base = (swp_entry_t ***) info->i_indirect + ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGE/ENTRIES_PER_PAGE ; -+ base = (swp_entry_t ***) info->i_indirect + ENTRIES_PER_PAGE/2 + -+ baseidx/ENTRIES_PER_PAGE/ENTRIES_PER_PAGE ; - len = max - baseidx + 1; - baseidx += ENTRIES_PER_PAGE*ENTRIES_PER_PAGE/2+SHMEM_NR_DIRECT; - } -@@ -654,7 +676,8 @@ shmem_truncate_indirect(struct shmem_ino - return shmem_truncate_direct(base, start, len); - } - --static void shmem_truncate (struct inode * inode) -+static void -+shmem_truncate(struct inode *inode) - { - unsigned long index; - unsigned long partial; -@@ -668,16 +691,16 @@ static void shmem_truncate (struct inode - partial = inode->i_size & ~PAGE_CACHE_MASK; - - if (partial) { -- swp_entry_t *entry = shmem_swp_entry(info, index-1, 0); - struct page *page; -- /* -- * This check is racy: it's faintly possible that page -- * was assigned to swap during truncate_inode_pages, -- * and now assigned to file; but better than nothing. 
-+ swp_entry_t *entry = shmem_swp_entry(info, index - 1, 0); -+ -+ /* This check is racy: it's faintly possible that page was -+ * assigned to swap during truncate_inode_pages, and now -+ * assigned to file; but better than nothing. - */ - if (!IS_ERR(entry) && entry->val) { - spin_unlock(&info->lock); -- page = shmem_getpage_locked(info, inode, index-1); -+ page = shmem_getpage_locked(inode, index - 1); - if (!IS_ERR(page)) { - memclear_highpage_flush(page, partial, - PAGE_CACHE_SIZE - partial); -@@ -697,8 +720,166 @@ static void shmem_truncate (struct inode - up(&info->sem); - } - --static void shmem_delete_inode(struct inode * inode) -+static struct inode * -+shmem_find_inode(struct super_block *sb, long int ino) -+{ -+ struct list_head *p; -+ struct inode *inode = NULL; -+ struct shmem_inode_info *info; -+ -+ spin_lock (&shmem_ilock); -+ list_for_each(p, &shmem_inodes) { -+ info = list_entry(p, struct shmem_inode_info, list); -+ -+ if (info->inode->i_ino == ino && -+ info->inode->i_sb == sb) -+ { -+ inode = info->inode; -+ break; -+ } -+ } -+ -+ spin_unlock (&shmem_ilock); -+ -+ if (inode) -+ igrab(inode); -+ -+ return inode; -+} -+ -+#define switch_fields(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void -+switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, -+ DNAME_INLINE_LEN); -+ -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+static struct dentry * -+shmem_iopen_lookup(struct inode *dir, -+ struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[SHMEM_IOPEN_NAME_LEN]; -+ struct 
shmem_sb_info *sbinfo = SHMEM_SB(dir->i_sb); -+ -+ if (dentry->d_name.len >= SHMEM_IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = sbinfo->root_ino; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if (ino < sbinfo->root_ino) -+ return ERR_PTR(-ENOENT); -+ -+ if (!(inode = shmem_find_inode(dir->i_sb, ino))) -+ return ERR_PTR(-ENOENT); -+ -+ ASSERT(list_empty(&dentry->d_alias)); -+ ASSERT(list_empty(&dentry->d_hash)); -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ ASSERT(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ -+ /* d_add(), but don't drop dcache_lock before adding dentry to inode */ -+ list_add(&dentry->d_alias, &inode->i_dentry); -+ dentry->d_inode = inode; -+ -+ __d_rehash(dentry, 0); -+ spin_unlock(&dcache_lock); -+ -+ return NULL; -+} -+ -+struct dentry * -+shmem_iopen_unalias(struct dentry *dentry, struct inode *inode) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ ASSERT(tmp->d_alias.next == &inode->i_dentry); -+ ASSERT(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) -+ return NULL; -+ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del_init(&goal->d_hash); -+ -+ 
list_del(&goal->d_child); -+ list_del(&dentry->d_child); -+ -+ switch_names(goal, dentry); -+ switch_fields(goal->d_parent, dentry->d_parent); -+ switch_fields(goal->d_name.len, dentry->d_name.len); -+ switch_fields(goal->d_name.hash, dentry->d_name.hash); -+ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs); -+ __d_rehash(goal, 0); -+ -+ return goal; -+} -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: shmem_iopen_lookup, -+}; -+ -+static void -+shmem_delete_inode(struct inode *inode) - { -+ struct dentry *dentry; -+ - #ifdef CONFIG_TMPFS_XATTR - struct list_head *tmp, *p; - struct shmem_xattr *xattr; -@@ -706,16 +887,30 @@ static void shmem_delete_inode(struct in - #endif - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - -- if (inode->i_op->truncate == shmem_truncate) { -+ if (inode->i_ino != SHMEM_IOPEN_INO) { -+ /* eliminating iopen alias */ -+ spin_lock(&dcache_lock); -+ if (!list_empty(&inode->i_dentry)) { -+ dentry = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ shmem_iopen_unalias(dentry, inode); -+ } -+ spin_unlock(&dcache_lock); -+ - spin_lock (&shmem_ilock); - list_del (&SHMEM_I(inode)->list); - spin_unlock (&shmem_ilock); -- inode->i_size = 0; -- shmem_truncate (inode); -+ -+ if (inode->i_op->truncate == shmem_truncate) { -+ inode->i_size = 0; -+ shmem_truncate (inode); -+ } -+ -+ spin_lock (&sbinfo->stat_lock); -+ sbinfo->free_inodes++; -+ spin_unlock (&sbinfo->stat_lock); - } -- spin_lock (&sbinfo->stat_lock); -- sbinfo->free_inodes++; -- spin_unlock (&sbinfo->stat_lock); -+ - #ifdef CONFIG_TMPFS_XATTR - list_for_each_safe(p, tmp, &info->xattrs) { - xattr = list_entry(p, struct shmem_xattr, list); -@@ -725,7 +920,8 @@ static void shmem_delete_inode(struct in - clear_inode(inode); - } - --static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *ptr, swp_entry_t *eptr) -+static inline int -+shmem_find_swp(swp_entry_t entry, 
swp_entry_t *ptr, swp_entry_t *eptr) - { - swp_entry_t *test; - -@@ -736,7 +932,9 @@ static inline int shmem_find_swp(swp_ent - return -1; - } - --static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) -+static int -+shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, -+ struct page *page) - { - swp_entry_t *ptr; - unsigned long idx; -@@ -875,9 +1073,11 @@ getswap: - * still need to guard against racing with shm_writepage(), which might - * be trying to move the page to the swap cache as we run. - */ --static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct inode * inode, unsigned long idx) -+struct page * -+shmem_getpage_locked(struct inode *inode, unsigned long idx) - { - struct address_space * mapping = inode->i_mapping; -+ struct shmem_inode_info *info = SHMEM_I(inode); - struct shmem_sb_info *sbinfo; - struct page * page; - swp_entry_t *entry; -@@ -941,7 +1141,8 @@ repeat: - swap_free(*entry); - *entry = (swp_entry_t) {0}; - delete_from_swap_cache(page); -- flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced) | (1 << PG_arch_1)); -+ flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | -+ (1 << PG_referenced) | (1 << PG_arch_1)); - page->flags = flags | (1 << PG_dirty); - add_to_page_cache_locked(page, mapping, idx); - info->swapped--; -@@ -985,46 +1186,53 @@ wait_retry: - goto repeat; - } - --int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr) -+struct page * -+shmem_getpage_unlocked(struct inode *inode, unsigned long idx) - { -+ struct page *page; - struct shmem_inode_info *info = SHMEM_I(inode); -- int error; - -- down (&info->sem); -- *ptr = ERR_PTR(-EFAULT); -- if (inode->i_size <= (loff_t) idx * PAGE_CACHE_SIZE) -+ down(&info->sem); -+ page = ERR_PTR(-EFAULT); -+ -+ if (inode->i_size <= (loff_t)idx * PAGE_CACHE_SIZE) - goto failed; - -- *ptr = shmem_getpage_locked(info, inode, idx); -- if (IS_ERR (*ptr)) -+ 
page = shmem_getpage_locked(inode, idx); -+ -+ if (IS_ERR(page)) - goto failed; - -- UnlockPage(*ptr); -- up (&info->sem); -- return 0; -+ UnlockPage(page); -+ up(&info->sem); -+ return page; - failed: -- up (&info->sem); -- error = PTR_ERR(*ptr); -- *ptr = NOPAGE_SIGBUS; -- if (error == -ENOMEM) -- *ptr = NOPAGE_OOM; -- return error; -+ up(&info->sem); -+ -+ if (PTR_ERR(page) == -ENOMEM) -+ return NOPAGE_OOM; -+ -+ return page; - } - --struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused) -+struct page * -+shmem_nopage(struct vm_area_struct *vma, -+ unsigned long address, int unused) - { -- struct page * page; - unsigned int idx; -+ struct page * page; - struct inode * inode = vma->vm_file->f_dentry->d_inode; - -- idx = (address - vma->vm_start) >> PAGE_CACHE_SHIFT; -- idx += vma->vm_pgoff; -+ idx = ((address - vma->vm_start) >> PAGE_CACHE_SHIFT) + -+ vma->vm_pgoff; - -- if (shmem_getpage(inode, idx, &page)) -+ page = shmem_getpage_unlocked(inode, idx); -+ -+ if (IS_ERR(page)) - return page; - - flush_page_to_ram(page); -- return(page); -+ return page; - } - - void shmem_lock(struct file * file, int lock) -@@ -1037,7 +1245,8 @@ void shmem_lock(struct file * file, int - up(&info->sem); - } - --static int shmem_mmap(struct file * file, struct vm_area_struct * vma) -+static int -+shmem_mmap(struct file * file, struct vm_area_struct * vma) - { - struct vm_operations_struct * ops; - struct inode *inode = file->f_dentry->d_inode; -@@ -1050,39 +1259,53 @@ static int shmem_mmap(struct file * file - return 0; - } - --struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev) -+static void -+shmem_fill_inode(struct inode *inode, int mode, int dev) - { -- struct inode * inode; - struct shmem_inode_info *info; -- struct shmem_sb_info *sbinfo = SHMEM_SB(sb); -+ -+ info = SHMEM_I(inode); -+ info->inode = inode; -+ spin_lock_init (&info->lock); -+ sema_init (&info->sem, 1); - -- spin_lock (&sbinfo->stat_lock); -- if 
(!sbinfo->free_inodes) { -- spin_unlock (&sbinfo->stat_lock); -- return NULL; -- } -- sbinfo->free_inodes--; -- spin_unlock (&sbinfo->stat_lock); -+#ifdef CONFIG_TMPFS_XATTR -+ INIT_LIST_HEAD(&info->xattrs); -+ info->xtail = &info->xattrs; -+#endif - -- inode = new_inode(sb); -- if (inode) { -+ inode->i_blocks = 0; -+ inode->i_rdev = NODEV; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->i_blksize = PAGE_CACHE_SIZE; -+ -+ /* handling speciall iopen inode. */ -+ if (inode->i_ino == SHMEM_IOPEN_INO) { -+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ -+ if (sbinfo->options & SHMEM_MOUNT_IOPEN_NOPRIV) -+ inode->i_mode |= 0777; -+ -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 2 * BOGO_DIRENT_SIZE; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &dcache_dir_ops; -+ inode->i_mapping->a_ops = 0; -+ } else { - inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; -- inode->i_blksize = PAGE_CACHE_SIZE; -- inode->i_blocks = 0; -- inode->i_rdev = NODEV; - inode->i_mapping->a_ops = &shmem_aops; -- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; -- info = SHMEM_I(inode); -- info->inode = inode; -- spin_lock_init (&info->lock); -- sema_init (&info->sem, 1); - --#ifdef CONFIG_TMPFS_XATTR -- INIT_LIST_HEAD(&info->xattrs); -- info->xtail = &info->xattrs; --#endif - switch (mode & S_IFMT) { - default: - init_special_inode(inode, mode, dev); -@@ -1090,9 +1313,6 @@ struct inode *shmem_get_inode(struct sup - case S_IFREG: - inode->i_op = &shmem_inode_operations; - inode->i_fop = &shmem_file_operations; -- spin_lock (&shmem_ilock); -- list_add_tail(&info->list, &shmem_inodes); -- spin_unlock (&shmem_ilock); - break; - case S_IFDIR: - inode->i_nlink++; -@@ -1104,12 +1324,59 @@ struct inode 
*shmem_get_inode(struct sup - case S_IFLNK: - break; - } -+ -+ spin_lock (&shmem_ilock); -+ list_add_tail(&info->list, &shmem_inodes); -+ spin_unlock (&shmem_ilock); -+ } -+} -+ -+struct inode * -+shmem_get_inode(struct super_block *sb, -+ int mode, int dev, int root) -+{ -+ struct inode *inode; -+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb); -+ -+ spin_lock (&sbinfo->stat_lock); -+ if (!sbinfo->free_inodes) { -+ spin_unlock (&sbinfo->stat_lock); -+ return NULL; -+ } -+ sbinfo->free_inodes--; -+ spin_unlock (&sbinfo->stat_lock); -+ -+ if ((inode = new_inode(sb))) { -+ shmem_fill_inode(inode, mode, dev); -+ if (root) -+ sbinfo->root_ino = inode->i_ino; - } -+ - return inode; - } - --static int shmem_set_size(struct shmem_sb_info *info, -- unsigned long max_blocks, unsigned long max_inodes) -+void shmem_read_inode(struct inode *inode) -+{ -+ struct shmem_sb_info *sbinfo; -+ -+ if (inode->i_ino != SHMEM_IOPEN_INO) -+ return; -+ -+ sbinfo = SHMEM_SB(inode->i_sb); -+ -+ spin_lock (&sbinfo->stat_lock); -+ if (!sbinfo->free_inodes) { -+ spin_unlock (&sbinfo->stat_lock); -+ return; -+ } -+ sbinfo->free_inodes--; -+ spin_unlock (&sbinfo->stat_lock); -+ shmem_fill_inode(inode, 0, 0); -+} -+ -+static int -+shmem_set_size(struct shmem_sb_info *info, unsigned long max_blocks, -+ unsigned long max_inodes) - { - int error; - unsigned long blocks, inodes; -@@ -1192,7 +1459,6 @@ shmem_file_write(struct file *file,const - - while (count) { - unsigned long bytes, index, offset; -- char *kaddr; - - /* - * Try to find the page in the cache. If it isn't there, -@@ -1201,9 +1467,9 @@ shmem_file_write(struct file *file,const - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; - bytes = PAGE_CACHE_SIZE - offset; -- if (bytes > count) { -+ -+ if (bytes > count) - bytes = count; -- } - - /* - * Bring in the user page that we will copy from _first_. 
-@@ -1218,7 +1484,7 @@ shmem_file_write(struct file *file,const - - info = SHMEM_I(inode); - down (&info->sem); -- page = shmem_getpage_locked(info, inode, index); -+ page = shmem_getpage_locked(inode, index); - up (&info->sem); - - status = PTR_ERR(page); -@@ -1226,17 +1492,19 @@ shmem_file_write(struct file *file,const - break; - - /* We have exclusive IO access to the page.. */ -- if (!PageLocked(page)) { -+ if (!PageLocked(page)) - PAGE_BUG(page); -- } - -- kaddr = kmap(page); -- status = copy_from_user(kaddr+offset, buf, bytes); -+ status = copy_from_user(kmap(page) + offset, -+ buf, bytes); -+ - kunmap(page); -+ - if (status) - goto fail_write; - - flush_dcache_page(page); -+ - if (bytes > 0) { - SetPageDirty(page); - written += bytes; -@@ -1266,7 +1534,8 @@ fail_write: - goto unlock; - } - --static void do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc) -+static void -+do_shmem_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc) - { - struct inode *inode = filp->f_dentry->d_inode; - struct address_space *mapping = inode->i_mapping; -@@ -1292,15 +1561,18 @@ static void do_shmem_file_read(struct fi - - nr = nr - offset; - -- if ((desc->error = shmem_getpage(inode, index, &page))) -+ page = shmem_getpage_unlocked(inode, index); -+ -+ if (IS_ERR(page)) { -+ desc->error = PTR_ERR(page); - break; -+ } - - if (mapping->i_mmap_shared != NULL) - flush_dcache_page(page); - -- /* -- * Ok, we have the page, and it's up-to-date, so -- * now we can copy it to user space... -+ /* Ok, we have the page, and it's up-to-date, so now we can copy -+ * it to user space... - * - * The actor routine returns how many bytes were actually used.. - * NOTE! This may not be the same as how much of a user buffer -@@ -1309,6 +1581,8 @@ static void do_shmem_file_read(struct fi - * pointers and the remaining count). 
- */ - nr = file_read_actor(desc, page, offset, nr); -+ -+ /* updating counters */ - offset += nr; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; -@@ -1320,7 +1594,8 @@ static void do_shmem_file_read(struct fi - UPDATE_ATIME(inode); - } - --static ssize_t shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) -+static ssize_t -+shmem_file_read(struct file * filp, char * buf, size_t count, loff_t *ppos) - { - ssize_t retval; - -@@ -1345,7 +1620,8 @@ static ssize_t shmem_file_read(struct fi - return retval; - } - --static int shmem_statfs(struct super_block *sb, struct statfs *buf) -+static int -+shmem_statfs(struct super_block *sb, struct statfs *buf) - { - struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - -@@ -1361,22 +1637,62 @@ static int shmem_statfs(struct super_blo - return 0; - } - --/* -- * Lookup the data. This is trivial - if the dentry didn't already -- * exist, we know it is negative. -- */ --static struct dentry * shmem_lookup(struct inode *dir, struct dentry *dentry) -+static int -+match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len = strlen(name); -+ -+ if (dentry->d_name.len != len) -+ return 0; -+ -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ -+ return 1; -+} -+ -+static int -+shmem_iopen_check(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ struct shmem_sb_info *sbinfo = SHMEM_SB(dir->i_sb); -+ -+ if (dir->i_ino != sbinfo->root_ino || -+ !(sbinfo->options & SHMEM_MOUNT_IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ { -+ return 0; -+ } -+ -+ if (!(inode = iget(dir->i_sb, SHMEM_IOPEN_INO))) -+ return 0; -+ -+ d_add(dentry, inode); -+ -+ spin_lock (&sbinfo->stat_lock); -+ sbinfo->iopen = dentry; -+ spin_unlock (&sbinfo->stat_lock); -+ -+ dget(dentry); -+ return 1; -+} -+ -+static struct dentry * -+shmem_lookup(struct inode *dir, struct dentry *dentry) - { -- d_add(dentry, NULL); -+ if (!shmem_iopen_check(dir, dentry)) -+ d_add(dentry, NULL); -+ - 
return NULL; - } - - /* - * File creation. Allocate an inode, and we're done.. - */ --static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) -+static int -+shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) - { -- struct inode * inode = shmem_get_inode(dir->i_sb, mode, dev); -+ struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev, 0); - int error = -ENOSPC; - - if (inode) { -@@ -1386,20 +1702,24 @@ static int shmem_mknod(struct inode *dir - dget(dentry); /* Extra count - pin the dentry in core */ - error = 0; - } -+ - return error; - } - --static int shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode) -+static int -+shmem_mkdir(struct inode * dir, struct dentry * dentry, int mode) - { - int error; - - if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) - return error; -+ - dir->i_nlink++; - return 0; - } - --static int shmem_create(struct inode *dir, struct dentry *dentry, int mode) -+static int -+shmem_create(struct inode *dir, struct dentry *dentry, int mode) - { - return shmem_mknod(dir, dentry, mode | S_IFREG, 0); - } -@@ -1407,7 +1727,8 @@ static int shmem_create(struct inode *di - /* - * Link a file.. - */ --static int shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) -+static int -+shmem_link(struct dentry *old_dentry, struct inode * dir, struct dentry * dentry) - { - struct inode *inode = old_dentry->d_inode; - -@@ -1429,13 +1750,11 @@ static inline int shmem_positive(struct - } - - /* -- * Check that a directory is empty (this works -- * for regular files too, they'll just always be -- * considered empty..). -+ * Check that a directory is empty (this works for regular files too, they'll -+ * just always be considered empty..). - * -- * Note that an empty directory can still have -- * children, they just all have to be negative.. -- */ -+ * Note that an empty directory can still have children, they just all have to -+ * be negative.. 
*/ - static int shmem_empty(struct dentry *dentry) - { - struct list_head *list; -@@ -1456,18 +1775,22 @@ static int shmem_empty(struct dentry *de - return 1; - } - --static int shmem_unlink(struct inode * dir, struct dentry *dentry) -+static int -+shmem_unlink(struct inode *dir, struct dentry *dentry) - { - struct inode *inode = dentry->d_inode; - - dir->i_size -= BOGO_DIRENT_SIZE; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - inode->i_nlink--; -- dput(dentry); /* Undo the count from "create" - this does all the work */ -+ -+ /* undo the count from "create" - this does all the work. */ -+ dput(dentry); - return 0; - } - --static int shmem_rmdir(struct inode * dir, struct dentry *dentry) -+static int -+shmem_rmdir(struct inode *dir, struct dentry *dentry) - { - if (!shmem_empty(dentry)) - return -ENOTEMPTY; -@@ -1477,12 +1800,13 @@ static int shmem_rmdir(struct inode * di - } - - /* -- * The VFS layer already does all the dentry stuff for rename, -- * we just have to decrement the usage count for the target if -- * it exists so that the VFS layer correctly free's it when it -- * gets overwritten. -+ * The VFS layer already does all the dentry stuff for rename, we just have to -+ * decrement the usage count for the target if it exists so that the VFS layer -+ * correctly free's it when it gets overwritten. 
- */ --static int shmem_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) -+static int -+shmem_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry) - { - struct inode *inode = old_dentry->d_inode; - int they_are_dirs = S_ISDIR(inode->i_mode); -@@ -1507,19 +1831,20 @@ static int shmem_rename(struct inode * o - return 0; - } - --static int shmem_symlink(struct inode * dir, struct dentry *dentry, const char * symname) -+static int -+shmem_symlink(struct inode *dir, struct dentry *dentry, -+ const char *symname) - { - int len; -- struct inode *inode; - struct page *page; -- char *kaddr; -- struct shmem_inode_info * info; -+ struct inode *inode; -+ struct shmem_inode_info *info; - - len = strlen(symname) + 1; - if (len > PAGE_CACHE_SIZE) - return -ENAMETOOLONG; - -- inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); -+ inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0, 0); - if (!inode) - return -ENOSPC; - -@@ -1531,7 +1856,7 @@ static int shmem_symlink(struct inode * - inode->i_op = &shmem_symlink_inline_operations; - } else { - down(&info->sem); -- page = shmem_getpage_locked(info, inode, 0); -+ page = shmem_getpage_locked(inode, 0); - if (IS_ERR(page)) { - up(&info->sem); - iput(inode); -@@ -1541,8 +1866,7 @@ static int shmem_symlink(struct inode * - spin_lock (&shmem_ilock); - list_add_tail(&info->list, &shmem_inodes); - spin_unlock (&shmem_ilock); -- kaddr = kmap(page); -- memcpy(kaddr, symname, len); -+ memcpy(kmap(page), symname, len); - kunmap(page); - SetPageDirty(page); - UnlockPage(page); -@@ -1556,40 +1880,52 @@ static int shmem_symlink(struct inode * - return 0; - } - --static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen) -+static int -+shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen) - { -- return vfs_readlink(dentry,buffer,buflen, (const char *)SHMEM_I(dentry->d_inode)); -+ 
return vfs_readlink(dentry,buffer, buflen, -+ (const char *)SHMEM_I(dentry->d_inode)); - } - --static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) -+static int -+shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) - { - return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode)); - } - --static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen) -+static int -+shmem_readlink(struct dentry *dentry, char *buffer, int buflen) - { -- struct page * page; -- int res = shmem_getpage(dentry->d_inode, 0, &page); -+ int res; -+ struct page *page; -+ -+ page = shmem_getpage_unlocked(dentry->d_inode, 0); - -- if (res) -- return res; -+ if (IS_ERR(page)) -+ return PTR_ERR(page); - - res = vfs_readlink(dentry,buffer,buflen, kmap(page)); - kunmap(page); - page_cache_release(page); -+ - return res; - } - --static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd) -+static int -+shmem_follow_link(struct dentry *dentry, struct nameidata *nd) - { - struct page * page; -- int res = shmem_getpage(dentry->d_inode, 0, &page); -- if (res) -- return res; -+ int res; -+ -+ page = shmem_getpage_unlocked(dentry->d_inode, 0); -+ -+ if (IS_ERR(page)) -+ return PTR_ERR(page); - - res = vfs_follow_link(nd, kmap(page)); - kunmap(page); - page_cache_release(page); -+ - return res; - } - -@@ -1610,7 +1946,10 @@ static struct inode_operations shmem_sym - #endif - }; - --static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes) -+static int -+shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, -+ unsigned long *blocks, unsigned long *inodes, -+ unsigned long *opts) - { - char *this_char, *value, *rest; - -@@ -1620,11 +1959,6 @@ static int shmem_parse_options(char *opt - for ( ; this_char; this_char = strtok(NULL,",")) { - if ((value = strchr(this_char,'=')) != NULL) { - *value++ = 0; -- } else { -- printk(KERN_ERR -- "tmpfs: No 
value for mount option '%s'\n", -- this_char); -- return 1; - } - - if (!strcmp(this_char,"size")) { -@@ -1659,6 +1993,17 @@ static int shmem_parse_options(char *opt - *gid = simple_strtoul(value,&rest,0); - if (*rest) - goto bad_val; -+ } else if (!strcmp(this_char, "iopen")) { -+ *opts |= SHMEM_MOUNT_IOPEN; -+ *opts &= ~SHMEM_MOUNT_IOPEN_NOPRIV; -+ } -+ else if (!strcmp(this_char, "noiopen")) { -+ *opts &= ~SHMEM_MOUNT_IOPEN; -+ *opts &= ~SHMEM_MOUNT_IOPEN_NOPRIV; -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ *opts |= SHMEM_MOUNT_IOPEN; -+ *opts |= SHMEM_MOUNT_IOPEN_NOPRIV; - } else { - printk(KERN_ERR "tmpfs: Bad mount option %s\n", - this_char); -@@ -1674,14 +2019,19 @@ bad_val: - - } - --static int shmem_remount_fs (struct super_block *sb, int *flags, char *data) -+static int -+shmem_remount_fs(struct super_block *sb, int *flags, char *data) - { - struct shmem_sb_info *sbinfo = &sb->u.shmem_sb; - unsigned long max_blocks = sbinfo->max_blocks; - unsigned long max_inodes = sbinfo->max_inodes; - -- if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, &max_inodes)) -+ if (shmem_parse_options (data, NULL, NULL, NULL, &max_blocks, -+ &max_inodes, &sbinfo->options)) -+ { - return -EINVAL; -+ } -+ - return shmem_set_size(sbinfo, max_blocks, max_inodes); - } - -@@ -1691,7 +2041,8 @@ int shmem_sync_file(struct file * file, - } - #endif - --static struct super_block *shmem_read_super(struct super_block * sb, void * data, int silent) -+static struct super_block * -+shmem_read_super(struct super_block * sb, void * data, int silent) - { - struct inode * inode; - struct dentry * root; -@@ -1710,11 +2061,15 @@ static struct super_block *shmem_read_su - blocks = inodes = si.totalram / 2; - - #ifdef CONFIG_TMPFS -- if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, &inodes)) -+ if (shmem_parse_options (data, &mode, &uid, &gid, &blocks, -+ &inodes, &sbinfo->options)) -+ { - return NULL; -+ } - #endif - - spin_lock_init (&sbinfo->stat_lock); -+ 
sbinfo->iopen = NULL; - sbinfo->max_blocks = blocks; - sbinfo->free_blocks = blocks; - sbinfo->max_inodes = inodes; -@@ -1724,7 +2079,7 @@ static struct super_block *shmem_read_su - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = TMPFS_MAGIC; - sb->s_op = &shmem_ops; -- inode = shmem_get_inode(sb, S_IFDIR | mode, 0); -+ inode = shmem_get_inode(sb, S_IFDIR | mode, 0, 1); - if (!inode) - return NULL; - -@@ -1739,7 +2094,19 @@ static struct super_block *shmem_read_su - return sb; - } - -+void shmem_put_super(struct super_block *sb) -+{ -+ struct dentry *iopen; -+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb); - -+ spin_lock(&sbinfo->stat_lock); -+ iopen = sbinfo->iopen; -+ sbinfo->iopen = NULL; -+ spin_unlock(&sbinfo->stat_lock); -+ -+ if (iopen) -+ dput(iopen); -+} - - static struct address_space_operations shmem_aops = { - writepage: shmem_writepage, -@@ -1790,11 +2157,13 @@ static struct super_operations shmem_ops - remount_fs: shmem_remount_fs, - #endif - delete_inode: shmem_delete_inode, -- put_inode: force_delete, -+ read_inode: shmem_read_inode, -+ put_inode: force_delete, -+ put_super: shmem_put_super, - }; - - static struct vm_operations_struct shmem_vm_ops = { -- nopage: shmem_nopage, -+ nopage: shmem_nopage, - }; - - #ifdef CONFIG_TMPFS -@@ -1885,7 +2254,7 @@ struct file *shmem_file_setup(char * nam - goto put_dentry; - - error = -ENOSPC; -- inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); -+ inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, 0); - if (!inode) - goto close_file; - -@@ -1921,15 +2290,17 @@ int shmem_zero_setup(struct vm_area_stru - - if (vma->vm_file) - fput (vma->vm_file); -+ - vma->vm_file = file; - vma->vm_ops = &shmem_vm_ops; - return 0; - } - - EXPORT_SYMBOL(shmem_file_setup); --EXPORT_SYMBOL(shmem_getpage); - EXPORT_SYMBOL(shmem_xattr_find); - EXPORT_SYMBOL(shmem_xattr_set); - EXPORT_SYMBOL(shmem_xattr_get); - EXPORT_SYMBOL(shmem_xattr_delete); - EXPORT_SYMBOL(shmem_xattr_remove); 
-+EXPORT_SYMBOL(shmem_getpage_locked); -+EXPORT_SYMBOL(shmem_getpage_unlocked); diff --git a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch b/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch deleted file mode 100644 index a807ac7..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-tmpfs-xattr.patch +++ /dev/null @@ -1,543 +0,0 @@ -diff -rupN --exclude='ide*' linux-2.4.20.orig/fs/Config.in linux-2.4.20/fs/Config.in ---- linux-2.4.20.orig/fs/Config.in 2002-11-29 01:53:15.000000000 +0200 -+++ linux-2.4.20/fs/Config.in 2004-02-08 21:37:47.000000000 +0200 -@@ -48,6 +48,9 @@ if [ "$CONFIG_JFFS2_FS" = "y" -o "$CONFI - fi - tristate 'Compressed ROM file system support' CONFIG_CRAMFS - bool 'Virtual memory file system support (former shm fs)' CONFIG_TMPFS -+if [ "$CONFIG_TMPFS" = "y" ]; then -+ bool ' tmpfs Extended Attributes' CONFIG_TMPFS_XATTR -+fi - define_bool CONFIG_RAMFS y - - tristate 'ISO 9660 CDROM file system support' CONFIG_ISO9660_FS -diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/mm.h linux-2.4.20/include/linux/mm.h ---- linux-2.4.20.orig/include/linux/mm.h 2002-08-03 03:39:45.000000000 +0300 -+++ linux-2.4.20/include/linux/mm.h 2004-02-10 11:43:10.000000000 +0200 -@@ -468,6 +468,7 @@ extern void clear_page_tables(struct mm_ - extern int fail_writepage(struct page *); - struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); - struct file *shmem_file_setup(char * name, loff_t size); -+int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr); - extern void shmem_lock(struct file * file, int lock); - extern int shmem_zero_setup(struct vm_area_struct *); - -diff -rupN --exclude='ide*' linux-2.4.20.orig/include/linux/shmem_fs.h linux-2.4.20/include/linux/shmem_fs.h ---- linux-2.4.20.orig/include/linux/shmem_fs.h 2001-12-21 19:42:03.000000000 +0200 -+++ linux-2.4.20/include/linux/shmem_fs.h 2004-02-10 18:39:17.000000000 +0200 -@@ -3,6 +3,8 @@ - - /* inode in-kernel 
data */ - -+#include -+ - #define SHMEM_NR_DIRECT 16 - - /* -@@ -28,6 +30,10 @@ struct shmem_inode_info { - unsigned long swapped; - int locked; /* into memory */ - struct list_head list; -+#ifdef CONFIG_TMPFS_XATTR -+ struct list_head xattrs; -+ struct list_head *xtail; -+#endif - struct inode *inode; - }; - -@@ -39,6 +45,32 @@ struct shmem_sb_info { - spinlock_t stat_lock; - }; - -+#ifdef CONFIG_TMPFS_XATTR -+struct shmem_xattr { -+ u8 namelen; -+ u16 valuelen; -+ void *entity; -+ struct list_head list; -+}; -+ -+extern struct shmem_xattr * -+shmem_xattr_find(struct inode *inode, const char *name); -+ -+extern ssize_t -+shmem_xattr_set(struct inode *inode, const char *name, -+ const void *value, u16 valuelen, int flags); -+ -+extern ssize_t -+shmem_xattr_get(struct inode *inode, const char *name, -+ void *value, size_t valuelen); -+ -+extern int -+shmem_xattr_delete(struct inode *inode, struct shmem_xattr *xattr); -+ -+extern int -+shmem_xattr_remove(struct inode *inode, const char *name); -+#endif -+ - #define SHMEM_I(inode) (&inode->u.shmem_i) - - #endif -diff -rupN --exclude='ide*' linux-2.4.20.orig/mm/shmem.c linux-2.4.20/mm/shmem.c ---- linux-2.4.20.orig/mm/shmem.c 2002-11-29 01:53:15.000000000 +0200 -+++ linux-2.4.20/mm/shmem.c 2004-02-10 18:44:05.000000000 +0200 -@@ -27,6 +27,8 @@ - #include - #include - #include -+#include -+#include - - #include - -@@ -58,6 +60,344 @@ atomic_t shmem_nrpages = ATOMIC_INIT(0); - - static struct page *shmem_getpage_locked(struct shmem_inode_info *, struct inode *, unsigned long); - -+#ifdef CONFIG_TMPFS -+static struct inode_operations shmem_symlink_inode_operations; -+static struct inode_operations shmem_symlink_inline_operations; -+#endif -+ -+#ifdef CONFIG_TMPFS_XATTR -+#define xattr_name(xattr) \ -+ ((char *)xattr->entity) -+ -+#define xattr_value(xattr) \ -+ ((void *)xattr->entity + xattr->namelen + 1) -+ -+/* allocates memory for new xattr with name length of @namelen and value size of -+ * @valuelen. 
*/ -+static struct shmem_xattr * -+shmem_xattr_alloc(u8 namelen, u16 valuelen) -+{ -+ u16 size; -+ struct shmem_xattr *xattr; -+ -+ size = namelen + 1 + valuelen; -+ -+ if (!(xattr = kmalloc(sizeof(*xattr), GFP_KERNEL))) -+ return NULL; -+ -+ if (!(xattr->entity = kmalloc(size, GFP_KERNEL))) { -+ kfree(xattr); -+ return NULL; -+ } -+ -+ xattr->namelen = namelen; -+ xattr->valuelen = valuelen; -+ return xattr; -+} -+ -+/* reallocs passed @xattr with new @value. */ -+static int -+shmem_xattr_realloc(struct shmem_xattr *xattr, u16 valuelen) -+{ -+ if (xattr->valuelen != valuelen) { -+ u16 new_size; -+ void *entity; -+ -+ /* allocating new entity. */ -+ new_size = xattr->namelen + 1 + valuelen; -+ -+ if (!(entity = kmalloc(new_size, GFP_KERNEL))) -+ return -ENOMEM; -+ -+ /* copying old name to new entity.*/ -+ memcpy(entity, xattr->entity, xattr->namelen); -+ *((char *)(entity + xattr->namelen)) = '\0'; -+ -+ /* finishing the change.*/ -+ kfree(xattr->entity); -+ xattr->entity = entity; -+ xattr->valuelen = valuelen; -+ } -+ -+ return 0; -+} -+ -+/* assigns @name and @value to passed @xattr. */ -+static int -+shmem_xattr_assign(struct shmem_xattr *xattr, -+ const char *name, const void *value) -+{ -+ if (name) { -+ if (xattr->namelen != strlen(name)) -+ return -EINVAL; -+ -+ memcpy(xattr->entity, name, xattr->namelen); -+ *((char *)(xattr->entity + xattr->namelen)) = '\0'; -+ } -+ -+ if (value) { -+ memcpy(xattr_value(xattr), -+ value, xattr->valuelen); -+ } -+ -+ return 0; -+} -+ -+/* frees passed @xattr. */ -+static void -+shmem_xattr_free(struct shmem_xattr *xattr) -+{ -+ kfree(xattr->entity); -+ kfree(xattr); -+} -+ -+/* lookups passed @name inside @inode's xattr list. 
*/ -+struct shmem_xattr * -+shmem_xattr_find(struct inode *inode, const char *name) -+{ -+ u8 namelen; -+ struct list_head *p; -+ struct shmem_xattr *xattr; -+ struct shmem_inode_info *info; -+ -+ info = SHMEM_I(inode); -+ namelen = strlen(name); -+ -+ list_for_each(p, &info->xattrs) { -+ xattr = list_entry(p, struct shmem_xattr, list); -+ -+ if (xattr->namelen == namelen && -+ !memcmp(xattr->entity, name, namelen)) -+ { -+ return xattr; -+ } -+ } -+ -+ return NULL; -+} -+ -+/* allocates new xattr and fills it with passed value, name, etc. */ -+ssize_t -+shmem_xattr_set(struct inode *inode, const char *name, -+ const void *value, u16 valuelen, int flags) -+{ -+ ssize_t error; -+ struct shmem_xattr *xattr; -+ struct shmem_inode_info *info; -+ -+ xattr = shmem_xattr_find(inode, name); -+ -+ if (xattr) { -+ if (flags & XATTR_CREATE) -+ return -EEXIST; -+ -+ if ((error = shmem_xattr_realloc(xattr, valuelen))) -+ return error; -+ -+ if ((error = shmem_xattr_assign(xattr, NULL, value))) -+ return error; -+ } else { -+ info = SHMEM_I(inode); -+ -+ if (flags & XATTR_REPLACE) -+ return -ENODATA; -+ -+ if (!(xattr = shmem_xattr_alloc(strlen(name), valuelen))) -+ return -ENOMEM; -+ -+ if ((error = shmem_xattr_assign(xattr, name, value))) -+ return error; -+ -+ list_add(&xattr->list, info->xtail); -+ info->xtail = &xattr->list; -+ } -+ -+ return 0; -+} -+ -+/* fills passed @value by attribute value found by @name. */ -+ssize_t -+shmem_xattr_get(struct inode *inode, const char *name, -+ void *value, size_t valuelen) -+{ -+ struct shmem_xattr *xattr; -+ -+ if (!(xattr = shmem_xattr_find(inode, name))) -+ return -ENODATA; -+ -+ /* handling value size guess request */ -+ if (valuelen == 0 || value == NULL) -+ return xattr->valuelen; -+ -+ if (xattr->valuelen > valuelen) -+ return -ERANGE; -+ -+ memcpy(value, xattr_value(xattr), -+ xattr->valuelen); -+ -+ return xattr->valuelen; -+} -+ -+/* deletes passed @xattr from inode xattr list and frees it. 
*/ -+int -+shmem_xattr_delete(struct inode *inode, struct shmem_xattr *xattr) -+{ -+ struct shmem_inode_info *info; -+ -+ info = SHMEM_I(inode); -+ -+ if (&xattr->list == info->xtail) -+ info->xtail = xattr->list.prev; -+ -+ list_del(&xattr->list); -+ shmem_xattr_free(xattr); -+ -+ return 0; -+} -+ -+/* removes attribute found by passed @name. */ -+int -+shmem_xattr_remove(struct inode *inode, const char *name) -+{ -+ struct shmem_xattr *xattr; -+ -+ if (!(xattr = shmem_xattr_find(inode, name))) -+ return -ENODATA; -+ -+ return shmem_xattr_delete(inode, xattr); -+} -+ -+static int -+shmem_xattr_can_read(struct inode *inode, const char *name) -+{ -+ /* check for inlined symlinks. They store path inside inode info and -+ * thus, cannot be used for access xattrs. */ -+ if (S_ISLNK(inode->i_mode) && -+ inode->i_op == &shmem_symlink_inline_operations) -+ { -+ return -EPERM; -+ } -+ -+ return permission(inode, MAY_READ); -+} -+ -+static int -+shmem_xattr_can_write(struct inode *inode, const char *name) -+{ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || -+ S_ISLNK(inode->i_mode)) -+ { -+ return -EPERM; -+ } -+ -+ if ((!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) || -+ inode->i_mode & S_ISVTX) -+ { -+ return -EPERM; -+ } -+ -+ return permission(inode, MAY_WRITE); -+} -+ -+/* fills passed @value with data of attribute @name from @dentry->d_inode -+ * attribute list. 
*/ -+static ssize_t -+shmem_getxattr(struct dentry *dentry, const char *name, -+ void *value, size_t valuelen) -+{ -+ int error; -+ struct inode *inode = dentry->d_inode; -+ -+ if (name == NULL) -+ return -EINVAL; -+ -+ if ((error = shmem_xattr_can_read(inode, name))) -+ return error; -+ -+ return shmem_xattr_get(inode, name, -+ value, valuelen); -+ -+} -+ -+/* updates attribute with @name inside @dentry->d_inode attributes list (if -+ * any), or creates new attribute with name @name and value @value and put it to -+ * inode attributes list.*/ -+static int -+shmem_setxattr(struct dentry *dentry, const char *name, -+ void *value, size_t valuelen, int flags) -+{ -+ int error; -+ struct inode *inode = dentry->d_inode; -+ -+ if (name == NULL) -+ return -EINVAL; -+ -+ if ((error = shmem_xattr_can_write(inode, name))) -+ return error; -+ -+ if (value == NULL) { -+ value = ""; -+ valuelen = 0; -+ } -+ -+ return shmem_xattr_set(inode, name, value, -+ valuelen, flags); -+} -+ -+/* removes attribute with passed @name from @dentry->d_inode attributes list. */ -+static int -+shmem_removexattr(struct dentry *dentry, const char *name) -+{ -+ int error; -+ struct inode *inode = dentry->d_inode; -+ -+ if (name == NULL) -+ return -EINVAL; -+ -+ if ((error = shmem_xattr_can_write(inode, name))) -+ return error; -+ -+ return shmem_xattr_remove(inode, name); -+} -+ -+/* fills passed @data with list of @dentry->d_inode attributes. Returns size of -+ * actuall data put to @data. */ -+static ssize_t -+shmem_listxattr(struct dentry *dentry, char *data, size_t buf_size) -+{ -+ ssize_t size = 0; -+ struct list_head *p; -+ struct shmem_xattr *xattr; -+ struct shmem_inode_info *info; -+ struct inode *inode = dentry->d_inode; -+ -+ info = SHMEM_I(inode); -+ -+ list_for_each(p, &info->xattrs) { -+ xattr = list_entry(p, struct shmem_xattr, list); -+ size += xattr->namelen + 1; -+ } -+ -+ /* handling data size guess request. 
*/ -+ if (buf_size == 0 || data == NULL) -+ return size; -+ -+ if (size > buf_size) -+ return -ERANGE; -+ -+ list_for_each(p, &info->xattrs) { -+ xattr = list_entry(p, struct shmem_xattr, list); -+ memcpy(data, xattr->entity, xattr->namelen + 1); -+ data += xattr->namelen + 1; -+ } -+ -+ return size; -+} -+#endif -+ - /* - * shmem_recalc_inode - recalculate the size of an inode - * -@@ -359,6 +699,11 @@ static void shmem_truncate (struct inode - - static void shmem_delete_inode(struct inode * inode) - { -+#ifdef CONFIG_TMPFS_XATTR -+ struct list_head *tmp, *p; -+ struct shmem_xattr *xattr; -+ struct shmem_inode_info * info = SHMEM_I(inode); -+#endif - struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); - - if (inode->i_op->truncate == shmem_truncate) { -@@ -371,6 +716,12 @@ static void shmem_delete_inode(struct in - spin_lock (&sbinfo->stat_lock); - sbinfo->free_inodes++; - spin_unlock (&sbinfo->stat_lock); -+#ifdef CONFIG_TMPFS_XATTR -+ list_for_each_safe(p, tmp, &info->xattrs) { -+ xattr = list_entry(p, struct shmem_xattr, list); -+ shmem_xattr_delete(inode, xattr); -+ } -+#endif - clear_inode(inode); - } - -@@ -634,7 +985,7 @@ wait_retry: - goto repeat; - } - --static int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr) -+int shmem_getpage(struct inode * inode, unsigned long idx, struct page **ptr) - { - struct shmem_inode_info *info = SHMEM_I(inode); - int error; -@@ -727,6 +1078,11 @@ struct inode *shmem_get_inode(struct sup - info->inode = inode; - spin_lock_init (&info->lock); - sema_init (&info->sem, 1); -+ -+#ifdef CONFIG_TMPFS_XATTR -+ INIT_LIST_HEAD(&info->xattrs); -+ info->xtail = &info->xattrs; -+#endif - switch (mode & S_IFMT) { - default: - init_special_inode(inode, mode, dev); -@@ -777,10 +1133,6 @@ out: - } - - #ifdef CONFIG_TMPFS -- --static struct inode_operations shmem_symlink_inode_operations; --static struct inode_operations shmem_symlink_inline_operations; -- - static ssize_t - shmem_file_write(struct file 
*file,const char *buf,size_t count,loff_t *ppos) - { -@@ -1250,6 +1602,12 @@ static struct inode_operations shmem_sym - truncate: shmem_truncate, - readlink: shmem_readlink, - follow_link: shmem_follow_link, -+#ifdef CONFIG_TMPFS_XATTR -+ setxattr: shmem_setxattr, -+ getxattr: shmem_getxattr, -+ listxattr: shmem_listxattr, -+ removexattr: shmem_removexattr, -+#endif - }; - - static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long * blocks, unsigned long *inodes) -@@ -1398,6 +1756,12 @@ static struct file_operations shmem_file - - static struct inode_operations shmem_inode_operations = { - truncate: shmem_truncate, -+#ifdef CONFIG_TMPFS_XATTR -+ setxattr: shmem_setxattr, -+ getxattr: shmem_getxattr, -+ listxattr: shmem_listxattr, -+ removexattr: shmem_removexattr, -+#endif - }; - - static struct inode_operations shmem_dir_inode_operations = { -@@ -1411,6 +1775,12 @@ static struct inode_operations shmem_dir - rmdir: shmem_rmdir, - mknod: shmem_mknod, - rename: shmem_rename, -+#ifdef CONFIG_TMPFS_XATTR -+ setxattr: shmem_setxattr, -+ getxattr: shmem_getxattr, -+ listxattr: shmem_listxattr, -+ removexattr: shmem_removexattr, -+#endif - #endif - }; - -@@ -1557,3 +1927,9 @@ int shmem_zero_setup(struct vm_area_stru - } - - EXPORT_SYMBOL(shmem_file_setup); -+EXPORT_SYMBOL(shmem_getpage); -+EXPORT_SYMBOL(shmem_xattr_find); -+EXPORT_SYMBOL(shmem_xattr_set); -+EXPORT_SYMBOL(shmem_xattr_get); -+EXPORT_SYMBOL(shmem_xattr_delete); -+EXPORT_SYMBOL(shmem_xattr_remove); diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20 index ae838ca..d11bec0 100644 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ b/lustre/kernel_patches/series/vanilla-2.4.20 @@ -50,7 +50,5 @@ kernel_text_address-2.4.20-vanilla.patch ext3-xattr-ptr-arith-fix.patch gfp_memalloc-2.4.22.patch procfs-ndynamic-2.4.patch -linux-2.4.20-tmpfs-xattr.patch -linux-2.4.20-tmpfs-iopen.patch linux-2.4.20-filemap.patch 
ext3-truncate-buffer-head.patch diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 2a09a2e..37cca17 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1005,6 +1005,11 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) rc = llog_origin_handle_next_block(req); ldlm_callback_reply(req, rc); RETURN(0); + case LLOG_ORIGIN_HANDLE_PREV_BLOCK: + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); + rc = llog_origin_handle_prev_block(req); + ldlm_callback_reply(req, rc); + RETURN(0); case LLOG_ORIGIN_HANDLE_READ_HEADER: OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_read_header(req); diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 7809366..5931326 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -56,14 +56,15 @@ * we need to keep cookies in stripe order, even if some are NULL, so that * the right cookies are passed back to the right OSTs at the client side. * Unset cookies should be all-zero (which will never occur naturally). 
*/ -static int lov_llog_origin_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) +static int lov_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data) { struct obd_device *obd = ctxt->loc_obd; struct lov_obd *lov = &obd->u.lov; struct lov_oinfo *loi; struct llog_unlink_rec *lur; + struct lov_stripe_md *lsm = (struct lov_stripe_md *)data; int i, rc = 0; ENTRY; @@ -82,7 +83,7 @@ static int lov_llog_origin_add(struct llog_ctxt *ctxt, lur->lur_oid = loi->loi_id; lur->lur_ogen = loi->loi_gr; rc += llog_add(cctxt, &lur->lur_hdr, NULL, logcookies + rc, - numcookies - rc); + numcookies - rc, NULL); } OBD_FREE(lur, sizeof(*lur)); @@ -119,9 +120,11 @@ static int lov_llog_origin_connect(struct llog_ctxt *ctxt, int count, } /* the replicators commit callback */ -static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags) +static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, + void *data) { + struct lov_stripe_md *lsm = (struct lov_stripe_md *)data; struct lov_obd *lov; struct obd_device *obd = ctxt->loc_obd; struct lov_oinfo *loi; @@ -138,7 +141,7 @@ static int lov_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls struct llog_ctxt *cctxt = llog_get_context(child, ctxt->loc_idx); int err; - err = llog_cancel(cctxt, NULL, 1, cookies, flags); + err = llog_cancel(cctxt, 1, cookies, flags, NULL); if (err && lov->tgts[loi->loi_ost_idx].active) { CERROR("error: objid "LPX64" subobj "LPX64 " on OST idx %d: rc = %d\n", lsm->lsm_object_id, @@ -159,21 +162,20 @@ static struct llog_operations lov_size_repl_logops = { lop_cancel: lov_llog_repl_cancel }; - int lov_llog_init(struct obd_device *obd, struct obd_device *tgt, int count, struct llog_catid *logid) { struct lov_obd *lov 
= &obd->u.lov; int i, rc = 0; ENTRY; - - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, - &lov_unlink_orig_logops); + + rc = obd_llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, + &lov_unlink_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, - &lov_size_repl_logops); + rc = obd_llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, + &lov_size_repl_logops); if (rc) RETURN(rc); @@ -195,11 +197,11 @@ int lov_llog_finish(struct obd_device *obd, int count) int i, rc = 0; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); if (rc) RETURN(rc); - rc = llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); if (rc) RETURN(rc); diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in index 791e48d..89c76dd 100644 --- a/lustre/lvfs/Makefile.in +++ b/lustre/lvfs/Makefile.in @@ -1,6 +1,7 @@ -MODULES := lvfs fsfilt_@BACKINGFS@ +MODULES := lvfs fsfilt_@BACKINGFS@ fsfilt_smfs -lvfs-objs := lvfs_common.o lvfs_linux.o fsfilt.o +lvfs-objs := fsfilt.o lvfs_common.o llog_lvfs.o lvfs_linux.o +lvfs-objs += llog.o llog_cat.o lvfs_reint.o lvfs_undo.o ifeq ($(PATCHLEVEL),6) fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o @@ -9,6 +10,8 @@ $(obj)/fsfilt-%.c: $(obj)/fsfilt_%.c ln -s $< $@ endif +fsfilt_smfs-objs := fsfilt-smfs.o + # for on 2.6 EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs diff --git a/lustre/lvfs/Makefile.mk b/lustre/lvfs/Makefile.mk index 81c177a..bc1e6b2 100644 --- a/lustre/lvfs/Makefile.mk +++ b/lustre/lvfs/Makefile.mk @@ -1,4 +1,5 @@ include $(src)/../portals/Kernelenv -obj-y += lvfs.o fsfilt_ext3.o -lvfs-objs := fsfilt.o lvfs_common.o lvfs_linux.o +obj-y += lvfs.o fsfilt_ext3.o fsfilt_smfs.o +lvfs-objs := fsfilt.o lvfs_common.o llog_lvfs.o lvfs_linux.o +lvfs-objs += llog.o llog_cat.o lvfs_reint.o lvfs_undo.o diff --git 
a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index 1261554..16a2658 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -5,7 +5,7 @@ if LIBLUSTRE noinst_LIBRARIES = liblvfs.a -liblvfs_a_SOURCES = lvfs_userfs.c +liblvfs_a_SOURCES = lvfs_userfs.c llog_lvfs.c llog_cat.c llog.c liblvfs_a_CFLAGS = $(LLCFLAGS) liblvfs_a_CPPFLAGS = $(LLCPPFLAGS) @@ -16,14 +16,17 @@ endif if MODULES -modulefs_DATA = lvfs$(KMODEXT) fsfilt_$(BACKINGFS)$(KMODEXT) +modulefs_DATA = lvfs$(KMODEXT) fsfilt_$(BACKINGFS)$(KMODEXT) fsfilt_smfs$(KMODEXT) -sources: fsfilt_$(BACKINGFS).c +sources: fsfilt_$(BACKINGFS).c fsfilt_smfs.c touch sources fsfilt_extN.c: fsfilt_ext3.c sed -e "s/EXT3/EXTN/g" -e "s/ext3/extN/g" $< > $@ +fsfilt_smfs.c: + $< > $@ + ldiskfs_sed_flags = \ -e "s/dx_hash_info/ext3_dx_hash_info/g" \ -e "s/dir_private_info/ext3_dir_private_info/g" \ @@ -35,8 +38,9 @@ fsfilt_ldiskfs.c: fsfilt_ext3.c endif # MODULES -DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \ - lvfs_internal.h lvfs_linux.c lvfs_userfs.c +DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_smfs.c fsfilt_reiserfs.c \ + lvfs_common.c lvfs_internal.h lvfs_linux.c llog.c llog_cat.c \ + llog_lvfs.c lvfs_reint.c lvfs_undo.c lvfs_userfs.c MOSTLYCLEANFILES = *.o *.ko *.mod.c CLEANFILES = fsfilt-*.c fsfilt_ldiskfs.c fsfilt_extN.c sources diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 5457625..1870988 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -45,6 +45,10 @@ #include #include #include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include +#include +#endif static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); @@ -82,6 +86,19 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, goto journal_start; } + /* XXX BUG 3188 -- must return to one set of opcodes */ + /* FIXME - cache hook */ + if (op & 0x20) { + nblocks += 
EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; + op = op & ~0x20; + } + + /* FIXME - kml */ + if (op & 0x10) { + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; + op = op & ~0x10; + } + switch(op) { case FSFILT_OP_RMDIR: case FSFILT_OP_UNLINK: @@ -123,8 +140,11 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, nblocks = (LLOG_CHUNK_SIZE >> inode->i_blkbits) + EXT3_DELETE_TRANS_BLOCKS * logs; break; + case FSFILT_OP_NOOP: + nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; + break; default: CERROR("unknown transaction start op %d\n", op); - LBUG(); + LBUG(); } LASSERT(current->journal_info == desc_private); @@ -350,7 +370,7 @@ static int fsfilt_ext3_commit_wait(struct inode *inode, void *h) tid_t tid = (tid_t)(long)h; CDEBUG(D_INODE, "commit wait: %lu\n", (unsigned long) tid); - if (is_journal_aborted(EXT3_JOURNAL(inode))) + if (is_journal_aborted(EXT3_JOURNAL(inode))) return -EIO; log_wait_commit(EXT3_JOURNAL(inode), tid); @@ -467,6 +487,57 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) return rc; } +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static int fsfilt_ext3_send_bio(struct inode *inode, struct bio *bio) +{ + submit_bio(WRITE, bio); + return 0; +} +#else +static int fsfilt_ext3_send_bio(struct inode *inode, struct kiobuf *bio) +{ + int rc, blocks_per_page; + + rc = brw_kiovec(WRITE, 1, &bio, inode->i_dev, + bio->blocks, 1 << inode->i_blkbits); + + blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + + if (rc != (1 << inode->i_blkbits) * bio->nr_pages * + blocks_per_page) { + CERROR("short write? expected %d, wrote %d\n", + (1 << inode->i_blkbits) * bio->nr_pages * + blocks_per_page, rc); + } + + return rc; +} +#endif + +/* FIXME-UMKA: This should be used in 2.6.x io code later. 
*/ +static struct page *fsfilt_ext3_getpage(struct inode *inode, long int index) +{ + int rc; + struct page *page; + + page = grab_cache_page(inode->i_mapping, index); + if (page == NULL) + return ERR_PTR(-ENOMEM); + + if (PageUptodate(page)) { + unlock_page(page); + return page; + } + + rc = inode->i_mapping->a_ops->readpage(NULL, page); + if (rc < 0) { + page_cache_release(page); + return ERR_PTR(rc); + } + + return page; +} + static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, loff_t *off) { @@ -533,7 +604,9 @@ static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) atomic_dec(&fcb_cache_count); } -static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, +static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, + struct super_block *sb, + __u64 last_rcvd, void *handle, fsfilt_cb_t cb_func, void *cb_data) { @@ -752,6 +825,44 @@ static int fsfilt_ext3_setup(struct super_block *sb) return 0; } +static int fsfilt_ext3_set_xattr(struct inode * inode, void *handle, char *name, + void *buffer, int buffer_size) +{ + int rc = 0; + + lock_kernel(); + + rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, + name, buffer, buffer_size, 0); + unlock_kernel(); + if (rc) + CERROR("set xattr %s from inode %lu: rc %d\n", + name, inode->i_ino, rc); + return rc; +} + +static int fsfilt_ext3_get_xattr(struct inode *inode, char *name, + void *buffer, int buffer_size) +{ + int rc = 0; + lock_kernel(); + + rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, + name, buffer, buffer_size); + unlock_kernel(); + + if (buffer == NULL) + return (rc == -ENODATA) ? 0 : rc; + if (rc < 0) { + CDEBUG(D_INFO, "error getting EA %s from inode %lu: rc %d\n", + name, inode->i_ino, rc); + memset(buffer, 0, buffer_size); + return (rc == -ENODATA) ? 0 : rc; + } + + return rc; +} + /* If fso is NULL, op is FSFILT operation, otherwise op is number of fso objects. 
Logs is number of logfiles to update */ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs) @@ -804,6 +915,10 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_write_record = fsfilt_ext3_write_record, .fs_read_record = fsfilt_ext3_read_record, .fs_setup = fsfilt_ext3_setup, + .fs_getpage = fsfilt_ext3_getpage, + .fs_send_bio = fsfilt_ext3_send_bio, + .fs_set_xattr = fsfilt_ext3_set_xattr, + .fs_get_xattr = fsfilt_ext3_get_xattr, .fs_get_op_len = fsfilt_ext3_get_op_len, }; @@ -811,7 +926,6 @@ static int __init fsfilt_ext3_init(void) { int rc; - //rc = ext3_xattr_register(); fcb_cache = kmem_cache_create("fsfilt_ext3_fcb", sizeof(struct fsfilt_cb_data), 0, 0, NULL, NULL); @@ -839,8 +953,6 @@ static void __exit fsfilt_ext3_exit(void) CERROR("can't free fsfilt callback cache: count %d, rc = %d\n", atomic_read(&fcb_cache_count), rc); } - - //rc = ext3_xattr_unregister(); } module_init(fsfilt_ext3_init); diff --git a/lustre/lvfs/fsfilt_smfs.c b/lustre/lvfs/fsfilt_smfs.c new file mode 100644 index 0000000..7dfee3c --- /dev/null +++ b/lustre/lvfs/fsfilt_smfs.c @@ -0,0 +1,697 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/lib/fsfilt_smfs.c + * Lustre filesystem abstraction routines + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * Author: Wang Di + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void *fsfilt_smfs_start(struct inode *inode, int op, + void *desc_private, int logs) +{ + void *handle; + struct inode *cache_inode = I2CI(inode); + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + + if (cache_fsfilt == NULL) + return NULL; + + if (!cache_fsfilt->fs_start) + return ERR_PTR(-ENOSYS); + + handle = cache_fsfilt->fs_start(cache_inode, op, + desc_private, logs); + return handle; +} + +static void *fsfilt_smfs_brw_start(int objcount, struct fsfilt_objinfo *fso, + int niocount, struct niobuf_local *nb, + void *desc_private, int logs) +{ + struct fsfilt_operations *cache_fsfilt; + struct dentry *cache_dentry = NULL; + struct inode *cache_inode = NULL; + struct fsfilt_objinfo cache_fso; + void *rc = NULL; + + ENTRY; + cache_fsfilt = I2FOPS(fso->fso_dentry->d_inode); + if (cache_fsfilt == NULL) + return NULL; + + cache_inode = I2CI(fso->fso_dentry->d_inode); + cache_dentry = pre_smfs_dentry(NULL, cache_inode, fso->fso_dentry); + + if (!cache_dentry) + GOTO(exit, rc = ERR_PTR(-ENOMEM)); + + cache_fso.fso_dentry = cache_dentry; + cache_fso.fso_bufcnt = fso->fso_bufcnt; + + if (!cache_fsfilt->fs_brw_start) + return ERR_PTR(-ENOSYS); + + rc = (cache_fsfilt->fs_brw_start(objcount, &cache_fso, + niocount, nb, desc_private, + logs)); +exit: + post_smfs_dentry(cache_dentry); + return rc; +} + +static int fsfilt_smfs_commit(struct inode *inode, void *h, + int force_sync) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + cache_inode = I2CI(inode); + + if (cache_fsfilt == NULL) + RETURN(rc); + + if (!cache_fsfilt->fs_commit) + 
RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_commit(cache_inode, h, force_sync); + + RETURN(rc); +} + +static int fsfilt_smfs_commit_async(struct inode *inode, void *h, + void **wait_handle) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + cache_inode = I2CI(inode); + if (cache_fsfilt == NULL) + RETURN(-EINVAL); + + if (!cache_fsfilt->fs_commit_async) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_commit_async(cache_inode, h, wait_handle); + + RETURN(rc); +} + +static int fsfilt_smfs_commit_wait(struct inode *inode, void *h) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + cache_inode = I2CI(inode); + if (cache_fsfilt == NULL) + RETURN(-EINVAL); + + if (!cache_fsfilt->fs_commit_wait) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_commit_wait(cache_inode, h); + + RETURN(rc); +} + +static int fsfilt_smfs_setattr(struct dentry *dentry, void *handle, + struct iattr *iattr, int do_trunc) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(dentry->d_inode); + struct dentry *cache_dentry = NULL; + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(rc); + + cache_inode = I2CI(dentry->d_inode); + + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dentry->d_inode, cache_inode); + + if (!cache_fsfilt->fs_setattr) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_setattr(cache_dentry, handle, + iattr, do_trunc); + + post_smfs_inode(dentry->d_inode, cache_inode); + +exit: + post_smfs_dentry(cache_dentry); + RETURN(rc); +} + +static int fsfilt_smfs_iocontrol(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + struct smfs_file_info *sfi = NULL; + int rc = -EIO; + ENTRY; + + if (!cache_fsfilt) + RETURN(rc); + + cache_inode 
= I2CI(inode); + + if (!cache_inode) + RETURN(rc); + + if (file != NULL) { + sfi = F2SMFI(file); + + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + } else { + sfi = NULL; + } + + if (!cache_fsfilt->fs_iocontrol) + RETURN(-ENOSYS); + + if (sfi) { + rc = cache_fsfilt->fs_iocontrol(cache_inode, + sfi->c_file, + cmd, arg); + } else { + rc = cache_fsfilt->fs_iocontrol(cache_inode, + NULL, cmd, arg); + } + + /* FIXME-UMKA: Should this be in duplicate_inode()? */ + if (rc == 0 && cmd == EXT3_IOC_SETFLAGS) + inode->i_flags = cache_inode->i_flags; + + post_smfs_inode(inode, cache_inode); + + RETURN(rc); +} + +static int fsfilt_smfs_set_md(struct inode *inode, void *handle, + void *lmm, int lmm_size) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(inode); + + if (!cache_inode) + RETURN(-ENOENT); + + pre_smfs_inode(inode, cache_inode); + + if (!cache_fsfilt->fs_set_md) + RETURN(-ENOSYS); + + down(&cache_inode->i_sem); + + rc = cache_fsfilt->fs_set_md(cache_inode, handle, + lmm, lmm_size); + + up(&cache_inode->i_sem); + + post_smfs_inode(inode, cache_inode); + + RETURN(rc); +} + +/* Must be called with i_sem held */ +static int fsfilt_smfs_get_md(struct inode *inode, void *lmm, int lmm_size) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(inode); + + if (!cache_inode) + RETURN(-ENOENT); + + pre_smfs_inode(inode, cache_inode); + + if (!cache_fsfilt->fs_get_md) + RETURN(-ENOSYS); + + down(&cache_inode->i_sem); + + rc = cache_fsfilt->fs_get_md(cache_inode, lmm, + lmm_size); + + up(&cache_inode->i_sem); + + post_smfs_inode(inode, cache_inode); + + RETURN(rc); +} + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static int fsfilt_smfs_send_bio(struct inode *inode, + struct bio *bio) +#else +static int 
fsfilt_smfs_send_bio(struct inode *inode, + struct kiobuf *bio) +#endif +{ + struct inode *cache_inode; + struct fsfilt_operations *cache_fsfilt; + + cache_fsfilt = I2FOPS(inode); + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(inode); + if (!cache_inode) + RETURN(-EINVAL); + + if (!cache_fsfilt->fs_send_bio) + RETURN(-ENOSYS); + + return cache_fsfilt->fs_send_bio(cache_inode, bio); +} + +static struct page * +fsfilt_smfs_getpage(struct inode *inode, long int index) +{ + struct fsfilt_operations *cache_fsfilt; + struct inode *cache_inode; + + cache_fsfilt = I2FOPS(inode); + if (!cache_fsfilt) + RETURN(ERR_PTR(-EINVAL)); + + cache_inode = I2CI(inode); + if (!cache_inode) + RETURN(ERR_PTR(-EINVAL)); + + if (!cache_fsfilt->fs_getpage) + RETURN(ERR_PTR(-ENOSYS)); + + return cache_fsfilt->fs_getpage(cache_inode, index); +} + +static ssize_t fsfilt_smfs_readpage(struct file *file, char *buf, + size_t count, loff_t *off) +{ + struct fsfilt_operations *cache_fsfilt; + struct smfs_file_info *sfi; + struct inode *cache_inode; + loff_t tmp_ppos; + loff_t *cache_ppos; + ssize_t rc = -EIO; + + ENTRY; + + cache_fsfilt = I2FOPS(file->f_dentry->d_inode); + if (!cache_fsfilt) + RETURN(rc); + + cache_inode = I2CI(file->f_dentry->d_inode); + if (!cache_inode) + RETURN(rc); + + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + if (off != &(file->f_pos)) { + cache_ppos = &tmp_ppos; + } else { + cache_ppos = &sfi->c_file->f_pos; + } + *cache_ppos = *off; + + pre_smfs_inode(file->f_dentry->d_inode, cache_inode); + + if (cache_fsfilt->fs_readpage) + rc = cache_fsfilt->fs_readpage(sfi->c_file, buf, + count, cache_ppos); + + *off = *cache_ppos; + post_smfs_inode(file->f_dentry->d_inode, cache_inode); + duplicate_file(file, sfi->c_file); + + RETURN(rc); +} + +static int fsfilt_smfs_add_journal_cb(struct obd_device *obd, + struct super_block *sb, + __u64 last_rcvd, void *handle, + fsfilt_cb_t cb_func, + void *cb_data) +{ + struct fsfilt_operations 
*cache_fsfilt = S2SMI(sb)->sm_cache_fsfilt; + struct super_block *csb = S2CSB(sb); + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(rc); + if (cache_fsfilt->fs_add_journal_cb) + rc = cache_fsfilt->fs_add_journal_cb(obd, csb, last_rcvd, + handle, cb_func, cb_data); + RETURN(rc); +} + +static int fsfilt_smfs_statfs(struct super_block *sb, struct obd_statfs *osfs) +{ + struct fsfilt_operations *cache_fsfilt = S2SMI(sb)->sm_cache_fsfilt; + struct super_block *csb = S2CSB(sb); + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(rc); + + if (!cache_fsfilt->fs_statfs) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_statfs(csb, osfs); + duplicate_sb(csb, sb); + + RETURN(rc); +} + +static int fsfilt_smfs_sync(struct super_block *sb) +{ + struct fsfilt_operations *cache_fsfilt = S2SMI(sb)->sm_cache_fsfilt; + struct super_block *csb = S2CSB(sb); + int rc = -EIO; + + if(!cache_fsfilt) + RETURN(-EINVAL); + + if (!cache_fsfilt->fs_sync) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_sync(csb); + + RETURN(rc); +} + +int fsfilt_smfs_map_inode_page(struct inode *inode, struct page *page, + unsigned long *blocks, int *created, int create) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(inode); + + if (!cache_inode) + RETURN(rc); + + if (!cache_fsfilt->fs_map_inode_page) + RETURN(-ENOSYS); + + down(&cache_inode->i_sem); + rc = cache_fsfilt->fs_map_inode_page(cache_inode, page, + blocks, created, create); + up(&cache_inode->i_sem); + + RETURN(rc); +} + +static int fsfilt_smfs_prep_san_write(struct inode *inode, long *blocks, + int nblocks, loff_t newsize) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(inode); + + if (!cache_inode) + RETURN(-EINVAL); + + if (!cache_fsfilt->fs_prep_san_write) + RETURN(-ENOSYS); + + 
down(&cache_inode->i_sem); + rc = cache_fsfilt->fs_prep_san_write(cache_inode, blocks, + nblocks, newsize); + up(&cache_inode->i_sem); + + RETURN(rc); +} + +static int fsfilt_smfs_read_record(struct file * file, void *buf, + int size, loff_t *offs) +{ + struct fsfilt_operations *cache_fsfilt; + struct inode *cache_inode; + struct smfs_file_info *sfi; + loff_t tmp_ppos; + loff_t *cache_ppos; + ssize_t rc; + + ENTRY; + cache_fsfilt = I2FOPS(file->f_dentry->d_inode); + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(file->f_dentry->d_inode); + + if (!cache_inode) + RETURN(-EINVAL); + + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) BUG(); + + if (offs != &(file->f_pos)) { + cache_ppos = &tmp_ppos; + } else { + cache_ppos = &sfi->c_file->f_pos; + } + *cache_ppos = *offs; + + pre_smfs_inode(file->f_dentry->d_inode, cache_inode); + + if (!cache_fsfilt->fs_read_record) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_read_record(sfi->c_file, buf, + size, cache_ppos); + + *offs = *cache_ppos; + post_smfs_inode(file->f_dentry->d_inode, cache_inode); + duplicate_file(file, sfi->c_file); + + RETURN(rc); +} + +static int fsfilt_smfs_write_record(struct file *file, void *buf, int bufsize, + loff_t *offs, int force_sync) +{ + struct fsfilt_operations *cache_fsfilt; + struct inode *cache_inode; + struct smfs_file_info *sfi; + loff_t tmp_ppos; + loff_t *cache_ppos; + ssize_t rc = -EIO; + + ENTRY; + + cache_fsfilt = I2FOPS(file->f_dentry->d_inode); + if (!cache_fsfilt) + RETURN(-EINVAL); + + cache_inode = I2CI(file->f_dentry->d_inode); + + if (!cache_inode) + RETURN(-EINVAL); + + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) BUG(); + + if (offs != &(file->f_pos)) { + cache_ppos = &tmp_ppos; + } else { + cache_ppos = &sfi->c_file->f_pos; + } + *cache_ppos = *offs; + + pre_smfs_inode(file->f_dentry->d_inode, cache_inode); + + if (!cache_fsfilt->fs_write_record) + RETURN(-ENOSYS); + + rc = cache_fsfilt->fs_write_record(sfi->c_file, buf, + bufsize, 
cache_ppos, force_sync); + *offs = *cache_ppos; + post_smfs_inode(file->f_dentry->d_inode, cache_inode); + duplicate_file(file, sfi->c_file); + + RETURN(rc); +} + +static int fsfilt_smfs_setup(struct super_block *sb) +{ + struct smfs_super_info *smfs_info = S2SMI(sb); + struct fsfilt_operations *cache_fsfilt; + struct super_block *csb; + int rc = 0; + + /* It should be initialized olready by smfs_read_super(). */ + if (!(cache_fsfilt = smfs_info->sm_cache_fsfilt)) + cache_fsfilt = fsfilt_get_ops(smfs_info->cache_fs_type); + + if (!cache_fsfilt) + RETURN(-ENOENT); + + csb = S2CSB(sb); + + if (cache_fsfilt->fs_setup) + rc = cache_fsfilt->fs_setup(csb); + + RETURN(rc); +} + +static int fsfilt_smfs_set_xattr(struct inode *inode, void *handle, + char *name, void *buffer, + int buffer_size) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(rc); + + cache_inode = I2CI(inode); + if (!cache_inode) + RETURN(rc); + + pre_smfs_inode(inode, cache_inode); + + if (cache_fsfilt->fs_set_xattr) + rc = cache_fsfilt->fs_set_xattr(cache_inode, handle, name, + buffer, buffer_size); + post_smfs_inode(inode, cache_inode); + + RETURN(rc); +} + +static int fsfilt_smfs_get_xattr(struct inode *inode, char *name, + void *buffer, int buffer_size) +{ + struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); + struct inode *cache_inode = NULL; + int rc = -EIO; + + if (!cache_fsfilt) + RETURN(rc); + + cache_inode = I2CI(inode); + if (!cache_inode) + RETURN(rc); + + pre_smfs_inode(inode, cache_inode); + + if (cache_fsfilt->fs_get_xattr) + rc = cache_fsfilt->fs_get_xattr(cache_inode, name, + buffer, buffer_size); + post_smfs_inode(inode, cache_inode); + + RETURN(rc); +} + +static struct fsfilt_operations fsfilt_smfs_ops = { + .fs_type = "smfs", + .fs_owner = THIS_MODULE, + .fs_start = fsfilt_smfs_start, + .fs_brw_start = fsfilt_smfs_brw_start, + .fs_commit = fsfilt_smfs_commit, + .fs_commit_async = 
fsfilt_smfs_commit_async, + .fs_commit_wait = fsfilt_smfs_commit_wait, + .fs_setattr = fsfilt_smfs_setattr, + .fs_iocontrol = fsfilt_smfs_iocontrol, + .fs_set_md = fsfilt_smfs_set_md, + .fs_get_md = fsfilt_smfs_get_md, + .fs_readpage = fsfilt_smfs_readpage, + .fs_getpage = fsfilt_smfs_getpage, + .fs_add_journal_cb = fsfilt_smfs_add_journal_cb, + .fs_statfs = fsfilt_smfs_statfs, + .fs_sync = fsfilt_smfs_sync, + .fs_map_inode_page = fsfilt_smfs_map_inode_page, + .fs_prep_san_write = fsfilt_smfs_prep_san_write, + .fs_write_record = fsfilt_smfs_write_record, + .fs_read_record = fsfilt_smfs_read_record, + .fs_setup = fsfilt_smfs_setup, + .fs_send_bio = fsfilt_smfs_send_bio, + .fs_set_xattr = fsfilt_smfs_set_xattr, + .fs_get_xattr = fsfilt_smfs_get_xattr, + + /* FIXME-UMKA: probably fsfilt_smfs_get_op_len() should be put here + * too. */ +}; + +static int __init fsfilt_smfs_init(void) +{ + int rc; + + rc = fsfilt_register_ops(&fsfilt_smfs_ops); + return rc; +} + +static void __exit fsfilt_smfs_exit(void) +{ + fsfilt_unregister_ops(&fsfilt_smfs_ops); +} + +module_init(fsfilt_smfs_init); +module_exit(fsfilt_smfs_exit); + +MODULE_AUTHOR("Cluster File Systems, Inc. 
"); +MODULE_DESCRIPTION("Lustre SMFS Filesystem Helper v0.1"); +MODULE_LICENSE("GPL"); diff --git a/lustre/obdclass/llog.c b/lustre/lvfs/llog.c similarity index 65% rename from lustre/obdclass/llog.c rename to lustre/lvfs/llog.c index 0ad595f..12da23b 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/lvfs/llog.c @@ -38,9 +38,7 @@ #include #endif -#include #include -#include /* Allocate a new log or catalog handle */ struct llog_handle *llog_alloc_handle(void) @@ -58,7 +56,6 @@ struct llog_handle *llog_alloc_handle(void) } EXPORT_SYMBOL(llog_alloc_handle); - void llog_free_handle(struct llog_handle *loghandle) { if (!loghandle) @@ -66,9 +63,9 @@ void llog_free_handle(struct llog_handle *loghandle) if (!loghandle->lgh_hdr) goto out; - if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) + if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN) list_del_init(&loghandle->u.phd.phd_entry); - if (loghandle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) + if (le32_to_cpu(loghandle->lgh_hdr->llh_flags) & LLOG_F_IS_CAT) LASSERT(list_empty(&loghandle->u.chd.chd_head)); OBD_FREE(loghandle->lgh_hdr, LLOG_CHUNK_SIZE); @@ -97,10 +94,10 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) RETURN(-EINVAL); } - llh->llh_count--; + llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) - 1); - if ((llh->llh_flags & LLOG_F_ZAP_WHEN_EMPTY) && - (llh->llh_count == 1) && + if ((le32_to_cpu(llh->llh_flags) & LLOG_F_ZAP_WHEN_EMPTY) && + (le32_to_cpu(llh->llh_count) == 1) && (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { rc = llog_destroy(loghandle); if (rc) @@ -131,10 +128,10 @@ int llog_init_handle(struct llog_handle *handle, int flags, RETURN(-ENOMEM); handle->lgh_hdr = llh; /* first assign flags to use llog_client_ops */ - llh->llh_flags = flags; + llh->llh_flags = cpu_to_le32(flags); rc = llog_read_header(handle); if (rc == 0) { - flags = llh->llh_flags; + flags = le32_to_cpu(llh->llh_flags); if (uuid) LASSERT(obd_uuid_equals(uuid, &llh->llh_tgtuuid)); 
GOTO(out, rc); @@ -146,20 +143,21 @@ int llog_init_handle(struct llog_handle *handle, int flags, rc = 0; handle->lgh_last_idx = 0; /* header is record with index 0 */ - llh->llh_count = 1; /* for the header record */ - llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC; - llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE; + llh->llh_count = cpu_to_le32(1); /* for the header record */ + llh->llh_hdr.lrh_type = cpu_to_le32(LLOG_HDR_MAGIC); + llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = + cpu_to_le32(LLOG_CHUNK_SIZE); llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0; - llh->llh_timestamp = LTIME_S(CURRENT_TIME); + llh->llh_timestamp = cpu_to_le64(LTIME_S(CURRENT_TIME)); if (uuid) memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid)); - llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap); + llh->llh_bitmap_offset = cpu_to_le32(offsetof(typeof(*llh),llh_bitmap)); ext2_set_bit(0, llh->llh_bitmap); out: if (flags & LLOG_F_IS_CAT) { INIT_LIST_HEAD(&handle->u.chd.chd_head); - llh->llh_size = sizeof(struct llog_logid_rec); + llh->llh_size = cpu_to_le32(sizeof(struct llog_logid_rec)); } else if (flags & LLOG_F_IS_PLAIN) INIT_LIST_HEAD(&handle->u.phd.phd_entry); @@ -234,12 +232,11 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, GOTO(out, rc); rec = buf; - idx = rec->lrh_index; + idx = le32_to_cpu(rec->lrh_index); if (idx < index) CDEBUG(D_HA, "index %u : idx %u\n", index, idx); while (idx < index) { - rec = (struct llog_rec_hdr *) - ((char *)rec + rec->lrh_len); + rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); idx ++; } @@ -266,8 +263,7 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, ++index; if (index > last_index) GOTO(out, rc = 0); - rec = (struct llog_rec_hdr *) - ((char *)rec + rec->lrh_len); + rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); } } @@ -277,3 +273,86 @@ int llog_process(struct llog_handle *loghandle, llog_cb_t cb, RETURN(rc); } EXPORT_SYMBOL(llog_process); + +int llog_reverse_process(struct llog_handle 
*loghandle, llog_cb_t cb, + void *data, void *catdata) +{ + struct llog_log_hdr *llh = loghandle->lgh_hdr; + struct llog_process_cat_data *cd = catdata; + void *buf; + int rc = 0, first_index = 1, index, idx; + struct llog_rec_tail *tail; + ENTRY; + + OBD_ALLOC(buf, LLOG_CHUNK_SIZE); + if (!buf) + RETURN(-ENOMEM); + + if (cd != NULL) + first_index = cd->first_idx + 1; + if (cd != NULL && cd->last_idx) + index = cd->last_idx; + else + index = LLOG_BITMAP_BYTES * 8 - 1; + + while (rc == 0) { + struct llog_rec_hdr *rec; + + /* skip records not set in bitmap */ + while (index >= first_index && + !ext2_test_bit(index, llh->llh_bitmap)) + --index; + + LASSERT(index >= first_index - 1); + if (index == first_index - 1) + break; + + /* get the buf with our target record; avoid old garbage */ + memset(buf, 0, LLOG_CHUNK_SIZE); + rc = llog_prev_block(loghandle, index, buf, LLOG_CHUNK_SIZE); + if (rc) + GOTO(out, rc); + + rec = buf; + idx = le32_to_cpu(rec->lrh_index); + if (idx < index) + CDEBUG(D_HA, "index %u : idx %u\n", index, idx); + while (idx < index) { + rec = ((void *)rec + le32_to_cpu(rec->lrh_len)); + idx ++; + } + + /* process records in buffer, starting where we found one */ + while ((void *)rec >= buf) { + if (rec->lrh_index == 0) + GOTO(out, 0); /* no more records */ + + /* if set, process the callback on this record */ + if (ext2_test_bit(index, llh->llh_bitmap)) { + rc = cb(loghandle, rec, data); + if (rc == LLOG_PROC_BREAK) { + CWARN("recovery from log: "LPX64":%x" + " stopped\n", + loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen); + GOTO(out, rc); + } + if (rc) + GOTO(out, rc); + } + + /* previous record, still in buffer? 
*/ + --index; + if (index < first_index) + GOTO(out, rc = 0); + tail = (void *)rec - sizeof(struct llog_rec_tail); + rec = ((void *)rec - le32_to_cpu(tail->lrt_len)); + } + } + + out: + if (buf) + OBD_FREE(buf, LLOG_CHUNK_SIZE); + RETURN(rc); +} +EXPORT_SYMBOL(llog_reverse_process); diff --git a/lustre/obdclass/llog_cat.c b/lustre/lvfs/llog_cat.c similarity index 57% rename from lustre/obdclass/llog_cat.c rename to lustre/lvfs/llog_cat.c index d4fa370..67a6f80 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/lvfs/llog_cat.c @@ -38,7 +38,6 @@ #include #endif -#include #include #include @@ -51,7 +50,7 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) { struct llog_handle *loghandle; struct llog_log_hdr *llh; - struct llog_logid_rec rec = { { 0 }, }; + struct llog_logid_rec rec; int rc, index, bitmap_size; ENTRY; @@ -60,8 +59,8 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) index = (cathandle->lgh_last_idx + 1) % bitmap_size; - /* maximum number of available slots in catlog is bitmap_size - 2 */ - if (llh->llh_cat_idx == index) { + /* maximum number of available slots in catalog is bitmap_size - 2 */ + if (llh->llh_cat_idx == cpu_to_le32(index)) { CERROR("no free catalog slots for log...\n"); RETURN(ERR_PTR(-ENOSPC)); } else { @@ -73,8 +72,8 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) LBUG(); /* should never happen */ } cathandle->lgh_last_idx = index; - llh->llh_count++; - llh->llh_tail.lrt_index = index; + llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1); + llh->llh_tail.lrt_index = cpu_to_le32(index); } rc = llog_create(cathandle->lgh_ctxt, &loghandle, NULL, NULL); @@ -91,12 +90,12 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) LPX64"\n", loghandle->lgh_id.lgl_oid, loghandle->lgh_id.lgl_ogen, index, cathandle->lgh_id.lgl_oid); /* build the record for this log in the catalog */ - rec.lid_hdr.lrh_len = sizeof(rec); - 
rec.lid_hdr.lrh_index = index; - rec.lid_hdr.lrh_type = LLOG_LOGID_MAGIC; + rec.lid_hdr.lrh_len = cpu_to_le32(sizeof(rec)); + rec.lid_hdr.lrh_index = cpu_to_le32(index); + rec.lid_hdr.lrh_type = cpu_to_le32(LLOG_LOGID_MAGIC); rec.lid_id = loghandle->lgh_id; - rec.lid_tail.lrt_len = sizeof(rec); - rec.lid_tail.lrt_index = index; + rec.lid_tail.lrt_len = cpu_to_le32(sizeof(rec)); + rec.lid_tail.lrt_index = cpu_to_le32(index); /* update the catalog: header and record */ rc = llog_write_rec(cathandle, &rec.lid_hdr, @@ -105,7 +104,7 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) GOTO(out_destroy, rc); } - loghandle->lgh_hdr->llh_cat_idx = index; + loghandle->lgh_hdr->llh_cat_idx = cpu_to_le32(index); cathandle->u.chd.chd_current_log = loghandle; LASSERT(list_empty(&loghandle->u.phd.phd_entry)); list_add_tail(&loghandle->u.phd.phd_entry, &cathandle->u.chd.chd_head); @@ -116,7 +115,6 @@ static struct llog_handle *llog_cat_new_log(struct llog_handle *cathandle) RETURN(loghandle); } -EXPORT_SYMBOL(llog_cat_new_log); /* Open an existent log handle and add it to the open list. * This log handle will be closed when all of the records in it are removed. @@ -125,7 +123,7 @@ EXPORT_SYMBOL(llog_cat_new_log); * We return a lock on the handle to ensure nobody yanks it from us. 
*/ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, - struct llog_logid *logid) + struct llog_logid *logid) { struct llog_handle *loghandle; int rc = 0; @@ -163,14 +161,15 @@ int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, if (!rc) { loghandle->u.phd.phd_cat_handle = cathandle; loghandle->u.phd.phd_cookie.lgc_lgl = cathandle->lgh_id; - loghandle->u.phd.phd_cookie.lgc_index = - loghandle->lgh_hdr->llh_cat_idx; + loghandle->u.phd.phd_cookie.lgc_index = + le32_to_cpu(loghandle->lgh_hdr->llh_cat_idx); } out: *res = loghandle; RETURN(rc); } +EXPORT_SYMBOL(llog_cat_id2handle); int llog_cat_put(struct llog_handle *cathandle) { @@ -257,21 +256,13 @@ int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, int rc; ENTRY; - LASSERT(rec->lrh_len <= LLOG_CHUNK_SIZE); + LASSERT(le32_to_cpu(rec->lrh_len) <= LLOG_CHUNK_SIZE); loghandle = llog_cat_current_log(cathandle, 1); if (IS_ERR(loghandle)) RETURN(PTR_ERR(loghandle)); /* loghandle is already locked by llog_cat_current_log() for us */ rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1); up_write(&loghandle->lgh_lock); - if (rc == -ENOSPC) { - /* to create a new plain log */ - loghandle = llog_cat_current_log(cathandle, 1); - if (IS_ERR(loghandle)) - RETURN(PTR_ERR(loghandle)); - rc = llog_write_rec(loghandle, rec, reccookie, 1, buf, -1); - up_write(&loghandle->lgh_lock); - } RETURN(rc); } @@ -287,7 +278,7 @@ EXPORT_SYMBOL(llog_cat_add_rec); * Assumes caller has already pushed us into the kernel context. 
*/ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, - struct llog_cookie *cookies) + struct llog_cookie *cookies) { int i, index, rc = 0; ENTRY; @@ -328,21 +319,21 @@ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, } EXPORT_SYMBOL(llog_cat_cancel_records); -int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, - void *data) +static int llog_cat_process_cb(struct llog_handle *cat_llh, + struct llog_rec_hdr *rec, void *data) { struct llog_process_data *d = data; struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; struct llog_handle *llh; int rc; - if (rec->lrh_type != LLOG_LOGID_MAGIC) { + if (le32_to_cpu(rec->lrh_type) != LLOG_LOGID_MAGIC) { CERROR("invalid record in catalog\n"); RETURN(-EINVAL); } CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen, - rec->lrh_index, cat_llh->lgh_id.lgl_oid); + le32_to_cpu(rec->lrh_index), cat_llh->lgh_id.lgl_oid); rc = llog_cat_id2handle(cat_llh, &llh, &lir->lid_id); if (rc) { @@ -363,15 +354,15 @@ int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data) int rc; ENTRY; - LASSERT(llh->llh_flags & LLOG_F_IS_CAT); + LASSERT(llh->llh_flags &cpu_to_le32(LLOG_F_IS_CAT)); d.lpd_data = data; d.lpd_cb = cb; if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { - CWARN("catlog "LPX64" crosses index zero\n", + CWARN("catalog "LPX64" crosses index zero\n", cat_llh->lgh_id.lgl_oid); - cd.first_idx = llh->llh_cat_idx; + cd.first_idx = le32_to_cpu(llh->llh_cat_idx); cd.last_idx = 0; rc = llog_process(cat_llh, llog_cat_process_cb, &d, &cd); if (rc != 0) @@ -388,6 +379,70 @@ int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data) } EXPORT_SYMBOL(llog_cat_process); +static int llog_cat_reverse_process_cb(struct llog_handle *cat_llh, + struct llog_rec_hdr *rec, void *data) +{ + struct llog_process_data *d = data; + struct llog_logid_rec *lir = (struct llog_logid_rec *)rec; + struct 
llog_handle *llh; + int rc; + + if (le32_to_cpu(rec->lrh_type) != LLOG_LOGID_MAGIC) { + CERROR("invalid record in catalog\n"); + RETURN(-EINVAL); + } + CWARN("processing log "LPX64":%x at index %u of catalog "LPX64"\n", + lir->lid_id.lgl_oid, lir->lid_id.lgl_ogen, + le32_to_cpu(rec->lrh_index), cat_llh->lgh_id.lgl_oid); + + rc = llog_cat_id2handle(cat_llh, &llh, &lir->lid_id); + if (rc) { + CERROR("Cannot find handle for log "LPX64"\n", + lir->lid_id.lgl_oid); + RETURN(rc); + } + + rc = llog_reverse_process(llh, d->lpd_cb, d->lpd_data, NULL); + RETURN(rc); +} + +int llog_cat_reverse_process(struct llog_handle *cat_llh, + llog_cb_t cb, void *data) +{ + struct llog_process_data d; + struct llog_process_cat_data cd; + struct llog_log_hdr *llh = cat_llh->lgh_hdr; + int rc; + ENTRY; + + LASSERT(llh->llh_flags &cpu_to_le32(LLOG_F_IS_CAT)); + d.lpd_data = data; + d.lpd_cb = cb; + + if (llh->llh_cat_idx > cat_llh->lgh_last_idx) { + CWARN("catalog "LPX64" crosses index zero\n", + cat_llh->lgh_id.lgl_oid); + + cd.first_idx = 0; + cd.last_idx = cat_llh->lgh_last_idx; + rc = llog_reverse_process(cat_llh, llog_cat_reverse_process_cb, + &d, &cd); + if (rc != 0) + RETURN(rc); + + cd.first_idx = le32_to_cpu(llh->llh_cat_idx); + cd.last_idx = 0; + rc = llog_reverse_process(cat_llh, llog_cat_reverse_process_cb, + &d, &cd); + } else { + rc = llog_reverse_process(cat_llh, llog_cat_reverse_process_cb, + &d, NULL); + } + + RETURN(rc); +} +EXPORT_SYMBOL(llog_cat_reverse_process); + int llog_cat_set_first_idx(struct llog_handle *cathandle, int index) { struct llog_log_hdr *llh = cathandle->lgh_hdr; @@ -395,17 +450,17 @@ int llog_cat_set_first_idx(struct llog_handle *cathandle, int index) ENTRY; bitmap_size = sizeof(llh->llh_bitmap) * 8; - if (llh->llh_cat_idx == (index - 1)) { - idx = llh->llh_cat_idx + 1; - llh->llh_cat_idx = idx; + if (llh->llh_cat_idx == cpu_to_le32(index - 1)) { + idx = le32_to_cpu(llh->llh_cat_idx) + 1; + llh->llh_cat_idx = cpu_to_le32(idx); if (idx == 
cathandle->lgh_last_idx) goto out; for (i = (index + 1) % bitmap_size; i != cathandle->lgh_last_idx; i = (i + 1) % bitmap_size) { if (!ext2_test_bit(i, llh->llh_bitmap)) { - idx = llh->llh_cat_idx + 1; - llh->llh_cat_idx = idx; + idx = le32_to_cpu(llh->llh_cat_idx) + 1; + llh->llh_cat_idx = cpu_to_le32(idx); } else if (i == 0) { llh->llh_cat_idx = 0; } else { @@ -413,55 +468,166 @@ int llog_cat_set_first_idx(struct llog_handle *cathandle, int index) } } out: - CDEBUG(D_HA, "set catlog "LPX64" first idx %u\n", - cathandle->lgh_id.lgl_oid, llh->llh_cat_idx); + CDEBUG(D_HA, "set catalog "LPX64" first idx %u\n", + cathandle->lgh_id.lgl_oid,le32_to_cpu(llh->llh_cat_idx)); } RETURN(0); } +EXPORT_SYMBOL(llog_cat_set_first_idx); -#if 0 -/* Assumes caller has already pushed us into the kernel context. */ -int llog_cat_init(struct llog_handle *cathandle, struct obd_uuid *tgtuuid) +int llog_catalog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data) { - struct llog_log_hdr *llh; - loff_t offset = 0; - int rc = 0; + struct llog_handle *cathandle; + int rc; ENTRY; + + cathandle = ctxt->loc_handle; + LASSERT(cathandle != NULL); + + rc = llog_cat_add_rec(cathandle, rec, logcookies, buf); + if (rc != 1) + CERROR("write one catalog record failed: %d\n", rc); + RETURN(rc); +} +EXPORT_SYMBOL(llog_catalog_add); - LASSERT(sizeof(*llh) == LLOG_CHUNK_SIZE); +int llog_catalog_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data) +{ + struct llog_handle *cathandle; + int rc; + ENTRY; - down(&cathandle->lgh_lock); - llh = cathandle->lgh_hdr; + if (cookies == NULL || count == 0) + RETURN(-EINVAL); + cathandle = ctxt->loc_handle; + LASSERT(cathandle != NULL); + rc = llog_cat_cancel_records(cathandle, count, cookies); + RETURN(rc); +} +EXPORT_SYMBOL(llog_catalog_cancel); - if (cathandle->lgh_file->f_dentry->d_inode->i_size == 0) { - llog_write_rec(cathandle, &llh->llh_hdr, 
NULL, 0, NULL, 0); - -write_hdr: - rc = lustre_fwrite(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE, - &offset); - if (rc != LLOG_CHUNK_SIZE) { - CERROR("error writing catalog header: rc %d\n", rc); - OBD_FREE(llh, sizeof(*llh)); - if (rc >= 0) - rc = -ENOSPC; - } else - rc = 0; - } else { - rc = lustre_fread(cathandle->lgh_file, llh, LLOG_CHUNK_SIZE, - &offset); - if (rc != LLOG_CHUNK_SIZE) { - CERROR("error reading catalog header: rc %d\n", rc); - /* Can we do much else if the header is bad? */ - goto write_hdr; - } else - rc = 0; +int llog_catalog_setup(struct llog_ctxt **res, char *name, + struct lvfs_run_ctxt *lvfs_ctxt, + struct fsfilt_operations *fsops, + struct dentry *logs_de, + struct dentry *objects_de) +{ + struct llog_ctxt *ctxt; + struct llog_catid catid; + struct llog_handle *handle; + int rc; + + ENTRY; + + OBD_ALLOC(ctxt, sizeof(*ctxt)); + if (!ctxt) + RETURN(-ENOMEM); + + *res = ctxt; + + ctxt->loc_fsops = fsops; + ctxt->loc_lvfs_ctxt = lvfs_ctxt; + ctxt->loc_logs_dir = logs_de; + ctxt->loc_objects_dir = objects_de; + ctxt->loc_logops = &llog_lvfs_ops; + ctxt->loc_logops->lop_add = llog_catalog_add; + ctxt->loc_logops->lop_cancel = llog_catalog_cancel; + + memset(&catid, 0, sizeof(struct llog_catid)); + rc = llog_get_cat_list(lvfs_ctxt, fsops, name, 1, &catid); + if (rc) { + CERROR("error llog_get_cat_list rc: %d\n", rc); + RETURN(rc); + } + if (catid.lci_logid.lgl_oid) + rc = llog_create(ctxt, &handle, &catid.lci_logid, 0); + else { + rc = llog_create(ctxt, &handle, NULL, NULL); + if (!rc) + catid.lci_logid = handle->lgh_id; } + if (rc) + GOTO(out, rc); - cathandle->lgh_tgtuuid = &llh->llh_tgtuuid; - up(&cathandle->lgh_lock); + ctxt->loc_handle = handle; + rc = llog_init_handle(handle, LLOG_F_IS_CAT, NULL); + if (rc) + GOTO(out, rc); + + rc = llog_put_cat_list(lvfs_ctxt, fsops, name, 1, &catid); + if (rc) + CERROR("error llog_get_cat_list rc: %d\n", rc); +out: + if (ctxt && rc) + OBD_FREE(ctxt, sizeof(*ctxt)); RETURN(rc); } 
-EXPORT_SYMBOL(llog_cat_init); +EXPORT_SYMBOL(llog_catalog_setup); -#endif +int llog_catalog_cleanup(struct llog_ctxt *ctxt) +{ + struct llog_handle *cathandle, *n, *loghandle; + struct llog_log_hdr *llh; + int rc, index; + ENTRY; + + if (!ctxt) + return 0; + + cathandle = ctxt->loc_handle; + if (cathandle) { + list_for_each_entry_safe(loghandle, n, + &cathandle->u.chd.chd_head, + u.phd.phd_entry) { + llh = loghandle->lgh_hdr; + if ((le32_to_cpu(llh->llh_flags) & + LLOG_F_ZAP_WHEN_EMPTY) && + (le32_to_cpu(llh->llh_count) == 1)) { + rc = llog_destroy(loghandle); + if (rc) + CERROR("failure destroying log during " + "cleanup: %d\n", rc); + LASSERT(rc == 0); + index = loghandle->u.phd.phd_cookie.lgc_index; + llog_free_handle(loghandle); + + LASSERT(index); + llog_cat_set_first_idx(cathandle, index); + rc = llog_cancel_rec(cathandle, index); + if (rc == 0) + CDEBUG(D_HA, "cancel plain log at index" + " %u of catalog "LPX64"\n", + index,cathandle->lgh_id.lgl_oid); + } + } + llog_cat_put(ctxt->loc_handle); + } + OBD_FREE(ctxt, sizeof(*ctxt)); + return 0; +} +EXPORT_SYMBOL(llog_catalog_cleanup); + +int llog_cat_half_bottom(struct llog_cookie *cookie, struct llog_handle *handle) +{ + struct llog_handle *loghandle; + struct llog_logid *lgl = &cookie->lgc_lgl; + int rc; + + down_read(&handle->lgh_lock); + rc = llog_cat_id2handle(handle, &loghandle, lgl); + if (rc) + GOTO(out, rc); + if (2 * loghandle->lgh_hdr->llh_cat_idx <= + handle->lgh_last_idx + handle->lgh_hdr->llh_cat_idx + 1) + rc = 1; + else + rc = 0; +out: + up_read(&handle->lgh_lock); + RETURN(rc); +} +EXPORT_SYMBOL(llog_cat_half_bottom); diff --git a/lustre/lvfs/llog_lvfs.c b/lustre/lvfs/llog_lvfs.c new file mode 100644 index 0000000..66e9686 --- /dev/null +++ b/lustre/lvfs/llog_lvfs.c @@ -0,0 +1,933 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. 
+ * Author: Andreas Dilger + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * OST<->MDS recovery logging infrastructure. + * + * Invariants in implementation: + * - we do not share logs among different OST<->MDS connections, so that + * if an OST or MDS fails it need only look at log(s) relevant to itself + */ + +#define DEBUG_SUBSYSTEM S_LOG + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#include +#include +#include + +#ifdef __KERNEL__ + +static int llog_lvfs_pad(struct llog_ctxt *ctxt, struct l_file *file, + int len, int index) +{ + struct llog_rec_hdr rec; + struct llog_rec_tail tail; + int rc; + ENTRY; + + LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0); + + tail.lrt_len = rec.lrh_len = cpu_to_le32(len); + tail.lrt_index = rec.lrh_index = cpu_to_le32(index); + rec.lrh_type = 0; + + rc = llog_fsfilt_write_record(ctxt, file, &rec, sizeof(rec), + &file->f_pos, 0); + if (rc) { + CERROR("error writing padding record: rc %d\n", rc); + goto out; + } + + file->f_pos += len - sizeof(rec) - sizeof(tail); + rc = llog_fsfilt_write_record(ctxt, file, &tail, sizeof(tail), + &file->f_pos, 0); + if (rc) { + CERROR("error writing padding record: rc %d\n", rc); + goto out; + } + + out: + RETURN(rc); +} + +static int llog_lvfs_write_blob(struct llog_ctxt *ctxt, struct 
l_file *file, + struct llog_rec_hdr *rec, void *buf, loff_t off) +{ + int rc; + struct llog_rec_tail end; + loff_t saved_off = file->f_pos; + int buflen = le32_to_cpu(rec->lrh_len); + + ENTRY; + file->f_pos = off; + + if (!buf) { + rc = llog_fsfilt_write_record(ctxt, file, rec, buflen, + &file->f_pos, 0); + if (rc) { + CERROR("error writing log record: rc %d\n", rc); + goto out; + } + GOTO(out, rc = 0); + } + + /* the buf case */ + rec->lrh_len = cpu_to_le32(sizeof(*rec) + buflen + sizeof(end)); + rc = llog_fsfilt_write_record(ctxt, file, rec, sizeof(*rec), + &file->f_pos, 0); + if (rc) { + CERROR("error writing log hdr: rc %d\n", rc); + goto out; + } + + rc = llog_fsfilt_write_record(ctxt, file, buf, buflen, + &file->f_pos, 0); + if (rc) { + CERROR("error writing log buffer: rc %d\n", rc); + goto out; + } + + end.lrt_len = rec->lrh_len; + end.lrt_index = rec->lrh_index; + rc = llog_fsfilt_write_record(ctxt, file, &end, sizeof(end), + &file->f_pos, 0); + if (rc) { + CERROR("error writing log tail: rc %d\n", rc); + goto out; + } + + rc = 0; + out: + if (saved_off > file->f_pos) + file->f_pos = saved_off; + LASSERT(rc <= 0); + RETURN(rc); +} + +static int llog_lvfs_read_blob(struct llog_ctxt *ctxt, struct l_file *file, + void *buf, int size, loff_t off) +{ + loff_t offset = off; + int rc; + ENTRY; + + rc = llog_fsfilt_read_record(ctxt, file, buf, size, &offset); + if (rc) { + CERROR("error reading log record: rc %d\n", rc); + RETURN(rc); + } + RETURN(0); +} + +static int llog_lvfs_read_header(struct llog_handle *handle) +{ + struct llog_ctxt *ctxt = handle->lgh_ctxt; + int rc; + ENTRY; + + LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE); + LASSERT(ctxt != NULL); + + if (handle->lgh_file->f_dentry->d_inode->i_size == 0) { + CDEBUG(D_HA, "not reading header from 0-byte log\n"); + RETURN(LLOG_EEMPTY); + } + + rc = llog_lvfs_read_blob(ctxt, handle->lgh_file, handle->lgh_hdr, + LLOG_CHUNK_SIZE, 0); + if (rc) + CERROR("error reading log header\n"); + + 
handle->lgh_last_idx = le32_to_cpu(handle->lgh_hdr->llh_tail.lrt_index); + handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size; + + RETURN(rc); +} + +/* returns negative on error; 0 if success && reccookie == 0; 1 otherwise */ +/* appends if idx == -1, otherwise overwrites record idx. */ +static int llog_lvfs_write_rec(struct llog_handle *loghandle, + struct llog_rec_hdr *rec, + struct llog_cookie *reccookie, + int cookiecount, + void *buf, int idx) +{ + struct llog_log_hdr *llh; + int reclen = le32_to_cpu(rec->lrh_len), index, rc; + struct llog_rec_tail *lrt; + struct llog_ctxt *ctxt = loghandle->lgh_ctxt; + struct file *file; + loff_t offset; + size_t left; + ENTRY; + + llh = loghandle->lgh_hdr; + file = loghandle->lgh_file; + + /* record length should not be bigger than LLOG_CHUNK_SIZE */ + if (buf) + rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) + - sizeof(struct llog_rec_tail)) ? -E2BIG : 0; + else + rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0; + if (rc) + RETURN(rc); + + if (idx != -1) { + loff_t saved_offset; + + /* no header: only allowed to insert record 1 */ + if (idx != 1 && !file->f_dentry->d_inode->i_size) { + CERROR("idx != -1 in empty log\n"); + LBUG(); + } + + if (idx && llh->llh_size && llh->llh_size != reclen) + RETURN(-EINVAL); + + rc = llog_lvfs_write_blob(ctxt, file, &llh->llh_hdr, NULL, 0); + /* we are done if we only write the header or on error */ + if (rc || idx == 0) + RETURN(rc); + + saved_offset = sizeof(*llh) + (idx-1)*le32_to_cpu(rec->lrh_len); + rc = llog_lvfs_write_blob(ctxt, file, rec, buf, saved_offset); + if (rc == 0 && reccookie) { + reccookie->lgc_lgl = loghandle->lgh_id; + reccookie->lgc_index = idx; + rc = 1; + } + RETURN(rc); + } + + /* Make sure that records don't cross a chunk boundary, so we can + * process them page-at-a-time if needed. If it will cross a chunk + * boundary, write in a fake (but referenced) entry to pad the chunk. 
+ * + * We know that llog_current_log() will return a loghandle that is + * big enough to hold reclen, so all we care about is padding here. + */ + left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1)); + if (buf) + reclen = sizeof(*rec) + le32_to_cpu(rec->lrh_len) + + sizeof(struct llog_rec_tail); + + /* NOTE: padding is a record, but no bit is set */ + if (left != 0 && left != reclen && + left < (reclen + LLOG_MIN_REC_SIZE)) { + loghandle->lgh_last_idx++; + rc = llog_lvfs_pad(ctxt, file, left, loghandle->lgh_last_idx); + if (rc) + RETURN(rc); + } + + loghandle->lgh_last_idx++; + index = loghandle->lgh_last_idx; + rec->lrh_index = cpu_to_le32(index); + if (buf == NULL) { + lrt = (void *)rec + le32_to_cpu(rec->lrh_len) - sizeof(*lrt); + lrt->lrt_len = rec->lrh_len; + lrt->lrt_index = rec->lrh_index; + } + if (ext2_set_bit(index, llh->llh_bitmap)) { + CERROR("argh, index %u already set in log bitmap?\n", index); + LBUG(); /* should never happen */ + } + llh->llh_count = cpu_to_le32(le32_to_cpu(llh->llh_count) + 1); + llh->llh_tail.lrt_index = cpu_to_le32(index); + + offset = 0; + rc = llog_lvfs_write_blob(ctxt, file, &llh->llh_hdr, NULL, 0); + if (rc) + RETURN(rc); + + rc = llog_lvfs_write_blob(ctxt, file, rec, buf, file->f_pos); + if (rc) + RETURN(rc); + + CDEBUG(D_HA, "added record "LPX64": idx: %u, %u bytes\n", + loghandle->lgh_id.lgl_oid, index, le32_to_cpu(rec->lrh_len)); + if (rc == 0 && reccookie) { + reccookie->lgc_lgl = loghandle->lgh_id; + reccookie->lgc_index = index; + if (le32_to_cpu(rec->lrh_type) == MDS_UNLINK_REC) + reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT; + else if (le32_to_cpu(rec->lrh_type) == OST_SZ_REC) + reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT; + else if (le32_to_cpu(rec->lrh_type) == OST_RAID1_REC) + reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT; + else + reccookie->lgc_subsys = -1; + rc = 1; + } + if (rc == 0 && (le32_to_cpu(rec->lrh_type) == LLOG_GEN_REC || + le32_to_cpu(rec->lrh_type) == SMFS_UPDATE_REC)) + rc = 1; + + 
RETURN(rc); +} + +/* We can skip reading at least as many log blocks as the number of +* minimum sized log records we are skipping. If it turns out +* that we are not far enough along the log (because the +* actual records are larger than minimum size) we just skip +* some more records. */ + +static void llog_skip_over(__u64 *off, int curr, int goal) +{ + if (goal <= curr) + return; + *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) & + ~(LLOG_CHUNK_SIZE - 1); +} + +/* sets: + * - curr_offset to the furthest point read in the log file + * - curr_idx to the log index preceding curr_offset + * returns -EIO/-EINVAL on error + */ +static int llog_lvfs_next_block(struct llog_handle *loghandle, int *curr_idx, + int next_idx, __u64 *curr_offset, void *buf, + int len) +{ + struct llog_ctxt *ctxt = loghandle->lgh_ctxt; + int rc; + ENTRY; + + if (len == 0 || len & (LLOG_CHUNK_SIZE - 1)) + RETURN(-EINVAL); + + CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n", + next_idx, *curr_idx, *curr_offset); + + while (*curr_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) { + struct llog_rec_hdr *rec; + struct llog_rec_tail *tail; + loff_t ppos; + + llog_skip_over(curr_offset, *curr_idx, next_idx); + + ppos = *curr_offset; + rc = llog_fsfilt_read_record(ctxt, loghandle->lgh_file, + buf, len, &ppos); + + if (rc) { + CERROR("Cant read llog block at log id "LPU64 + "/%u offset "LPU64"\n", + loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen, + *curr_offset); + RETURN(rc); + } + + /* put number of bytes read into rc to make code simpler */ + rc = ppos - *curr_offset; + *curr_offset = ppos; + + if (rc == 0) /* end of file, nothing to do */ + RETURN(0); + + if (rc < sizeof(*tail)) { + CERROR("Invalid llog block at log id "LPU64"/%u offset " + LPU64"\n", loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen, *curr_offset); + RETURN(-EINVAL); + } + + tail = buf + rc - sizeof(struct llog_rec_tail); + *curr_idx = le32_to_cpu(tail->lrt_index); + + /* this 
shouldn't happen */ + if (tail->lrt_index == 0) { + CERROR("Invalid llog tail at log id "LPU64"/%u offset " + LPU64"\n", loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen, *curr_offset); + RETURN(-EINVAL); + } + if (le32_to_cpu(tail->lrt_index) < next_idx) + continue; + + /* sanity check that the start of the new buffer is no farther + * than the record that we wanted. This shouldn't happen. */ + rec = buf; + if (le32_to_cpu(rec->lrh_index) > next_idx) { + CERROR("missed desired record? %u > %u\n", + le32_to_cpu(rec->lrh_index), next_idx); + RETURN(-ENOENT); + } + RETURN(0); + } + RETURN(-EIO); +} + +static int llog_lvfs_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + struct llog_ctxt *ctxt = loghandle->lgh_ctxt; + __u64 curr_offset; + int rc; + ENTRY; + + if (len == 0 || len & (LLOG_CHUNK_SIZE - 1)) + RETURN(-EINVAL); + + CDEBUG(D_OTHER, "looking for log index %u n", prev_idx); + + curr_offset = LLOG_CHUNK_SIZE; + llog_skip_over(&curr_offset, 0, prev_idx); + + while (curr_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) { + struct llog_rec_hdr *rec; + struct llog_rec_tail *tail; + loff_t ppos; + + ppos = curr_offset; + rc = llog_fsfilt_read_record(ctxt, loghandle->lgh_file, + buf, len, &ppos); + + if (rc) { + CERROR("Cant read llog block at log id "LPU64 + "/%u offset "LPU64"\n", + loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen, + curr_offset); + RETURN(rc); + } + + /* put number of bytes read into rc to make code simpler */ + rc = ppos - curr_offset; + curr_offset = ppos; + + if (rc == 0) /* end of file, nothing to do */ + RETURN(0); + + if (rc < sizeof(*tail)) { + CERROR("Invalid llog block at log id "LPU64"/%u offset " + LPU64"\n", loghandle->lgh_id.lgl_oid, + loghandle->lgh_id.lgl_ogen, curr_offset); + RETURN(-EINVAL); + } + + tail = buf + rc - sizeof(struct llog_rec_tail); + + /* this shouldn't happen */ + if (tail->lrt_index == 0) { + CERROR("Invalid llog tail at log id "LPU64"/%u offset " + 
LPU64"\n", loghandle->lgh_id.lgl_oid,
+                               loghandle->lgh_id.lgl_ogen, curr_offset);
+                        RETURN(-EINVAL);
+                }
+                if (le32_to_cpu(tail->lrt_index) < prev_idx)
+                        continue;
+
+                /* sanity check that the start of the new buffer is no farther
+                 * than the record that we wanted.  This shouldn't happen. */
+                rec = buf;
+                if (le32_to_cpu(rec->lrh_index) > prev_idx) {
+                        CERROR("missed desired record? %u > %u\n",
+                               le32_to_cpu(rec->lrh_index), prev_idx);
+                        RETURN(-ENOENT);
+                }
+                RETURN(0);
+        }
+        RETURN(-EIO);
+}
+
+/* Open (or create, depending on flags) the file "LOGS/<name>".
+ *
+ * Returns the struct file from l_filp_open(), or ERR_PTR(-ENOMEM) when the
+ * path buffer cannot be allocated, or ERR_PTR(-ENAMETOOLONG) when the
+ * composed path does not fit in PATH_MAX. */
+static struct file *llog_filp_open(char *name, int flags, int mode)
+{
+        char *logname;
+        struct file *filp;
+        int len;
+
+        OBD_ALLOC(logname, PATH_MAX);
+        if (logname == NULL)
+                return ERR_PTR(-ENOMEM);
+
+        len = snprintf(logname, PATH_MAX, "LOGS/%s", name);
+        if (len >= PATH_MAX - 1) {
+                filp = ERR_PTR(-ENAMETOOLONG);
+        } else {
+                filp = l_filp_open(logname, flags, mode);
+                if (IS_ERR(filp)) {
+                        CERROR("logfile creation %s: %ld\n", logname,
+                               PTR_ERR(filp));
+                }
+        }
+
+        OBD_FREE(logname, PATH_MAX);
+        return filp;
+}
+
+/* Create a new anonymous log object.
+ *
+ * The object is first created as "OBJECTS/<random number>" and then renamed,
+ * inside the same directory and under its i_sem, to the name produced by
+ * ll_fid2str() from the new inode's number and generation.
+ *
+ * Returns the open struct file on success or an ERR_PTR() on failure; on
+ * failure after the create step the file is closed again. */
+static struct file *llog_object_create(struct llog_ctxt *ctxt)
+{
+        unsigned int tmpname = ll_insecure_random_int();
+        char fidname[LL_FID_NAMELEN];
+        struct file *filp;
+        struct dentry *new_child, *parent;
+        void *handle;
+        int rc = 0, err, namelen;
+        ENTRY;
+
+        snprintf(fidname, sizeof(fidname), "OBJECTS/%u", tmpname);
+        filp = filp_open(fidname, O_CREAT | O_EXCL, 0644);
+        if (IS_ERR(filp)) {
+                rc = PTR_ERR(filp);
+                if (rc == -EEXIST) {
+                        CERROR("impossible object name collision %u\n",
+                               tmpname);
+                        LBUG();
+                }
+                CERROR("error creating tmp object %u: rc %d\n", tmpname, rc);
+                RETURN(filp);
+        }
+
+        namelen = ll_fid2str(fidname, filp->f_dentry->d_inode->i_ino,
+                             filp->f_dentry->d_inode->i_generation);
+        parent = filp->f_dentry->d_parent;
+        down(&parent->d_inode->i_sem);
+        new_child = lookup_one_len(fidname, parent, namelen);
+        if (IS_ERR(new_child)) {
+                /* fetch the error first so the message reports it */
+                rc = PTR_ERR(new_child);
+                CERROR("getting neg dentry for obj rename: %d\n", rc);
+                GOTO(out_close, rc);
+        }
+        if (new_child->d_inode != NULL) {
+                CERROR("impossible non-negative obj dentry %lu:%u!\n",
+                       filp->f_dentry->d_inode->i_ino,
+                       filp->f_dentry->d_inode->i_generation);
+                LBUG();
+        }
+
+        handle = llog_fsfilt_start(ctxt, parent->d_inode, FSFILT_OP_RENAME, NULL);
+        if (IS_ERR(handle))
+                GOTO(out_dput, rc = PTR_ERR(handle));
+
+        lock_kernel();
+        rc = vfs_rename(parent->d_inode, filp->f_dentry,
+                        parent->d_inode, new_child);
+        unlock_kernel();
+        if (rc)
+                CERROR("error renaming new object %lu:%u: rc %d\n",
+                       filp->f_dentry->d_inode->i_ino,
+                       filp->f_dentry->d_inode->i_generation, rc);
+
+        /* a rename error takes precedence over a commit error */
+        err = llog_fsfilt_commit(ctxt, parent->d_inode, handle, 0);
+        if (!rc)
+                rc = err;
+out_dput:
+        dput(new_child);
+out_close:
+        up(&parent->d_inode->i_sem);
+        if (rc) {
+                filp_close(filp, 0);
+                filp = ERR_PTR(rc);
+        }
+
+        RETURN(filp);
+}
+
+/* Hard-link dentry into ctxt->loc_objects_dir under the name that
+ * ll_fid2str() builds from logid.  Finding that name already pointing at the
+ * same inode is treated as success. */
+static int llog_add_link_object(struct llog_ctxt *ctxt, struct llog_logid logid,
+                                struct dentry *dentry)
+{
+        struct dentry *new_child;
+        char fidname[LL_FID_NAMELEN];
+        void *handle;
+        int namelen, rc = 0, err;
+        ENTRY;
+
+        namelen = ll_fid2str(fidname, logid.lgl_oid, logid.lgl_ogen);
+        down(&ctxt->loc_objects_dir->d_inode->i_sem);
+        new_child = lookup_one_len(fidname, ctxt->loc_objects_dir, namelen);
+        if (IS_ERR(new_child)) {
+                /* fetch the error first so the message reports it */
+                rc = PTR_ERR(new_child);
+                CERROR("getting neg dentry for obj rename: %d\n", rc);
+                GOTO(out, rc);
+        }
+        if (new_child->d_inode == dentry->d_inode)
+                GOTO(out_dput, rc);
+        if (new_child->d_inode != NULL) {
+                CERROR("impossible non-negative obj dentry "LPX64":%u!\n",
+                       logid.lgl_oid, logid.lgl_ogen);
+                LBUG();
+        }
+        handle = llog_fsfilt_start(ctxt, ctxt->loc_objects_dir->d_inode,
+                                   FSFILT_OP_LINK, NULL);
+        if (IS_ERR(handle))
+                GOTO(out_dput, rc = PTR_ERR(handle));
+
+        lock_kernel();
+        rc = vfs_link(dentry, ctxt->loc_objects_dir->d_inode, new_child);
+        unlock_kernel();
+        if (rc)
+                CERROR("error link new object "LPX64":%u: rc %d\n",
+                       logid.lgl_oid, logid.lgl_ogen, rc);
+        err = llog_fsfilt_commit(ctxt,
+
ctxt->loc_objects_dir->d_inode, handle, 0);
+        /* do not lose a commit failure when the link itself succeeded */
+        if (!rc)
+                rc = err;
+out_dput:
+        l_dput(new_child);
+out:
+        up(&ctxt->loc_objects_dir->d_inode->i_sem);
+        RETURN(rc);
+}
+
+/* This is a callback from the llog_* functions.
+ * Assumes caller has already pushed us into the kernel context.
+ *
+ * Allocates a log handle and attaches a file to it, chosen as follows:
+ *  - logid != NULL: open the existing object "OBJECTS/<fid name>";
+ *  - else name != NULL: open/create "LOGS/<name>" and link it into the
+ *    objects directory;
+ *  - else: create a fresh anonymous object.
+ *
+ * On success *res is the new handle and 0 is returned.  On failure the
+ * handle is freed, *res is reset to NULL and a negative errno is returned. */
+static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res,
+                            struct llog_logid *logid, char *name)
+{
+        struct llog_handle *handle;
+        struct lvfs_run_ctxt saved;
+        int rc = 0;
+        int open_flags = O_RDWR | O_CREAT | O_LARGEFILE;
+        ENTRY;
+
+        handle = llog_alloc_handle();
+        if (handle == NULL)
+                RETURN(-ENOMEM);
+        *res = handle;
+
+        LASSERT(ctxt);
+        if (ctxt->loc_lvfs_ctxt)
+                push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
+
+        if (logid != NULL) {
+                char logname[LL_FID_NAMELEN + 10] = "OBJECTS/";
+                char fidname[LL_FID_NAMELEN];
+                ll_fid2str(fidname, logid->lgl_oid, logid->lgl_ogen);
+                strcat(logname, fidname);
+
+                handle->lgh_file = filp_open(logname, O_RDWR | O_LARGEFILE, 0644);
+                if (IS_ERR(handle->lgh_file)) {
+                        CERROR("cannot open %s file\n", logname);
+                        GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
+                }
+                if (!S_ISREG(handle->lgh_file->f_dentry->d_inode->i_mode)) {
+                        CERROR("%s is not a regular file!: mode = %o\n", logname,
+                               handle->lgh_file->f_dentry->d_inode->i_mode);
+                        /* NOTE(review): assumes llog_free_handle() does not
+                         * close lgh_file itself - confirm */
+                        filp_close(handle->lgh_file, 0);
+                        GOTO(cleanup, rc = -ENOENT);
+                }
+                LASSERT(handle->lgh_file->f_dentry->d_parent == ctxt->loc_objects_dir);
+                handle->lgh_id = *logid;
+        } else if (name) {
+                handle->lgh_file = llog_filp_open(name, open_flags, 0644);
+                if (IS_ERR(handle->lgh_file))
+                        GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
+                LASSERT(handle->lgh_file->f_dentry->d_parent == ctxt->loc_logs_dir);
+
+                handle->lgh_id.lgl_oid = handle->lgh_file->f_dentry->d_inode->i_ino;
+                handle->lgh_id.lgl_ogen = handle->lgh_file->f_dentry->d_inode->i_generation;
+                rc = llog_add_link_object(ctxt, handle->lgh_id, handle->lgh_file->f_dentry);
+                if (rc) {
+                        /* NOTE(review): same assumption as above */
+                        filp_close(handle->lgh_file, 0);
+                        GOTO(cleanup, rc);
+                }
+        } else {
+                handle->lgh_file = llog_object_create(ctxt);
+                if (IS_ERR(handle->lgh_file))
+                        GOTO(cleanup, rc = PTR_ERR(handle->lgh_file));
+                LASSERT(handle->lgh_file->f_dentry->d_parent == ctxt->loc_objects_dir);
+                handle->lgh_id.lgl_oid = handle->lgh_file->f_dentry->d_inode->i_ino;
+                handle->lgh_id.lgl_ogen = handle->lgh_file->f_dentry->d_inode->i_generation;
+        }
+
+        handle->lgh_id.lgl_ogr = 1;
+        handle->lgh_ctxt = ctxt;
+ finish:
+        if (ctxt->loc_lvfs_ctxt)
+                pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
+        RETURN(rc);
+cleanup:
+        llog_free_handle(handle);
+        /* never hand a freed handle back to the caller */
+        *res = NULL;
+        goto finish;
+}
+
+/* Close the file behind a log handle; returns filp_close()'s result. */
+static int llog_lvfs_close(struct llog_handle *handle)
+{
+        int rc;
+        ENTRY;
+
+        rc = filp_close(handle->lgh_file, 0);
+        if (rc)
+                CERROR("error closing log: rc %d\n", rc);
+        RETURN(rc);
+}
+
+/* Close a log and unlink its file.  For a log living in "LOGS" both the
+ * named entry and the matching entry in the objects directory are removed;
+ * for a log living in "OBJECTS" only that entry is removed.  Returns -EINVAL
+ * when the parent directory is neither. */
+static int llog_lvfs_destroy(struct llog_handle *loghandle)
+{
+        struct llog_ctxt *ctxt = loghandle->lgh_ctxt;
+        struct lvfs_run_ctxt saved;
+        struct dentry *fdentry;
+        struct inode *parent_inode;
+        char fidname[LL_FID_NAMELEN];
+        void *handle;
+        int rc = -EINVAL, err, namelen;
+        ENTRY;
+
+        if (ctxt->loc_lvfs_ctxt)
+                push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL);
+
+        fdentry = loghandle->lgh_file->f_dentry;
+        parent_inode = fdentry->d_parent->d_inode;
+
+        if (!strcmp(fdentry->d_parent->d_name.name, "LOGS")) {
+                LASSERT(parent_inode == ctxt->loc_logs_dir->d_inode);
+
+                namelen = ll_fid2str(fidname, fdentry->d_inode->i_ino,
+                                     fdentry->d_inode->i_generation);
+                /* keep the dentry alive across the close below */
+                dget(fdentry);
+                rc = llog_lvfs_close(loghandle);
+                if (rc) {
+                        dput(fdentry);
+                        GOTO(out, rc);
+                }
+
+                handle = llog_fsfilt_start(ctxt, parent_inode,
+                                           FSFILT_OP_UNLINK, NULL);
+                if (IS_ERR(handle)) {
+                        dput(fdentry);
+                        GOTO(out, rc = PTR_ERR(handle));
+                }
+
+                down(&parent_inode->i_sem);
+                rc = vfs_unlink(parent_inode, fdentry);
+                up(&parent_inode->i_sem);
+                dput(fdentry);
+
+                if (!rc) {
+                        down(&ctxt->loc_objects_dir->d_inode->i_sem);
+                        fdentry = lookup_one_len(fidname, ctxt->loc_objects_dir,
+                                                 namelen);
+                        /* lookup_one_len() reports failure through ERR_PTR(),
+                         * so test for that before touching d_inode */
+                        if (fdentry == NULL || IS_ERR(fdentry) ||
+                            fdentry->d_inode == NULL) {
+                                CERROR("destroy non_existent object %s\n", fidname);
+                                /* drop the reference on a negative dentry */
+                                if (fdentry != NULL && !IS_ERR(fdentry))
+                                        l_dput(fdentry);
+                                GOTO(out_err, rc =
IS_ERR(fdentry) ? + PTR_ERR(fdentry) : -ENOENT); + } + rc = vfs_unlink(ctxt->loc_objects_dir->d_inode, fdentry); + l_dput(fdentry); +out_err: + up(&ctxt->loc_objects_dir->d_inode->i_sem); + } + err = llog_fsfilt_commit(ctxt, parent_inode, handle, 0); + if (err && !rc) + err = rc; + + GOTO(out, rc); + } + + if (!strcmp(fdentry->d_parent->d_name.name, "OBJECTS")) { + LASSERT(parent_inode == ctxt->loc_objects_dir->d_inode); + + dget(fdentry); + rc = llog_lvfs_close(loghandle); + if (rc == 0) { + down(&parent_inode->i_sem); + rc = vfs_unlink(parent_inode, fdentry); + up(&parent_inode->i_sem); + } + dput(fdentry); + } +out: + if (ctxt->loc_lvfs_ctxt) + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); + RETURN(rc); +} + +/* reads the catalog list */ +int llog_get_cat_list(struct lvfs_run_ctxt *ctxt, + struct fsfilt_operations *fsops, char *name, + int count, struct llog_catid *idarray) +{ + struct lvfs_run_ctxt saved; + struct l_file *file; + int size = sizeof(*idarray) * count; + loff_t off = 0; + int rc; + + LASSERT(count); + + if (ctxt) + push_ctxt(&saved, ctxt, NULL); + file = l_filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); + if (!file || IS_ERR(file)) { + rc = PTR_ERR(file); + CERROR("OBD filter: cannot open/create %s: rc = %d\n", + name, rc); + GOTO(out, rc); + } + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + CERROR("%s is not a regular file!: mode = %o\n", name, + file->f_dentry->d_inode->i_mode); + GOTO(out, rc = -ENOENT); + } + + rc = fsops->fs_read_record(file, idarray, size, &off); + if (rc) { + CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", + name, rc); + GOTO(out, rc); + } + + out: + if (file && !IS_ERR(file)) + rc = filp_close(file, 0); + if (ctxt) + pop_ctxt(&saved, ctxt, NULL); + RETURN(rc); +} +EXPORT_SYMBOL(llog_get_cat_list); + +/* writes the cat list */ +int llog_put_cat_list(struct lvfs_run_ctxt *ctxt, + struct fsfilt_operations *fsops, char *name, + int count, struct llog_catid *idarray) +{ + struct lvfs_run_ctxt saved; + 
struct l_file *file; + int size = sizeof(*idarray) * count; + loff_t off = 0; + int rc; + + LASSERT(count); + + if (ctxt) + push_ctxt(&saved, ctxt, NULL); + file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); + if (!file || IS_ERR(file)) { + rc = PTR_ERR(file); + CERROR("OBD filter: cannot open/create %s: rc = %d\n", + name, rc); + GOTO(out, rc); + } + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + CERROR("%s is not a regular file!: mode = %o\n", name, + file->f_dentry->d_inode->i_mode); + GOTO(out, rc = -ENOENT); + } + + rc = fsops->fs_write_record(file, idarray, size, &off, 1); + if (rc) { + CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", + name, rc); + GOTO(out, rc); + } + + out: + if (file && !IS_ERR(file)) + rc = filp_close(file, 0); + if (ctxt) + pop_ctxt(&saved, ctxt, NULL); + RETURN(rc); +} +EXPORT_SYMBOL(llog_put_cat_list); + +struct llog_operations llog_lvfs_ops = { + lop_create: llog_lvfs_create, + lop_destroy: llog_lvfs_destroy, + lop_close: llog_lvfs_close, + lop_read_header: llog_lvfs_read_header, + lop_write_rec: llog_lvfs_write_rec, + lop_next_block: llog_lvfs_next_block, + lop_prev_block: llog_lvfs_prev_block, +}; +EXPORT_SYMBOL(llog_lvfs_ops); + +#else /* !__KERNEL__ */ + +static int llog_lvfs_read_header(struct llog_handle *handle) +{ + LBUG(); + return 0; +} + +static int llog_lvfs_write_rec(struct llog_handle *loghandle, + struct llog_rec_hdr *rec, + struct llog_cookie *reccookie, int cookiecount, + void *buf, int idx) +{ + LBUG(); + return 0; +} + +static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, + struct llog_logid *logid, char *name) +{ + LBUG(); + return 0; +} + +static int llog_lvfs_close(struct llog_handle *handle) +{ + LBUG(); + return 0; +} + +static int llog_lvfs_destroy(struct llog_handle *handle) +{ + LBUG(); + return 0; +} + +int llog_get_cat_list(struct lvfs_run_ctxt *ctxt, + struct fsfilt_operations *fsops, char *name, + int count, struct llog_catid *idarray) +{ + LBUG(); + 
return 0; +} + +int llog_put_cat_list(struct lvfs_run_ctxt *ctxt, + struct fsfilt_operations *fsops, char *name, + int count, struct llog_catid *idarray) +{ + LBUG(); + return 0; +} + +int llog_lvfs_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + LBUG(); + return 0; +} + +int llog_lvfs_next_block(struct llog_handle *loghandle, + int next_idx, void *buf, int len) +{ + LBUG(); + return 0; +} + +struct llog_operations llog_lvfs_ops = { + lop_create: llog_lvfs_create, + lop_destroy: llog_lvfs_destroy, + lop_close: llog_lvfs_close, + lop_read_header: llog_lvfs_read_header, + lop_write_rec: llog_lvfs_write_rec, + lop_next_block: llog_lvfs_next_block, + lop_prev_block: llog_lvfs_prev_block, +}; +#endif diff --git a/lustre/lvfs/lvfs_common.c b/lustre/lvfs/lvfs_common.c index c1a6640..d8ab4a3 100644 --- a/lustre/lvfs/lvfs_common.c +++ b/lustre/lvfs/lvfs_common.c @@ -27,7 +27,7 @@ #include -struct dentry *lvfs_fid2dentry(struct obd_run_ctxt *ctxt, __u64 id, +struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *ctxt, __u64 id, __u32 gen, __u64 gr, void *data) { return ctxt->cb_ops.l_fid2dentry(id, gen, gr, data); diff --git a/lustre/lvfs/lvfs_internal.h b/lustre/lvfs/lvfs_internal.h index 4d68116..d74bafd 100644 --- a/lustre/lvfs/lvfs_internal.h +++ b/lustre/lvfs/lvfs_internal.h @@ -6,3 +6,6 @@ void fsfilt_extN_exit(void); int fsfilt_reiser_init(void); void fsfilt_reiser_exit(void); + +int lookup_by_path(char *path, int flags, struct nameidata *nd); +struct dentry *lookup_create(struct nameidata *nd, int is_dir); diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 1e58081..657076c 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -74,8 +74,8 @@ int obd_memmax; #endif /* push / pop to root of obd store */ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { 
//ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); ASSERT_CTXT_MAGIC(new_ctx->magic); @@ -105,21 +105,21 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { - save->ouc.ouc_fsuid = current->fsuid; - save->ouc.ouc_fsgid = current->fsgid; - save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current_groups[0]; - save->ouc.ouc_suppgid2 = current_groups[1]; - - current->fsuid = uc->ouc_fsuid; - current->fsgid = uc->ouc_fsgid; - current->cap_effective = uc->ouc_cap; + save->luc.luc_fsuid = current->fsuid; + save->luc.luc_fsgid = current->fsgid; + save->luc.luc_cap = current->cap_effective; + save->luc.luc_suppgid1 = current_groups[0]; + save->luc.luc_suppgid2 = current_groups[1]; + + current->fsuid = uc->luc_fsuid; + current->fsgid = uc->luc_fsgid; + current->cap_effective = uc->luc_cap; current_ngroups = 0; - if (uc->ouc_suppgid1 != -1) - current_groups[current_ngroups++] = uc->ouc_suppgid1; - if (uc->ouc_suppgid2 != -1) - current_groups[current_ngroups++] = uc->ouc_suppgid2; + if (uc->luc_suppgid1 != -1) + current_groups[current_ngroups++] = uc->luc_suppgid1; + if (uc->luc_suppgid2 != -1) + current_groups[current_ngroups++] = uc->luc_suppgid2; } set_fs(new_ctx->fs); set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); @@ -137,8 +137,8 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, } EXPORT_SYMBOL(push_ctxt); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { //printk("pc0"); ASSERT_CTXT_MAGIC(saved->magic); @@ -165,12 +165,12 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, dput(saved->pwd); mntput(saved->pwdmnt); if (uc) { - current->fsuid = saved->ouc.ouc_fsuid; - current->fsgid = saved->ouc.ouc_fsgid; - current->cap_effective = saved->ouc.ouc_cap; + current->fsuid = 
saved->luc.luc_fsuid; + current->fsgid = saved->luc.luc_fsgid; + current->cap_effective = saved->luc.luc_cap; current_ngroups = saved->ngroups; - current_groups[0] = saved->ouc.ouc_suppgid1; - current_groups[1] = saved->ouc.ouc_suppgid2; + current_groups[0] = saved->luc.luc_suppgid1; + current_groups[1] = saved->luc.luc_suppgid2; } /* @@ -325,7 +325,7 @@ int lustre_fsync(struct file *file) } EXPORT_SYMBOL(lustre_fsync); -struct l_file *l_dentry_open(struct obd_run_ctxt *ctxt, struct l_dentry *de, +struct l_file *l_dentry_open(struct lvfs_run_ctxt *ctxt, struct l_dentry *de, int flags) { mntget(ctxt->pwdmnt); diff --git a/lustre/lvfs/lvfs_reint.c b/lustre/lvfs/lvfs_reint.c new file mode 100644 index 0000000..2e4e211 --- /dev/null +++ b/lustre/lvfs/lvfs_reint.c @@ -0,0 +1,542 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lvfs_internal.h" + +#include +#include + +/* from namei.c*/ +struct dentry *lookup_create(struct nameidata *nd, int is_dir) +{ + struct dentry *dentry; + + dentry = ERR_PTR(-EEXIST); + if (nd->last_type != LAST_NORM) + goto fail; + dentry = lookup_hash(&nd->last, nd->dentry); + if (IS_ERR(dentry)) + goto fail; + if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) + goto enoent; + return dentry; +enoent: + dput(dentry); + dentry = ERR_PTR(-ENOENT); +fail: + return dentry; +} + +int lookup_by_path(char *path, int flags, struct nameidata *nd) +{ + struct dentry *dentry = NULL; + int rc = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (path_init(path, flags, nd)) { +#else + if (path_lookup(path, flags, nd)) { +#endif + rc = path_walk(path, nd); + if (rc) + RETURN(rc); + } else + 
RETURN(-EINVAL);
+
+        dentry = nd->dentry;
+
+        if (!dentry->d_inode || is_bad_inode(dentry->d_inode) ||
+            (!S_ISDIR(dentry->d_inode->i_mode))) {
+                path_release(nd);
+                RETURN(-ENODEV);
+        }
+        RETURN(rc);
+}
+
+/* Replay a create record: re-create the file, directory, symlink or device
+ * node named by r_rec->rec_data1, then restore owner and timestamps on the
+ * new object and the timestamps on its parent.
+ *
+ * The parent's i_sem is held for the whole operation.  If the object was
+ * created but a later step failed, it is removed again.
+ *
+ * Returns 0 on success or the first error encountered. */
+static int lvfs_reint_create(struct super_block *sb,
+                             struct reint_record *r_rec)
+{
+        struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt;
+        char *path = r_rec->rec_data1;
+        int type = r_rec->u_rec.ur_iattr.ia_mode & S_IFMT;
+        struct nameidata nd;
+        struct dentry *dparent = NULL;
+        struct dentry *dentry = NULL;
+        struct inode *dir = NULL;
+        void *handle = NULL;
+        int rc = 0, created = 0, err = 0;
+
+        rc = lookup_by_path(path, LOOKUP_PARENT, &nd);
+        if (rc)
+                RETURN(rc);
+
+        dparent = nd.dentry;
+        dir = dparent->d_inode;
+
+        down(&dir->i_sem);
+        /* create a new dentry */
+        dentry = lookup_create(&nd, 0);
+
+        if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags))
+                SMFS_CLEAN_INODE_REC(dir);
+
+        /* lookup_create() reports failure through ERR_PTR() */
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                dentry = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        /* On fs_start() failure the handle is reset to NULL after its error
+         * code has been taken, so that cleanup does not commit an ERR_PTR. */
+        switch(type) {
+        case S_IFREG:
+                handle = fsfilt->fs_start(dir, FSFILT_OP_CREATE, NULL, 0);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        handle = NULL;
+                        GOTO(cleanup, rc);
+                }
+                rc = ll_vfs_create(dir, dentry, r_rec->u_rec.ur_iattr.ia_mode,
+                                   NULL);
+                break;
+        case S_IFDIR:
+                handle = fsfilt->fs_start(dir, FSFILT_OP_MKDIR, NULL, 0);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        handle = NULL;
+                        GOTO(cleanup, rc);
+                }
+                rc = vfs_mkdir(dir, dentry, r_rec->u_rec.ur_iattr.ia_mode);
+                break;
+        case S_IFLNK: {
+                char *new_path = r_rec->rec_data2;
+                handle = fsfilt->fs_start(dir, FSFILT_OP_SYMLINK, NULL, 0);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        handle = NULL;
+                        GOTO(cleanup, rc);
+                }
+                rc = vfs_symlink(dir, dentry, new_path);
+                break;
+        }
+        case S_IFCHR:
+        case S_IFBLK:
+        case S_IFIFO:
+        case S_IFSOCK: {
+                int rdev = r_rec->u_rec.ur_rdev;
+                handle = fsfilt->fs_start(dir, FSFILT_OP_MKNOD, NULL, 0);
+                if (IS_ERR(handle)) {
+                        rc = PTR_ERR(handle);
+                        handle = NULL;
+                        GOTO(cleanup, rc);
+                }
+                rc = vfs_mknod(dir, dentry, r_rec->u_rec.ur_iattr.ia_mode,
+                               rdev);
+                break;
+        }
+        default:
+                CERROR("Error type %d in create\n", type);
+                rc = -EINVAL;
+                break;
+        }
+
+        if (rc) {
+                CERROR("Error for creating mkdir %s\n", path);
+                GOTO(cleanup, rc);
+        } else {
+                /* zeroed so that fields not assigned below are defined */
+                struct iattr iattr = { 0 };
+
+                created = 1;
+
+                LTIME_S(iattr.ia_atime) = LTIME_S(r_rec->u_rec.ur_iattr.ia_atime);
+                LTIME_S(iattr.ia_ctime) = LTIME_S(r_rec->u_rec.ur_iattr.ia_ctime);
+                LTIME_S(iattr.ia_mtime) = LTIME_S(r_rec->u_rec.ur_iattr.ia_mtime);
+
+                iattr.ia_uid = r_rec->u_rec.ur_fsuid;
+                /* a set-gid parent passes its group on to the child */
+                if (dir->i_mode & S_ISGID)
+                        iattr.ia_gid = dir->i_gid;
+                else
+                        iattr.ia_gid = r_rec->u_rec.ur_fsgid;
+                iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME |
+                        ATTR_MTIME | ATTR_CTIME;
+
+                rc = fsfilt->fs_setattr(dentry, handle, &iattr, 0);
+                if (rc) {
+                        CERROR("error on child setattr: rc = %d\n", rc);
+                        GOTO(cleanup, rc);
+                }
+
+                iattr.ia_valid = ATTR_MTIME | ATTR_CTIME;
+                rc = fsfilt->fs_setattr(dparent, handle, &iattr, 0);
+                if (rc) {
+                        CERROR("error on parent setattr: rc = %d\n", rc);
+                        GOTO(cleanup, rc);
+                }
+        }
+cleanup:
+        if (rc && created) {
+                /* Destroy the file we just created.  This should not need
+                 * extra journal credits, as we have already modified all of
+                 * the blocks needed in order to create the file in the first
+                 * place.
+                 */
+                switch (type) {
+                case S_IFDIR:
+                        err = vfs_rmdir(dir, dentry);
+                        if (err)
+                                CERROR("rmdir in error path: %d\n", err);
+                        break;
+                default:
+                        err = vfs_unlink(dir, dentry);
+                        if (err)
+                                CERROR("unlink in error path: %d\n", err);
+                        break;
+                }
+        }
+        if (handle) {
+                /* commit against the parent: the child dentry may have no
+                 * inode on the failure paths.  An earlier error wins. */
+                err = fsfilt->fs_commit(dir, handle, 0);
+                if (!rc)
+                        rc = err;
+        }
+
+        if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags))
+                SMFS_SET_INODE_REC(dir);
+        up(&dir->i_sem);
+        path_release(&nd);
+        if (dentry)
+                l_dput(dentry);
+
+        RETURN(rc);
+}
+
+static int lvfs_reint_link(struct super_block *sb,
+                           struct reint_record *r_rec)
+{
+        struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt;
+        char *old_path = r_rec->rec_data1;
+        char *new_path = r_rec->rec_data2;
+        struct nameidata old_nd;
+        struct nameidata new_nd;
+        struct dentry *old_dparent;
+        struct dentry *new_dparent;
+        struct dentry *old_dentry = NULL;
+        struct dentry *new_dentry = NULL;
+        void *handle = NULL;
+        struct inode *dir = NULL;
+        int rc = 0;
+
+        /*get parent dentry*/
+        rc = lookup_by_path(new_path, LOOKUP_PARENT, &new_nd);
+        if (rc)
+                RETURN(rc);
+
+        new_dparent = new_nd.dentry;
+
+        dir = new_dparent->d_inode;
+
+        new_dentry = lookup_create(&new_nd, 0);
+
+        rc = lookup_by_path(old_path, LOOKUP_PARENT, &old_nd);
+        if (rc) {
+                path_release(&new_nd);
+                RETURN(rc);
+        }
+        old_dparent = old_nd.dentry;
+        old_dentry = lookup_one_len(old_nd.last.name, old_dparent,
+                                    old_nd.last.len);
+
+        if (!
old_dentry || !old_dentry->d_inode + || is_bad_inode(old_dentry->d_inode)) { + GOTO(cleanup, rc = -ENODEV); + } + if (dir->i_rdev != old_dentry->d_inode->i_rdev) { + GOTO(cleanup, rc = -EINVAL); + } + + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + + handle = fsfilt->fs_start(dir, FSFILT_OP_LINK, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + rc = vfs_link(old_dentry, dir, new_dentry); + if (rc) { + CERROR("replay error: vfs_link error rc=%d", rc); + GOTO(cleanup, rc); + } +cleanup: + if (handle) { + rc = fsfilt->fs_commit(dir, handle, 0); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (old_dentry) + l_dput(old_dentry); + if (new_dentry) + l_dput(new_dentry); + path_release(&new_nd); + path_release(&old_nd); + RETURN(rc); +}; +static int lvfs_reint_unlink(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + int type = r_rec->u_rec.ur_iattr.ia_mode & S_IFMT; + char *path = r_rec->rec_data1; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + + dir = dparent->d_inode; + + dentry = lookup_one_len(nd.last.name, dparent, + nd.last.len); + if (! 
dentry || !dentry->d_inode + || is_bad_inode(dentry->d_inode)) { + GOTO(cleanup, rc = -ENODEV); + } + + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + + handle = fsfilt->fs_start(dir, FSFILT_OP_UNLINK, NULL, 0); + + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + switch (type) { + case S_IFDIR: + rc = vfs_rmdir(dir, dentry); + if (rc) + CERROR("rmdir in error path: %d\n", rc); + break; + default: + rc = vfs_unlink(dir, dentry); + if (rc) + CERROR("unlink in error path: %d\n", rc); + break; + } + if (!rc) { + /*time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + path_release(&nd); + RETURN(rc); +}; + +static int lvfs_reint_rename(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + char *new_path = r_rec->rec_data2; + struct nameidata nd; + struct nameidata new_nd; + struct dentry *dparent = NULL; + struct dentry *new_dparent = NULL; + struct dentry *dentry = NULL; + struct dentry *new_dentry = NULL; + struct inode *dir = NULL; + struct inode *new_dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry || !dentry->d_inode + || is_bad_inode(dentry->d_inode)) { + path_release(&nd); + RETURN(rc); + } + rc = lookup_by_path(new_path, LOOKUP_PARENT, &new_nd); + if (rc) { + path_release(&nd); + path_release(&new_nd); + RETURN(rc); + } + new_dparent = 
new_nd.dentry; + new_dir = new_dparent->d_inode; + new_dentry = lookup_create(&new_nd, 0); + + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + + handle = fsfilt->fs_start(dir, FSFILT_OP_RENAME, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + rc = vfs_rename(dir, dentry, new_dir, new_dentry); + if (rc) { + CERROR("unlink in error path: %d\n", rc); + GOTO(cleanup, 0); + } else { + /*restore time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + rc = fsfilt->fs_setattr(new_dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + rc = fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + if (new_dentry) + l_dput(new_dentry); + path_release(&nd); + path_release(&new_nd); + RETURN(0); +}; + +static int lvfs_reint_setattr(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry || !dentry->d_inode + || is_bad_inode(dentry->d_inode)) { + path_release(&nd); + RETURN(rc); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + handle = fsfilt->fs_start(dir, FSFILT_OP_SETATTR, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + rc = 
fsfilt->fs_setattr(dentry, handle, &r_rec->u_rec.ur_pattr, 0); +cleanup: + if (handle) + fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + path_release(&nd); + RETURN(0); +}; +static int lvfs_reint_close(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + struct iattr *iattr = &r_rec->u_rec.ur_iattr; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry || !dentry->d_inode + || is_bad_inode(dentry->d_inode)) { + path_release(&nd); + RETURN(rc); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + handle = fsfilt->fs_start(dir, FSFILT_OP_CREATE, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME | ATTR_SIZE; + + rc = fsfilt->fs_setattr(dentry, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } +cleanup: + if (handle) + fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + path_release(&nd); + RETURN(0); +}; + +typedef int (*lvfs_reinter)(struct super_block *sb, struct reint_record *); +static lvfs_reinter reinters[REINT_MAX + 1] = { + [REINT_SETATTR] lvfs_reint_setattr, + [REINT_CREATE] lvfs_reint_create, + [REINT_LINK] lvfs_reint_link, + [REINT_UNLINK] lvfs_reint_unlink, + [REINT_RENAME] lvfs_reint_rename, + [REINT_CLOSE] lvfs_reint_close, +}; +int lvfs_reint(struct super_block *sb, + void *r_rec) +{ + return reinters[((struct 
reint_record*)r_rec)->u_rec.ur_opcode](sb, + (struct reint_record *)r_rec); +}; + +EXPORT_SYMBOL(lvfs_reint); diff --git a/lustre/lvfs/lvfs_undo.c b/lustre/lvfs/lvfs_undo.c new file mode 100644 index 0000000..43e9dbf --- /dev/null +++ b/lustre/lvfs/lvfs_undo.c @@ -0,0 +1,421 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_FILTER + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lvfs_internal.h" + +#include +#include + +static int lvfs_undo_create(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + int type = r_rec->u_rec.ur_iattr.ia_mode & S_IFMT; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + + down(&dir->i_sem); + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { + up(&dir->i_sem); + if (dentry) + l_dput(dentry); + path_release(&nd); + RETURN(-ENODEV); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + + switch(type) { + case S_IFREG: + case S_IFLNK: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + handle = fsfilt->fs_start(dir, FSFILT_OP_UNLINK, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + rc = vfs_unlink(dir, dentry); + if (rc) + CERROR("unlink in error path: %d\n", rc); + break; + case S_IFDIR: + handle = fsfilt->fs_start(dir, FSFILT_OP_RMDIR, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = 
PTR_ERR(handle)); + + rc = vfs_rmdir(dir, dentry); + if (rc) + CERROR("rmdir in error path: %d\n", rc); + break; + default: + CERROR("Error type %d in create\n", type); + rc = -EINVAL; + break; + } + + if (rc) { + CERROR("Error for undo node %s\n", path); + GOTO(cleanup, 0); + } else { + /*restore time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + rc = fsfilt->fs_commit(dparent->d_inode, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + up(&dir->i_sem); + path_release(&nd); + RETURN(0); +}; + +static int lvfs_undo_link(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *new_path = r_rec->rec_data2; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(new_path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + + down(&dir->i_sem); + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { + up(&dir->i_sem); + path_release(&nd); + RETURN(-ENODEV); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + + handle = fsfilt->fs_start(dir, FSFILT_OP_UNLINK, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + rc = vfs_unlink(dir, dentry); + if (rc) + CERROR("unlink in error path: %d\n", rc); + + if (rc) { + CERROR("Error for undo node %s\n", new_path); + GOTO(cleanup, 0); + } else { + /*restore time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + 
rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + rc = fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + up(&dir->i_sem); + path_release(&nd); + RETURN(0); +} + +static int lvfs_undo_unlink(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + struct nameidata nd; + struct dentry *dparent; + struct dentry *dentry = NULL; + struct nameidata del_nd; + struct dentry *del_dparent = NULL; + struct dentry *del_dentry = NULL; + void *handle = NULL; + struct inode *dir = NULL; + int rc = 0; + + /*get parent dentry*/ + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + + dentry = lookup_create(&nd, 0); + + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + if (SMFS_DO_DEC_LINK(r_rec->u_rec.ur_flags)) { + ino_t ino = *((ino_t *)r_rec->rec_data2); + struct inode* inode = iget(dir->i_sb, ino); + if (!inode) + GOTO(cleanup1, rc = -EINVAL); + handle = fsfilt->fs_start(dir, FSFILT_OP_LINK, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup1, rc = PTR_ERR(handle)); + + del_dentry = pre_smfs_dentry(NULL, inode, dentry); + rc = vfs_link(del_dentry, dir, dentry); +cleanup1: + post_smfs_dentry(del_dentry); + iput(inode); + } else { + char *del_path = r_rec->rec_data2; + + rc = lookup_by_path(del_path, LOOKUP_PARENT, &del_nd); + if (rc) + GOTO(cleanup, rc = -ENODEV); + del_dparent = del_nd.dentry; + del_dentry = lookup_one_len(del_nd.last.name, del_dparent, + del_nd.last.len); + + if (! 
del_dentry || !del_dentry->d_inode + || is_bad_inode(del_dentry->d_inode)) + GOTO(cleanup2, rc = -ENODEV); + + handle = fsfilt->fs_start(dir, FSFILT_OP_RENAME, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup2, rc = PTR_ERR(handle)); + + lock_kernel(); + /*move the del dentry back to the original palace*/ + rc = vfs_rename(del_dparent->d_inode, del_dentry, dir, dentry); + unlock_kernel(); + if (!rc && S_ISDIR(del_dentry->d_inode->i_mode)) + del_dentry->d_inode->i_flags &=~S_DEAD; +cleanup2: + if (del_dentry) + l_dput(del_dentry); + path_release(&del_nd); + } + if (!rc) { + /*restore time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + fsfilt->fs_commit(dir, handle, 0); + if (dentry); + l_dput(dentry); + path_release(&nd); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + RETURN(rc); +} + +static int lvfs_undo_rename(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + char *new_path = r_rec->rec_data2; + struct nameidata nd; + struct nameidata new_nd; + struct dentry *dparent; + struct dentry *new_dparent; + struct dentry *dentry = NULL; + struct dentry *new_dentry = NULL; + void *handle = NULL; + struct inode *dir = NULL; + struct inode *new_dir = NULL; + int rc = 0; + + /*get parent dentry*/ + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + dentry = lookup_create(&nd, 0); + + rc = lookup_by_path(new_path, LOOKUP_PARENT, &new_nd); + if (rc) { + path_release(&nd); + RETURN(rc); + } + new_dparent = new_nd.dentry; + new_dir = new_dparent->d_inode; + new_dentry = lookup_one_len(new_nd.last.name, new_dparent, + new_nd.last.len); + + if 
(! new_dentry || !new_dentry->d_inode + || is_bad_inode(new_dentry->d_inode)) { + GOTO(cleanup, rc = -ENODEV); + } + + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(new_dir); + + handle = fsfilt->fs_start(new_dir, FSFILT_OP_RENAME, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + lock_kernel(); + /*move the del dentry back to the original palace*/ + rc = vfs_rename(new_dir, new_dentry, dir, dentry); + unlock_kernel(); + if (rc) { + CERROR("Error for undo node %s\n", new_path); + GOTO(cleanup, 0); + } else { + /*restore time attr of dir inode*/ + struct iattr *iattr = &r_rec->u_rec.ur_pattr; + + iattr->ia_valid = ATTR_MTIME | ATTR_CTIME; + rc = fsfilt->fs_setattr(dparent, handle, iattr, 0); + if (rc) { + CERROR("error on parent setattr: rc = %d\n", rc); + GOTO(cleanup, rc); + } + } +cleanup: + if (handle) + rc = fsfilt->fs_commit(new_dir, handle, 0); + if (dentry); + l_dput(dentry); + if (new_dentry) + l_dput(new_dentry); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(new_dir); + path_release(&nd); + path_release(&new_nd); + RETURN(rc); +}; + +static int lvfs_undo_setattr(struct super_block *sb, + struct reint_record *r_rec) +{ + struct fsfilt_operations *fsfilt = S2SMI(sb)->sm_fsfilt; + char *path = r_rec->rec_data1; + struct nameidata nd; + struct dentry *dparent = NULL; + struct dentry *dentry = NULL; + struct inode *dir = NULL; + void *handle = NULL; + int rc = 0; + + rc = lookup_by_path(path, LOOKUP_PARENT, &nd); + if (rc) + RETURN(rc); + + dparent = nd.dentry; + dir = dparent->d_inode; + dentry = lookup_one_len(nd.last.name, dparent, nd.last.len); + + if (!dentry || !dentry->d_inode + || is_bad_inode(dentry->d_inode)) { + path_release(&nd); + RETURN(rc); + } + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_CLEAN_INODE_REC(dir); + handle = fsfilt->fs_start(dir, FSFILT_OP_SETATTR, NULL, 0); + if (IS_ERR(handle)) + GOTO(cleanup, rc = PTR_ERR(handle)); + + /*FIXME later, did not set 
parent attr*/ + r_rec->u_rec.ur_iattr.ia_valid = r_rec->u_rec.ur_pattr.ia_valid; + rc = fsfilt->fs_setattr(dentry, handle, &r_rec->u_rec.ur_iattr, 0); +cleanup: + if (handle) + fsfilt->fs_commit(dir, handle, 0); + if (!SMFS_DO_WRITE_KML(r_rec->u_rec.ur_flags)) + SMFS_SET_INODE_REC(dir); + if (dentry) + l_dput(dentry); + path_release(&nd); + RETURN(0); + + RETURN(0); +}; + + +typedef int (*lvfs_undoer)(struct super_block *sb, struct reint_record *); + +static lvfs_undoer undoers[REINT_MAX + 1] = { + [REINT_SETATTR] lvfs_undo_setattr, + [REINT_CREATE] lvfs_undo_create, + [REINT_LINK] lvfs_undo_link, + [REINT_UNLINK] lvfs_undo_unlink, + [REINT_RENAME] lvfs_undo_rename, +}; + +int lvfs_undo(struct super_block *sb, + void *r_rec) +{ + return undoers[((struct reint_record*)r_rec)->u_rec.ur_opcode](sb, + (struct reint_record *)r_rec); +}; + +EXPORT_SYMBOL(lvfs_undo); diff --git a/lustre/lvfs/lvfs_userfs.c b/lustre/lvfs/lvfs_userfs.c index 021a0a3..396ee30 100644 --- a/lustre/lvfs/lvfs_userfs.c +++ b/lustre/lvfs/lvfs_userfs.c @@ -25,20 +25,19 @@ #include #include -#include "lvfs_internal.h" #include #include /* XXX currently ctxt functions should not be used ?? 
*/ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { LBUG(); } -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { LBUG(); } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 2cb67a2..fbc448d 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -980,8 +980,8 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - rc = llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, - &llog_client_ops); + rc = obd_llog_setup(obd, LLOG_CONFIG_REPL_CTXT, tgt, 0, NULL, + &llog_client_ops); if (rc == 0) { ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT); ctxt->loc_imp = obd->u.cli.cl_import; @@ -995,7 +995,7 @@ static int mdc_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_CONFIG_REPL_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_CONFIG_REPL_CTXT)); RETURN(rc); } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 6ebbcad..b7e7aa6 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -130,7 +130,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, rc = -ETIMEDOUT; /* XXX should this be a different errno? */ } - + DEBUG_REQ(D_ERROR, req, "bulk failed: %s %d(%d), evicting %s@%s\n", (rc == -ETIMEDOUT) ? 
"timeout" : "network error", desc->bd_nob_transferred, count, @@ -303,7 +303,7 @@ out: return rc; } -static int mds_init_export(struct obd_export *exp) +static int mds_init_export(struct obd_export *exp) { struct mds_export_data *med = &exp->exp_mds_data; @@ -316,7 +316,7 @@ static int mds_destroy_export(struct obd_export *export) { struct mds_export_data *med; struct obd_device *obd = export->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc = 0; ENTRY; @@ -326,7 +326,8 @@ static int mds_destroy_export(struct obd_export *export) if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid)) GOTO(out, 0); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + /* Close any open files (which may also cause orphan unlinking). */ spin_lock(&med->med_open_lock); while (!list_empty(&med->med_open_head)) { @@ -345,7 +346,7 @@ static int mds_destroy_export(struct obd_export *export) dentry->d_name.len, dentry->d_name.name, ll_bdevname(dentry->d_inode->i_sb, btmp), dentry->d_inode->i_ino); - rc = mds_mfd_close(NULL, obd, mfd, + rc = mds_mfd_close(NULL, obd, mfd, !(export->exp_flags & OBD_OPT_FAILOVER)); if (rc) @@ -353,7 +354,7 @@ static int mds_destroy_export(struct obd_export *export) spin_lock(&med->med_open_lock); } spin_unlock(&med->med_open_lock); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); out: mds_client_free(export, !(export->exp_flags & OBD_OPT_FAILOVER)); @@ -658,10 +659,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, { struct obd_device *obd = req->rq_export->exp_obd; struct ldlm_reply *rep = NULL; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct mds_body *body; struct dentry *dparent = NULL, *dchild = NULL; - struct obd_ucred uc; + struct lvfs_ucred uc; struct lustre_handle parent_lockh; int namesize; int rc = 0, cleanup_phase = 0, resent_req = 0; @@ -689,17 +690,17 @@ static int mds_getattr_name(int offset, 
struct ptlrpc_request *req, LASSERT (offset == 0 || offset == 2); /* if requests were at offset 2, the getattr reply goes back at 1 */ - if (offset) { + if (offset) { rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); offset = 1; } - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - uc.ouc_suppgid1 = body->suppgid; - uc.ouc_suppgid2 = -1; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + uc.luc_suppgid1 = body->suppgid; + uc.luc_suppgid2 = -1; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); cleanup_phase = 1; /* kernel context */ intent_set_disposition(rep, DISP_LOOKUP_EXECD); @@ -789,7 +790,7 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, } l_dput(dchild); case 1: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); default: ; } return rc; @@ -799,10 +800,10 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct obd_ucred uc; + struct lvfs_ucred uc; int rc = 0; ENTRY; @@ -813,10 +814,10 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) RETURN (-EFAULT); } - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); de = mds_fid2dentry(mds, &body->fid1, NULL); if (IS_ERR(de)) { rc = req->rq_status = -ENOENT; @@ -834,7 +835,7 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) l_dput(de); GOTO(out_pop, rc); out_pop: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); return rc; } @@ -937,9 
+938,9 @@ static int mds_readpage(struct ptlrpc_request *req) struct dentry *de; struct file *file; struct mds_body *body, *repbody; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc, size = sizeof(*repbody); - struct obd_ucred uc; + struct lvfs_ucred uc; ENTRY; rc = lustre_pack_reply(req, 1, &size, NULL); @@ -952,10 +953,10 @@ static int mds_readpage(struct ptlrpc_request *req) if (body == NULL) GOTO (out, rc = -EFAULT); - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); de = mds_fid2dentry(&obd->u.mds, &body->fid1, &mnt); if (IS_ERR(de)) GOTO(out_pop, rc = PTR_ERR(de)); @@ -994,7 +995,7 @@ static int mds_readpage(struct ptlrpc_request *req) out_file: filp_close(file, 0); out_pop: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); out: req->rq_status = rc; RETURN(0); @@ -1271,6 +1272,11 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_next_block(req); break; + case LLOG_ORIGIN_HANDLE_PREV_BLOCK: + DEBUG_REQ(D_INODE, req, "llog prev block"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); + rc = llog_origin_handle_prev_block(req); + break; case LLOG_ORIGIN_HANDLE_READ_HEADER: DEBUG_REQ(D_INODE, req, "llog read header"); OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); @@ -1343,12 +1349,12 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) struct mds_obd *mds = &obd->u.mds; struct mds_server_data *msd = mds->mds_server_data; struct file *filp = mds->mds_rcvd_filp; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; loff_t off = 0; int rc; ENTRY; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno); CDEBUG(D_SUPER, "MDS 
mount_count is "LPU64", last_transno is "LPU64"\n", @@ -1356,7 +1362,7 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) rc = fsfilt_write_record(obd, filp, msd, sizeof(*msd), &off,force_sync); if (rc) CERROR("error writing MDS server data: rc = %d\n", rc); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } @@ -1368,9 +1374,10 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; struct mds_obd *mds = &obd->u.mds; + char *options = NULL; struct vfsmount *mnt; - int rc = 0; unsigned long page; + int rc = 0; ENTRY; dev_clear_rdonly(2); @@ -1382,15 +1389,27 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) if (IS_ERR(obd->obd_fsops)) RETURN(rc = PTR_ERR(obd->obd_fsops)); - if (!(page = __get_free_page(GFP_KERNEL))) + page = __get_free_page(GFP_KERNEL); + if (!page) RETURN(-ENOMEM); - memset((void *)page, 0, PAGE_SIZE); - sprintf((char *)page, "iopen_nopriv,errors=remount-ro"); + options = (char *)page; + memset(options, 0, PAGE_SIZE); - mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, - lcfg->lcfg_inlbuf1, (void *)page); + /* here we use "iopen_nopriv" hardcoded, because it affects MDS utility + * and the rest of options are passed by mount options. Probably this + * should be moved to somewhere else like startup scripts or lconf. 
*/ + sprintf(options, "iopen_nopriv"); + + if (lcfg->lcfg_inllen4 > 0 && lcfg->lcfg_inlbuf4) + sprintf(options + strlen(options), ",%s", + lcfg->lcfg_inlbuf4); + + mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, + lcfg->lcfg_inlbuf1, options); + free_page(page); + if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); CERROR("do_kern_mount failed: rc = %d\n", rc); @@ -1474,23 +1493,25 @@ static int mds_postsetup(struct obd_device *obd) int rc = 0; ENTRY; - - rc = llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, NULL, - &llog_lvfs_ops); + rc = obd_llog_setup(obd, LLOG_CONFIG_ORIG_CTXT, obd, 0, + NULL, &llog_lvfs_ops); if (rc) RETURN(rc); - if (mds->mds_profile) { - struct obd_run_ctxt saved; + /* This check for @dumb string is needed to handle + mounting MDS with smfs. Read lconf:MDSDEV.write_conf() for + more detail explanation. */ + if (mds->mds_profile && strcmp(mds->mds_profile, "dumb")) { + struct lvfs_run_ctxt saved; struct lustre_profile *lprof; struct config_llog_instance cfg; cfg.cfg_instance = NULL; cfg.cfg_uuid = mds->mds_lov_uuid; - push_ctxt(&saved, &obd->obd_ctxt, NULL); - rc = class_config_parse_llog(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = class_config_parse_llog(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), mds->mds_profile, &cfg); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) GOTO(err_llog, rc); @@ -1509,11 +1530,11 @@ static int mds_postsetup(struct obd_device *obd) err_cleanup: mds_lov_clean(obd); err_llog: - llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); + obd_llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); RETURN(rc); } -static int mds_postrecov(struct obd_device *obd) +static int mds_postrecov(struct obd_device *obd) { int rc, rc2; @@ -1543,7 +1564,7 @@ int mds_lov_clean(struct obd_device *obd) if (mds->mds_profile) { char * cln_prof; struct config_llog_instance cfg; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int len 
= strlen(mds->mds_profile) + sizeof("-clean") + 1; OBD_ALLOC(cln_prof, len); @@ -1552,10 +1573,10 @@ int mds_lov_clean(struct obd_device *obd) cfg.cfg_instance = NULL; cfg.cfg_uuid = mds->mds_lov_uuid; - push_ctxt(&saved, &obd->obd_ctxt, NULL); - class_config_parse_llog(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + class_config_parse_llog(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), cln_prof, &cfg); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); OBD_FREE(cln_prof, len); OBD_FREE(mds->mds_profile, strlen(mds->mds_profile) + 1); @@ -1571,7 +1592,7 @@ static int mds_precleanup(struct obd_device *obd, int flags) mds_lov_disconnect(obd, flags); mds_lov_clean(obd); - llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); + obd_llog_cleanup(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT)); RETURN(rc); } @@ -1734,7 +1755,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, RETURN(ELDLM_LOCK_ABORTED); if (intent_disposition(rep, DISP_LOOKUP_NEG) && !intent_disposition(rep, DISP_OPEN_OPEN)) -#endif +#endif RETURN(ELDLM_LOCK_ABORTED); break; case IT_LOOKUP: @@ -1746,11 +1767,11 @@ static int mds_intent_policy(struct ldlm_namespace *ns, getattr_part); /* FIXME: LDLM can set req->rq_status. MDS sets policy_res{1,2} with disposition and status. 
- - replay: returns 0 & req->status is old status + - replay: returns 0 & req->status is old status - otherwise: returns req->status */ if (intent_disposition(rep, DISP_LOOKUP_NEG)) rep->lock_policy_res2 = 0; - if (!intent_disposition(rep, DISP_LOOKUP_POS) || + if (!intent_disposition(rep, DISP_LOOKUP_POS) || rep->lock_policy_res2) RETURN(ELDLM_LOCK_ABORTED); if (req->rq_status != 0) { @@ -1833,8 +1854,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds", - obd->obd_proc_entry); + mds_handle, "mds", obd->obd_proc_entry); if (!mds->mds_service) { CERROR("failed to start service\n"); diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 0c74ec0..c907a91 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -99,15 +99,15 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, (cl_idx * le16_to_cpu(mds->mds_server_data->msd_client_size)); if (new_client) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; loff_t off = med->med_off; struct file *file = mds->mds_rcvd_filp; int rc; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, file, med->med_mcd, sizeof(*med->med_mcd), &off, 1); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) return rc; @@ -124,7 +124,7 @@ int mds_client_free(struct obd_export *exp, int clear_client) struct mds_obd *mds = &exp->exp_obd->u.mds; struct obd_device *obd = exp->exp_obd; struct mds_client_data zero_mcd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc; unsigned long *bitmap = mds->mds_client_bitmap; @@ -150,10 +150,10 @@ int mds_client_free(struct obd_export *exp, int clear_client) if (clear_client) { memset(&zero_mcd, 0, sizeof zero_mcd); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, 
&obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd, sizeof(zero_mcd), &med->med_off, 1); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); CDEBUG(rc == 0 ? D_INFO : D_ERROR, "zeroing out client %s idx %u in %s rc %d\n", @@ -367,7 +367,7 @@ err_msd: int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) { struct mds_obd *mds = &obd->u.mds; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct dentry *dentry; struct file *file; int rc; @@ -382,14 +382,14 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) fsfilt_setup(obd, mds->mds_sb); - OBD_SET_CTXT_MAGIC(&obd->obd_ctxt); - obd->obd_ctxt.pwdmnt = mnt; - obd->obd_ctxt.pwd = mnt->mnt_root; - obd->obd_ctxt.fs = get_ds(); - obd->obd_ctxt.cb_ops = mds_lvfs_ops; + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); + obd->obd_lvfs_ctxt.pwdmnt = mnt; + obd->obd_lvfs_ctxt.pwd = mnt->mnt_root; + obd->obd_lvfs_ctxt.fs = get_ds(); + obd->obd_lvfs_ctxt.cb_ops = mds_lvfs_ops; /* setup the directory tree */ - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); dentry = simple_mkdir(current->fs->pwd, "ROOT", 0755, 0); if (IS_ERR(dentry)) { rc = PTR_ERR(dentry); @@ -475,7 +475,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) GOTO(err_lov_objid, rc = -ENOENT); } err_pop: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); return rc; @@ -502,7 +502,7 @@ err_fid: int mds_fs_cleanup(struct obd_device *obd, int flags) { struct mds_obd *mds = &obd->u.mds; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc = 0; if (flags & OBD_OPT_FAILOVER) @@ -512,7 +512,7 @@ int mds_fs_cleanup(struct obd_device *obd, int flags) class_disconnect_exports(obd, flags); /* cleans up client info too */ mds_server_free_data(mds); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (mds->mds_rcvd_filp) { rc = 
filp_close(mds->mds_rcvd_filp, 0); mds->mds_rcvd_filp = NULL; @@ -537,7 +537,7 @@ int mds_fs_cleanup(struct obd_device *obd, int flags) l_dput(mds->mds_pending_dir); mds->mds_pending_dir = NULL; } - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); shrink_dcache_parent(mds->mds_fid_de); dput(mds->mds_fid_de); @@ -548,20 +548,20 @@ int mds_fs_cleanup(struct obd_device *obd, int flags) * performance sensitive, it is accomplished by creating a file, checking the * fid, and renaming it. */ int mds_obd_create(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) + struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct mds_obd *mds = &exp->exp_obd->u.mds; struct inode *parent_inode = mds->mds_objects_dir->d_inode; unsigned int tmpname = ll_insecure_random_int(); struct file *filp; struct dentry *new_child; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; char fidname[LL_FID_NAMELEN]; void *handle; int rc = 0, err, namelen; ENTRY; - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); sprintf(fidname, "OBJECTS/%u", tmpname); filp = filp_open(fidname, O_CREAT | O_EXCL, 0644); @@ -625,7 +625,7 @@ out_close: rc = err; } out_pop: - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); RETURN(rc); } @@ -635,14 +635,14 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, struct mds_obd *mds = &exp->exp_obd->u.mds; struct inode *parent_inode = mds->mds_objects_dir->d_inode; struct obd_device *obd = exp->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; char fidname[LL_FID_NAMELEN]; struct dentry *de; void *handle; int err, namelen, rc = 0; ENTRY; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation); @@ -658,9 +658,9 @@ int mds_obd_destroy(struct 
obd_export *exp, struct obdo *oa, that is unlinked, not spanned across multiple OSTs */ handle = fsfilt_start_log(obd, mds->mds_objects_dir->d_inode, FSFILT_OP_UNLINK, oti, 1); - if (IS_ERR(handle)) { + + if (IS_ERR(handle)) GOTO(out_dput, rc = PTR_ERR(handle)); - } rc = vfs_unlink(mds->mds_objects_dir->d_inode, de); if (rc) @@ -674,6 +674,6 @@ out_dput: if (de != NULL) l_dput(de); up(&parent_inode->i_sem); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index b8ce8b5..905ee6f 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -37,9 +37,9 @@ #include "mds_internal.h" -static int mds_llog_origin_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) +static int mds_llog_origin_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + void *buf, struct llog_cookie *logcookies, + int numcookies, void *data) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -48,14 +48,13 @@ static int mds_llog_origin_add(struct llog_ctxt *ctxt, ENTRY; lctxt = llog_get_context(lov_obd, ctxt->loc_idx); - rc = llog_add(lctxt, rec, lsm, logcookies, numcookies); + rc = llog_add(lctxt, rec, buf, logcookies, numcookies, data); RETURN(rc); } static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, struct llog_logid *logid, - struct llog_gen *gen, - struct obd_uuid *uuid) + struct llog_gen *gen, struct obd_uuid *uuid) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -68,8 +67,9 @@ static int mds_llog_origin_connect(struct llog_ctxt *ctxt, int count, RETURN(rc); } -static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags) +static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie 
*cookies, int flags, + void *data) { struct obd_device *obd = ctxt->loc_obd; struct obd_device *lov_obd = obd->u.mds.mds_osc_obd; @@ -78,7 +78,7 @@ static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls ENTRY; lctxt = llog_get_context(lov_obd, ctxt->loc_idx); - rc = llog_cancel(lctxt, lsm, count, cookies,flags); + rc = llog_cancel(lctxt, count, cookies, flags, data); RETURN(rc); } @@ -101,8 +101,8 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode, RETURN(rc); ctxt = llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT); - rc = llog_add(ctxt, NULL, lsm, logcookies, - cookies_size / sizeof(struct llog_cookie)); + rc = llog_add(ctxt, NULL, NULL, logcookies, + cookies_size / sizeof(struct llog_cookie), lsm); obd_free_memmd(mds->mds_osc_exp, &lsm); @@ -115,7 +115,7 @@ static struct llog_operations mds_unlink_orig_logops = { }; static struct llog_operations mds_size_repl_logops = { - lop_cancel: mds_llog_repl_cancel + lop_cancel: mds_llog_repl_cancel, }; int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, @@ -125,13 +125,13 @@ int mds_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, - &mds_unlink_orig_logops); + rc = obd_llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, 0, NULL, + &mds_unlink_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, - &mds_size_repl_logops); + rc = obd_llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, 0, NULL, + &mds_size_repl_logops); if (rc) RETURN(rc); @@ -148,11 +148,11 @@ int mds_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); if (rc) RETURN(rc); - rc = llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); if (rc) RETURN(rc); diff --git a/lustre/mds/mds_lov.c 
b/lustre/mds/mds_lov.c index 2eece4d..4a8a531 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -180,6 +180,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) { struct mds_obd *mds = &obd->u.mds; struct lustre_handle conn = {0,}; + char name[32] = "CATLIST"; int valsize; int rc, i; ENTRY; @@ -192,14 +193,16 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) mds->mds_osc_obd = class_name2obd(lov_name); if (!mds->mds_osc_obd) { - CERROR("MDS cannot locate LOV %s\n", lov_name); + CERROR("MDS cannot locate LOV %s\n", + lov_name); mds->mds_osc_obd = ERR_PTR(-ENOTCONN); RETURN(-ENOTCONN); } rc = obd_connect(&conn, mds->mds_osc_obd, &obd->obd_uuid); if (rc) { - CERROR("MDS cannot connect to LOV %s (%d)\n", lov_name, rc); + CERROR("MDS cannot connect to LOV %s (%d)\n", + lov_name, rc); mds->mds_osc_obd = ERR_PTR(rc); RETURN(rc); } @@ -228,7 +231,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) GOTO(err_reg, rc); } - rc = llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); + rc = obd_llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, name); if (rc) { CERROR("failed to initialize catalog %d\n", rc); GOTO(err_reg, rc); @@ -328,7 +331,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct obd_device *obd = exp->exp_obd; struct mds_obd *mds = &obd->u.mds; struct obd_ioctl_data *data = karg; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc = 0; switch (cmd) { @@ -337,7 +340,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (mds->mds_cfg_llh) RETURN(-EBUSY); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), &mds->mds_cfg_llh, NULL, name); if (rc == 0) @@ -345,7 +348,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, &cfg_uuid); else mds->mds_cfg_llh = NULL; - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, 
&obd->obd_lvfs_ctxt, NULL); RETURN(rc); } @@ -354,9 +357,9 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, if (!mds->mds_cfg_llh) RETURN(-EBADF); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = llog_close(mds->mds_cfg_llh); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); mds->mds_cfg_llh = NULL; RETURN(rc); @@ -366,19 +369,19 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, char *name = data->ioc_inlbuf1; if (mds->mds_cfg_llh) RETURN(-EBUSY); - - push_ctxt(&saved, &obd->obd_ctxt, NULL); - rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), &mds->mds_cfg_llh, NULL, name); if (rc == 0) { llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN, NULL); - + rc = llog_destroy(mds->mds_cfg_llh); llog_free_handle(mds->mds_cfg_llh); } - pop_ctxt(&saved, &obd->obd_ctxt, NULL); - + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + mds->mds_cfg_llh = NULL; RETURN(rc); } @@ -409,10 +412,10 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, RETURN(rc); } - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = llog_write_rec(mds->mds_cfg_llh, &rec, NULL, 0, cfg_buf, -1); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); OBD_FREE(cfg_buf, data->ioc_plen1); RETURN(rc); @@ -421,9 +424,9 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_PARSE: { struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_parse_llog(ctxt, data->ioc_inlbuf1, NULL); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) RETURN(rc); @@ -433,9 +436,9 @@ int 
mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_DUMP_LOG: { struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) RETURN(rc); @@ -459,7 +462,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_CATLOGLIST: { int count = mds->mds_lov_desc.ld_tgt_count; - rc = llog_catlog_list(obd, count, data); + rc = llog_catalog_list(obd, count, data); RETURN(rc); } @@ -468,15 +471,17 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_LLOG_REMOVE: { struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); + char name[32] = "CATLIST"; int rc2; obd_llog_finish(obd, mds->mds_lov_desc.ld_tgt_count); - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); - llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); - rc2 = obd_set_info(mds->mds_osc_exp, strlen("mds_conn"), "mds_conn", - 0, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); + obd_llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count, + name); + rc2 = obd_set_info(mds->mds_osc_exp, strlen("mds_conn"), + "mds_conn", 0, NULL); if (!rc) rc = rc2; RETURN(rc); @@ -486,9 +491,9 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); RETURN(rc); } @@ -526,8 +531,8 @@ int 
mds_notify(struct obd_device *obd, struct obd_device *watched, int active) } else { LASSERT(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT) != NULL); - rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"), "mds_conn", - 0, uuid); + rc = obd_set_info(obd->u.mds.mds_osc_exp, strlen("mds_conn"), + "mds_conn", 0, uuid); if (rc != 0) RETURN(rc); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 8954cb5..ce7736c 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -729,7 +729,7 @@ int mds_pin(struct ptlrpc_request *req) { struct obd_device *obd = req->rq_export->exp_obd; struct mds_body *request_body, *reply_body; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc, size = sizeof(*reply_body); ENTRY; @@ -740,10 +740,10 @@ int mds_pin(struct ptlrpc_request *req) RETURN(rc); reply_body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*reply_body)); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = mds_open_by_fid(req, &request_body->fid1, reply_body, request_body->flags, NULL, NULL); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } @@ -992,8 +992,13 @@ int mds_open(struct mds_update_record *rec, int offset, GOTO(cleanup, rc); cleanup: - rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, - req, rc, rep ? rep->lock_policy_res1 : 0); + if (dchild && dchild->d_inode) + rc = mds_finish_transno(mds, dchild->d_inode, handle, req, rc, + rep ? rep->lock_policy_res1 : 0); + else + rc = mds_finish_transno(mds, !IS_ERR(dparent) ? dparent->d_inode : + NULL, handle, req, rc, + rep ? 
rep->lock_policy_res1 : 0); switch (cleanup_phase) { case 2: @@ -1176,7 +1181,7 @@ int mds_close(struct ptlrpc_request *req) struct obd_device *obd = req->rq_export->exp_obd; struct mds_body *body; struct mds_file_data *mfd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct inode *inode; int rc, repsize[3] = {sizeof(struct mds_body), obd->u.mds.mds_max_mdsize, @@ -1222,9 +1227,9 @@ int mds_close(struct ptlrpc_request *req) list_del(&mfd->mfd_list); spin_unlock(&med->med_open_lock); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); req->rq_status = mds_mfd_close(rc ? NULL : req, obd, mfd, 1); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 2dfc6c6..5ff4beb 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "mds_internal.h" @@ -79,9 +80,9 @@ static void mds_cancel_cookies_cb(struct obd_device *obd, __u64 transno, } else { ///* XXX 0 normally, SENDNOW for debug */); ctxt = llog_get_context(obd,mlcd->mlcd_cookies[0].lgc_subsys+1); - rc = llog_cancel(ctxt, lsm, mlcd->mlcd_cookielen / - sizeof(*mlcd->mlcd_cookies), - mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW); + rc = llog_cancel(ctxt, mlcd->mlcd_cookielen / + sizeof(*mlcd->mlcd_cookies), + mlcd->mlcd_cookies, OBD_LLOG_FL_SENDNOW, lsm); if (rc) CERROR("error cancelling %d log cookies: rc %d\n", (int)(mlcd->mlcd_cookielen / @@ -146,8 +147,8 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, mcd->mcd_last_result = cpu_to_le32(rc); mcd->mcd_last_data = cpu_to_le32(op_data); - fsfilt_add_journal_cb(req->rq_export->exp_obd, transno, handle, - mds_commit_cb, NULL); + fsfilt_add_journal_cb(req->rq_export->exp_obd, mds->mds_sb, + transno, handle, mds_commit_cb, 
NULL); err = fsfilt_write_record(obd, mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off, 0); @@ -486,8 +487,8 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, EXIT; cleanup: if (mlcd != NULL) - fsfilt_add_journal_cb(req->rq_export->exp_obd, 0, handle, - mds_cancel_cookies_cb, mlcd); + fsfilt_add_journal_cb(req->rq_export->exp_obd, mds->mds_sb, 0, + handle, mds_cancel_cookies_cb, mlcd); err = mds_finish_transno(mds, inode, handle, req, rc, 0); switch (cleanup_phase) { case 1: @@ -1883,7 +1884,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *lockh) { struct obd_device *obd = req->rq_export->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc; ENTRY; @@ -1891,9 +1892,9 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, LASSERT(rec->ur_opcode <= REINT_MAX && reinters[rec->ur_opcode] != NULL); - push_ctxt(&saved, &obd->obd_ctxt, &rec->ur_uc); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); rc = reinters[rec->ur_opcode] (rec, offset, req, lockh); - pop_ctxt(&saved, &obd->obd_ctxt, &rec->ur_uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &rec->ur_uc); RETURN(rc); } diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index a79e44c..b0cc7ec 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -249,7 +249,7 @@ out_free_lmm: int mds_cleanup_orphans(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct file *file; struct dentry *dchild, *dentry; struct vfsmount *mnt; @@ -261,7 +261,7 @@ int mds_cleanup_orphans(struct obd_device *obd) int rc = 0, item = 0, namlen; ENTRY; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); dentry = dget(mds->mds_pending_dir); if (IS_ERR(dentry)) GOTO(err_pop, rc = PTR_ERR(dentry)); @@ -334,7 +334,7 @@ err_out: OBD_FREE(dirent, sizeof(*dirent)); } err_pop: - 
pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc == 0) rc = item; RETURN(rc); diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index d092fc5..bf094bd 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -1,6 +1,6 @@ MODULES := obdclass llog_test -obdclass-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o class_obd.o +obdclass-objs := llog_obd.o class_obd.o obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o obdclass-objs += lprocfs_status.o lustre_handles.o lustre_peer.o obdclass-objs += statfs_pack.o obdo.o obd_config.o diff --git a/lustre/obdclass/Makefile.mk b/lustre/obdclass/Makefile.mk index 43bdb68..f01b998 100644 --- a/lustre/obdclass/Makefile.mk +++ b/lustre/obdclass/Makefile.mk @@ -6,9 +6,9 @@ include $(src)/../portals/Kernelenv obj-y += obdclass.o -obdclass-objs := llog.o llog_cat.o class_obd.o debug.o \ +obdclass-objs := class_obd.o debug.o \ genops.o sysctl.o uuid.o lprocfs_status.o lustre_handles.o \ - lustre_peer.o statfs_pack.o obdo.o llog_lvfs.o llog_obd.o \ + lustre_peer.o statfs_pack.o obdo.o llog_obd.o \ obd_config.o llog_ioctl.o $(obj)/class_obd.o: lustre_build_version diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index 79ba5ac..d537a22 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -3,8 +3,7 @@ if LIBLUSTRE noinst_LIBRARIES = liblustreclass.a liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c uuid.c liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c -liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c -liblustreclass_a_SOURCES += llog_lvfs.c #llog_ioctl.c rbtree.c +liblustreclass_a_SOURCES += obdo.c obd_config.c llog_obd.c #llog_ioctl.c rbtree.c liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\" liblustreclass_a_CFLAGS = $(LLCFLAGS) @@ -16,4 +15,4 @@ noinst_DATA = 
llog_test$(KMODEXT) endif # MODULES MOSTLYCLEANFILES = *.o *.ko *.mod.c llog-test.c -DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h +DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 43be2ae..09bf499 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -67,7 +67,6 @@ #include #endif #include -#include "llog_internal.h" #ifndef __KERNEL__ /* liblustre workaround */ diff --git a/lustre/obdclass/llog_internal.h b/lustre/obdclass/llog_internal.h deleted file mode 100644 index 0066087..0000000 --- a/lustre/obdclass/llog_internal.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __LLOG_INTERNAL_H__ -#define __LLOG_INTERNAL_H__ - -int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray); -int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, - struct llog_logid *logid); -#endif diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 682f33a..621b94f 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -16,7 +16,6 @@ #include #include #include -#include "llog_internal.h" static int str2logid(struct llog_logid *logid, char *str, int len) { @@ -377,8 +376,8 @@ out: } EXPORT_SYMBOL(llog_ioctl); -int llog_catlog_list(struct obd_device *obd, int count, - struct obd_ioctl_data *data) +int llog_catalog_list(struct obd_device *obd, int count, + struct obd_ioctl_data *data) { int size, i; struct llog_catid *idarray; @@ -394,7 +393,8 @@ int llog_catlog_list(struct obd_device *obd, int count, RETURN(-ENOMEM); memset(idarray, 0, size); - rc = llog_get_cat_list(obd, obd, name, count, idarray); + rc = llog_get_cat_list(&obd->obd_lvfs_ctxt, obd->obd_fsops, + name, count, idarray); if (rc) { OBD_FREE(idarray, size); RETURN(rc); @@ 
-418,4 +418,4 @@ int llog_catlog_list(struct obd_device *obd, int count, RETURN(0); } -EXPORT_SYMBOL(llog_catlog_list); +EXPORT_SYMBOL(llog_catalog_list); diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c deleted file mode 100644 index 9d99e9a..0000000 --- a/lustre/obdclass/llog_lvfs.c +++ /dev/null @@ -1,765 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author: Andreas Dilger - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * OST<->MDS recovery logging infrastructure. 
- * - * Invariants in implementation: - * - we do not share logs among different OST<->MDS connections, so that - * if an OST or MDS fails it need only look at log(s) relevant to itself - */ - -#define DEBUG_SUBSYSTEM S_LOG - -#ifndef EXPORT_SYMTAB -#define EXPORT_SYMTAB -#endif - -#ifdef __KERNEL__ -#include -#else -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include "llog_internal.h" - -#ifdef __KERNEL__ - -static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file, - int len, int index) -{ - struct llog_rec_hdr rec = { 0 }; - struct llog_rec_tail tail; - int rc; - ENTRY; - - LASSERT(len >= LLOG_MIN_REC_SIZE && (len & 0x7) == 0); - - tail.lrt_len = rec.lrh_len = len; - tail.lrt_index = rec.lrh_index = index; - rec.lrh_type = 0; - - rc = fsfilt_write_record(obd, file, &rec, sizeof(rec), &file->f_pos, 0); - if (rc) { - CERROR("error writing padding record: rc %d\n", rc); - goto out; - } - - file->f_pos += len - sizeof(rec) - sizeof(tail); - rc = fsfilt_write_record(obd, file, &tail, sizeof(tail),&file->f_pos,0); - if (rc) { - CERROR("error writing padding record: rc %d\n", rc); - goto out; - } - - out: - RETURN(rc); -} - -static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file, - struct llog_rec_hdr *rec, void *buf, loff_t off) -{ - int rc; - struct llog_rec_tail end; - loff_t saved_off = file->f_pos; - int buflen = rec->lrh_len; - - ENTRY; - file->f_pos = off; - - if (!buf) { - rc = fsfilt_write_record(obd, file, rec, buflen,&file->f_pos,0); - if (rc) { - CERROR("error writing log record: rc %d\n", rc); - goto out; - } - GOTO(out, rc = 0); - } - - /* the buf case */ - rec->lrh_len = sizeof(*rec) + buflen + sizeof(end); - rc = fsfilt_write_record(obd, file, rec, sizeof(*rec), &file->f_pos, 0); - if (rc) { - CERROR("error writing log hdr: rc %d\n", rc); - goto out; - } - - rc = fsfilt_write_record(obd, file, buf, buflen, &file->f_pos, 0); - if (rc) { - CERROR("error writing log buffer: rc 
%d\n", rc); - goto out; - } - - end.lrt_len = rec->lrh_len; - end.lrt_index = rec->lrh_index; - rc = fsfilt_write_record(obd, file, &end, sizeof(end), &file->f_pos, 0); - if (rc) { - CERROR("error writing log tail: rc %d\n", rc); - goto out; - } - - rc = 0; - out: - if (saved_off > file->f_pos) - file->f_pos = saved_off; - LASSERT(rc <= 0); - RETURN(rc); -} - -static int llog_lvfs_read_blob(struct obd_device *obd, struct l_file *file, - void *buf, int size, loff_t off) -{ - loff_t offset = off; - int rc; - ENTRY; - - rc = fsfilt_read_record(obd, file, buf, size, &offset); - if (rc) { - CERROR("error reading log record: rc %d\n", rc); - RETURN(rc); - } - RETURN(0); -} - -static int llog_lvfs_read_header(struct llog_handle *handle) -{ - struct obd_device *obd; - int rc; - ENTRY; - - LASSERT(sizeof(*handle->lgh_hdr) == LLOG_CHUNK_SIZE); - - obd = handle->lgh_ctxt->loc_exp->exp_obd; - - if (handle->lgh_file->f_dentry->d_inode->i_size == 0) { - CDEBUG(D_HA, "not reading header from 0-byte log\n"); - RETURN(LLOG_EEMPTY); - } - - rc = llog_lvfs_read_blob(obd, handle->lgh_file, handle->lgh_hdr, - LLOG_CHUNK_SIZE, 0); - if (rc) { - CERROR("error reading log header from %*s\n", - handle->lgh_file->f_dentry->d_name.len, - handle->lgh_file->f_dentry->d_name.name); - } else { - struct llog_rec_hdr *llh_hdr = &handle->lgh_hdr->llh_hdr; - /* - * These need to be fixed for bug 1987 - */ - if (llh_hdr->lrh_type != LLOG_HDR_MAGIC) { - CERROR("bad log %*s header magic: %#x (expected %#x)\n", - handle->lgh_file->f_dentry->d_name.len, - handle->lgh_file->f_dentry->d_name.name, - llh_hdr->lrh_type, LLOG_HDR_MAGIC); - rc = -EIO; - } else if (llh_hdr->lrh_len != LLOG_CHUNK_SIZE) { - CERROR("incorrectly sized log %*s header: %#x " - "(expected %#x)\n", - handle->lgh_file->f_dentry->d_name.len, - handle->lgh_file->f_dentry->d_name.name, - llh_hdr->lrh_len, LLOG_CHUNK_SIZE); - CERROR("you may need to re-run lconf --write_conf.\n"); - rc = -EIO; - } - } - - handle->lgh_last_idx = 
handle->lgh_hdr->llh_tail.lrt_index; - handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size; - - RETURN(rc); -} - -/* returns negative in on error; 0 if success && reccookie == 0; 1 otherwise */ -/* appends if idx == -1, otherwise overwrites record idx. */ -static int llog_lvfs_write_rec(struct llog_handle *loghandle, - struct llog_rec_hdr *rec, - struct llog_cookie *reccookie, int cookiecount, - void *buf, int idx) -{ - struct llog_log_hdr *llh; - int reclen = rec->lrh_len, index, rc; - struct llog_rec_tail *lrt; - struct obd_device *obd; - struct file *file; - size_t left; - ENTRY; - - llh = loghandle->lgh_hdr; - file = loghandle->lgh_file; - obd = loghandle->lgh_ctxt->loc_exp->exp_obd; - - /* record length should not bigger than LLOG_CHUNK_SIZE */ - if (buf) - rc = (reclen > LLOG_CHUNK_SIZE - sizeof(struct llog_rec_hdr) - - sizeof(struct llog_rec_tail)) ? -E2BIG : 0; - else - rc = (reclen > LLOG_CHUNK_SIZE) ? -E2BIG : 0; - if (rc) - RETURN(rc); - - if (idx != -1) { - loff_t saved_offset; - - /* no header: only allowed to insert record 1 */ - if (idx != 1 && !file->f_dentry->d_inode->i_size) { - CERROR("idx != -1 in empty log\n"); - LBUG(); - } - - if (idx && llh->llh_size && llh->llh_size != reclen) - RETURN(-EINVAL); - - rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0); - /* we are done if we only write the header or on error */ - if (rc || idx == 0) - RETURN(rc); - - saved_offset = sizeof(*llh) + (idx-1)*rec->lrh_len; - rc = llog_lvfs_write_blob(obd, file, rec, buf, saved_offset); - if (rc == 0 && reccookie) { - reccookie->lgc_lgl = loghandle->lgh_id; - reccookie->lgc_index = idx; - rc = 1; - } - RETURN(rc); - } - - /* Make sure that records don't cross a chunk boundary, so we can - * process them page-at-a-time if needed. If it will cross a chunk - * boundary, write in a fake (but referenced) entry to pad the chunk. 
- * - * We know that llog_current_log() will return a loghandle that is - * big enough to hold reclen, so all we care about is padding here. - */ - left = LLOG_CHUNK_SIZE - (file->f_pos & (LLOG_CHUNK_SIZE - 1)); - if (buf) - reclen = sizeof(*rec) + rec->lrh_len + - sizeof(struct llog_rec_tail); - - /* NOTE: padding is a record, but no bit is set */ - if (left != 0 && left != reclen && - left < (reclen + LLOG_MIN_REC_SIZE)) { - int bitmap_size = sizeof(llh->llh_bitmap) * 8; - loghandle->lgh_last_idx++; - rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx); - if (rc) - RETURN(rc); - /* if it's the last idx in log file, then return -ENOSPC */ - if (loghandle->lgh_last_idx == bitmap_size - 1) - RETURN(-ENOSPC); - } - - loghandle->lgh_last_idx++; - index = loghandle->lgh_last_idx; - rec->lrh_index = index; - if (buf == NULL) { - lrt = (struct llog_rec_tail *) - ((char *)rec + rec->lrh_len - sizeof(*lrt)); - lrt->lrt_len = rec->lrh_len; - lrt->lrt_index = rec->lrh_index; - } - if (ext2_set_bit(index, llh->llh_bitmap)) { - CERROR("argh, index %u already set in log bitmap?\n", index); - LBUG(); /* should never happen */ - } - llh->llh_count++; - llh->llh_tail.lrt_index = index; - - rc = llog_lvfs_write_blob(obd, file, &llh->llh_hdr, NULL, 0); - if (rc) - RETURN(rc); - - rc = llog_lvfs_write_blob(obd, file, rec, buf, file->f_pos); - if (rc) - RETURN(rc); - - CDEBUG(D_HA, "added record "LPX64": idx: %u, %u bytes\n", - loghandle->lgh_id.lgl_oid, index, rec->lrh_len); - if (rc == 0 && reccookie) { - reccookie->lgc_lgl = loghandle->lgh_id; - reccookie->lgc_index = index; - if (rec->lrh_type == MDS_UNLINK_REC) - reccookie->lgc_subsys = LLOG_UNLINK_ORIG_CTXT; - else if (rec->lrh_type == OST_SZ_REC) - reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT; - else if (rec->lrh_type == OST_RAID1_REC) - reccookie->lgc_subsys = LLOG_RD1_ORIG_CTXT; - else - reccookie->lgc_subsys = -1; - rc = 1; - } - if (rc == 0 && rec->lrh_type == LLOG_GEN_REC) - rc = 1; - - RETURN(rc); -} - -/* We can 
skip reading at least as many log blocks as the number of -* minimum sized log records we are skipping. If it turns out -* that we are not far enough along the log (because the -* actual records are larger than minimum size) we just skip -* some more records. */ - -static void llog_skip_over(__u64 *off, int curr, int goal) -{ - if (goal <= curr) - return; - *off = (*off + (goal-curr-1) * LLOG_MIN_REC_SIZE) & - ~(LLOG_CHUNK_SIZE - 1); -} - - -/* sets: - * - cur_offset to the furthest point read in the log file - * - cur_idx to the log index preceeding cur_offset - * returns -EIO/-EINVAL on error - */ -static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, - int len) -{ - int rc; - ENTRY; - - if (len == 0 || len & (LLOG_CHUNK_SIZE - 1)) - RETURN(-EINVAL); - - CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n", - next_idx, *cur_idx, *cur_offset); - - while (*cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) { - struct llog_rec_hdr *rec; - struct llog_rec_tail *tail; - loff_t ppos; - - llog_skip_over(cur_offset, *cur_idx, next_idx); - - ppos = *cur_offset; - rc = fsfilt_read_record(loghandle->lgh_ctxt->loc_exp->exp_obd, - loghandle->lgh_file, buf, len, - &ppos); - - if (rc) { - CERROR("Cant read llog block at log id "LPU64 - "/%u offset "LPU64"\n", - loghandle->lgh_id.lgl_oid, - loghandle->lgh_id.lgl_ogen, - *cur_offset); - RETURN(rc); - } - - /* put number of bytes read into rc to make code simpler */ - rc = ppos - *cur_offset; - *cur_offset = ppos; - - if (rc == 0) /* end of file, nothing to do */ - RETURN(0); - - if (rc < sizeof(*tail)) { - CERROR("Invalid llog block at log id "LPU64"/%u offset " - LPU64"\n", loghandle->lgh_id.lgl_oid, - loghandle->lgh_id.lgl_ogen, *cur_offset); - RETURN(-EINVAL); - } - - tail = buf + rc - sizeof(struct llog_rec_tail); - *cur_idx = tail->lrt_index; - - /* this shouldn't happen */ - if (tail->lrt_index == 0) { - CERROR("Invalid llog tail 
at log id "LPU64"/%u offset " - LPU64"\n", loghandle->lgh_id.lgl_oid, - loghandle->lgh_id.lgl_ogen, *cur_offset); - RETURN(-EINVAL); - } - if (tail->lrt_index < next_idx) - continue; - - /* sanity check that the start of the new buffer is no farther - * than the record that we wanted. This shouldn't happen. */ - rec = buf; - if (rec->lrh_index > next_idx) { - CERROR("missed desired record? %u > %u\n", - rec->lrh_index, next_idx); - RETURN(-ENOENT); - } - RETURN(0); - } - RETURN(-EIO); -} - -static struct file *llog_filp_open(char *name, int flags, int mode) -{ - char *logname; - struct file *filp; - int len; - - OBD_ALLOC(logname, PATH_MAX); - if (logname == NULL) - return ERR_PTR(-ENOMEM); - - len = snprintf(logname, PATH_MAX, "LOGS/%s", name); - if (len >= PATH_MAX - 1) { - filp = ERR_PTR(-ENAMETOOLONG); - } else { - filp = l_filp_open(logname, flags, mode); - if (IS_ERR(filp)) - CERROR("logfile creation %s: %ld\n", logname, - PTR_ERR(filp)); - } - - OBD_FREE(logname, PATH_MAX); - return filp; -} - -/* This is a callback from the llog_* functions. - * Assumes caller has already pushed us into the kernel context. 
*/ -static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) -{ - struct llog_handle *handle; - struct obd_device *obd; - struct l_dentry *dchild = NULL; - struct obdo *oa = NULL; - int rc = 0, cleanup_phase = 1; - int open_flags = O_RDWR | O_CREAT | O_LARGEFILE; - ENTRY; - - handle = llog_alloc_handle(); - if (handle == NULL) - RETURN(-ENOMEM); - *res = handle; - - LASSERT(ctxt); - LASSERT(ctxt->loc_exp); - obd = ctxt->loc_exp->exp_obd; - - if (logid != NULL) { - dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid, - logid->lgl_ogen, logid->lgl_ogr); - - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - CERROR("error looking up logfile "LPX64":0x%x: rc %d\n", - logid->lgl_oid, logid->lgl_ogen, rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 2; - if (dchild->d_inode == NULL) { - rc = -ENOENT; - CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n", - logid->lgl_oid, logid->lgl_ogr, rc); - GOTO(cleanup, rc); - } - - handle->lgh_file = l_dentry_open(&obd->obd_ctxt, dchild, - O_RDWR | O_LARGEFILE); - if (IS_ERR(handle->lgh_file)) { - rc = PTR_ERR(handle->lgh_file); - CERROR("error opening logfile "LPX64"0x%x: rc %d\n", - logid->lgl_oid, logid->lgl_ogen, rc); - GOTO(cleanup, rc); - } - - /* assign the value of lgh_id for handle directly */ - handle->lgh_id = *logid; - - } else if (name) { - handle->lgh_file = llog_filp_open(name, open_flags, 0644); - if (IS_ERR(handle->lgh_file)) - GOTO(cleanup, rc = PTR_ERR(handle->lgh_file)); - - handle->lgh_id.lgl_ogr = 1; - handle->lgh_id.lgl_oid = - handle->lgh_file->f_dentry->d_inode->i_ino; - handle->lgh_id.lgl_ogen = - handle->lgh_file->f_dentry->d_inode->i_generation; - } else { - oa = obdo_alloc(); - if (oa == NULL) - GOTO(cleanup, rc = -ENOMEM); - /* XXX get some filter group constants */ - oa->o_gr = 1; - oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP; - rc = obd_create(ctxt->loc_exp, oa, NULL, NULL); - if (rc) - GOTO(cleanup, rc); - - dchild = 
obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id, - oa->o_generation, oa->o_gr); - - if (IS_ERR(dchild)) - GOTO(cleanup, rc = PTR_ERR(dchild)); - cleanup_phase = 2; - handle->lgh_file = l_dentry_open(&obd->obd_ctxt, dchild, - open_flags); - if (IS_ERR(handle->lgh_file)) - GOTO(cleanup, rc = PTR_ERR(handle->lgh_file)); - - handle->lgh_id.lgl_ogr = oa->o_gr; - handle->lgh_id.lgl_oid = oa->o_id; - handle->lgh_id.lgl_ogen = oa->o_generation; - } - - handle->lgh_ctxt = ctxt; - finish: - if (oa) - obdo_free(oa); - RETURN(rc); -cleanup: - switch (cleanup_phase) { - case 2: - l_dput(dchild); - case 1: - llog_free_handle(handle); - } - goto finish; -} - -static int llog_lvfs_close(struct llog_handle *handle) -{ - int rc; - ENTRY; - - rc = filp_close(handle->lgh_file, 0); - if (rc) - CERROR("error closing log: rc %d\n", rc); - RETURN(rc); -} - -static int llog_lvfs_destroy(struct llog_handle *handle) -{ - struct dentry *fdentry; - struct obdo *oa; - int rc; - ENTRY; - - fdentry = handle->lgh_file->f_dentry; - if (!strcmp(fdentry->d_parent->d_name.name, "LOGS")) { - struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd; - struct inode *inode = fdentry->d_parent->d_inode; - struct obd_run_ctxt saved; - - push_ctxt(&saved, &obd->obd_ctxt, NULL); - dget(fdentry); - rc = llog_lvfs_close(handle); - - if (rc == 0) { - down(&inode->i_sem); - rc = vfs_unlink(inode, fdentry); - up(&inode->i_sem); - } - - dput(fdentry); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); - RETURN(rc); - } - - oa = obdo_alloc(); - if (oa == NULL) - RETURN(-ENOMEM); - - oa->o_id = handle->lgh_id.lgl_oid; - oa->o_gr = handle->lgh_id.lgl_ogr; - oa->o_generation = handle->lgh_id.lgl_ogen; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER; - - rc = llog_lvfs_close(handle); - if (rc) - GOTO(out, rc); - - rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL); - out: - obdo_free(oa); - RETURN(rc); -} - -/* reads the catalog list */ -int llog_get_cat_list(struct obd_device *obd, struct obd_device 
*disk_obd, - char *name, int count, struct llog_catid *idarray) -{ - struct obd_run_ctxt saved; - struct l_file *file; - int rc; - int size = sizeof(*idarray) * count; - loff_t off = 0; - - LASSERT(count); - - push_ctxt(&saved, &obd->obd_ctxt, NULL); - file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); - if (!file || IS_ERR(file)) { - rc = PTR_ERR(file); - CERROR("OBD filter: cannot open/create %s: rc = %d\n", - name, rc); - GOTO(out, rc); - } - - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - CERROR("%s is not a regular file!: mode = %o\n", name, - file->f_dentry->d_inode->i_mode); - GOTO(out, rc = -ENOENT); - } - - rc = fsfilt_read_record(disk_obd, file, idarray, size, &off); - if (rc) { - CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", - name, rc); - GOTO(out, rc); - } - - out: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); - if (file && !IS_ERR(file)) - rc = filp_close(file, 0); - RETURN(rc); -} -EXPORT_SYMBOL(llog_get_cat_list); - -/* writes the cat list */ -int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) -{ - struct obd_run_ctxt saved; - struct l_file *file; - int rc; - int size = sizeof(*idarray) * count; - loff_t off = 0; - - LASSERT(count); - - push_ctxt(&saved, &obd->obd_ctxt, NULL); - file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); - if (!file || IS_ERR(file)) { - rc = PTR_ERR(file); - CERROR("OBD filter: cannot open/create %s: rc = %d\n", - name, rc); - GOTO(out, rc); - } - - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - CERROR("%s is not a regular file!: mode = %o\n", name, - file->f_dentry->d_inode->i_mode); - GOTO(out, rc = -ENOENT); - } - - rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1); - if (rc) { - CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", - name, rc); - GOTO(out, rc); - } - - out: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); - if (file && !IS_ERR(file)) - rc = filp_close(file, 0); - RETURN(rc); -} - 
-struct llog_operations llog_lvfs_ops = { - lop_write_rec: llog_lvfs_write_rec, - lop_next_block: llog_lvfs_next_block, - lop_read_header: llog_lvfs_read_header, - lop_create: llog_lvfs_create, - lop_destroy: llog_lvfs_destroy, - lop_close: llog_lvfs_close, - // lop_cancel: llog_lvfs_cancel, -}; - -EXPORT_SYMBOL(llog_lvfs_ops); - -#else /* !__KERNEL__ */ - -static int llog_lvfs_read_header(struct llog_handle *handle) -{ - LBUG(); - return 0; -} - -static int llog_lvfs_write_rec(struct llog_handle *loghandle, - struct llog_rec_hdr *rec, - struct llog_cookie *reccookie, int cookiecount, - void *buf, int idx) -{ - LBUG(); - return 0; -} - -static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, - int len) -{ - LBUG(); - return 0; -} - -static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) -{ - LBUG(); - return 0; -} - -static int llog_lvfs_close(struct llog_handle *handle) -{ - LBUG(); - return 0; -} - -static int llog_lvfs_destroy(struct llog_handle *handle) -{ - LBUG(); - return 0; -} - -int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) -{ - LBUG(); - return 0; -} - -int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) -{ - LBUG(); - return 0; -} - -struct llog_operations llog_lvfs_ops = { - lop_write_rec: llog_lvfs_write_rec, - lop_next_block: llog_lvfs_next_block, - lop_read_header: llog_lvfs_read_header, - lop_create: llog_lvfs_create, - lop_destroy: llog_lvfs_destroy, - lop_close: llog_lvfs_close, -// lop_cancel: llog_lvfs_cancel, -}; -#endif diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index e9a9856..bdcf6d3 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -22,12 +22,12 @@ #include #include #include -#include "llog_internal.h" /* 
helper functions for calling the llog obd methods */ -int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, - int count, struct llog_logid *logid, struct llog_operations *op) +int obd_llog_setup(struct obd_device *obd, int index, + struct obd_device *disk_obd, int count, + struct llog_logid *logid, struct llog_operations *op) { int rc = 0; struct llog_ctxt *ctxt; @@ -41,82 +41,52 @@ int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, RETURN(-ENOMEM); obd->obd_llog_ctxt[index] = ctxt; + ctxt->loc_logops = op; ctxt->loc_obd = obd; - ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); ctxt->loc_idx = index; - ctxt->loc_logops = op; + ctxt->loc_exp = class_export_get(disk_obd->obd_self_export); sema_init(&ctxt->loc_sem, 1); + if (op->lop_close == llog_lvfs_ops.lop_close) { + ctxt->loc_fsops = disk_obd->obd_fsops; + ctxt->loc_lvfs_ctxt = &disk_obd->obd_lvfs_ctxt; + if (!strcmp(disk_obd->obd_type->typ_name, "mds")) { + struct mds_obd *mds = &disk_obd->u.mds; + ctxt->loc_objects_dir = mds->mds_objects_dir; + ctxt->loc_logs_dir = mds->mds_logs_dir; + } + } + if (op->lop_setup) rc = op->lop_setup(obd, index, disk_obd, count, logid); - if (ctxt && rc) + if (ctxt && rc) { + obd->obd_llog_ctxt[index] = NULL; OBD_FREE(ctxt, sizeof(*ctxt)); + } RETURN(rc); } -EXPORT_SYMBOL(llog_setup); +EXPORT_SYMBOL(obd_llog_setup); -int llog_cleanup(struct llog_ctxt *ctxt) +int obd_llog_cleanup(struct llog_ctxt *ctxt) { int rc = 0; ENTRY; LASSERT(ctxt); - if (CTXTP(ctxt, cleanup)) - rc = CTXTP(ctxt, cleanup)(ctxt); - ctxt->loc_obd->obd_llog_ctxt[ctxt->loc_idx] = NULL; class_export_put(ctxt->loc_exp); ctxt->loc_exp = NULL; - OBD_FREE(ctxt, sizeof(*ctxt)); - - RETURN(rc); -} -EXPORT_SYMBOL(llog_cleanup); - -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp) -{ - int rc = 0; - ENTRY; - - if (!ctxt) - RETURN(0); - - if (CTXTP(ctxt, sync)) - rc = CTXTP(ctxt, sync)(ctxt, exp); - - RETURN(rc); -} -EXPORT_SYMBOL(llog_sync); -int 
llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies) -{ - int rc; - ENTRY; - - LASSERT(ctxt); - CTXT_CHECK_OP(ctxt, add, -EOPNOTSUPP); - - rc = CTXTP(ctxt, add)(ctxt, rec, lsm, logcookies, numcookies); - RETURN(rc); -} -EXPORT_SYMBOL(llog_add); - -int llog_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags) -{ - int rc; - ENTRY; + if (CTXTP(ctxt, cleanup)) + rc = CTXTP(ctxt, cleanup)(ctxt); + else + OBD_FREE(ctxt, sizeof(*ctxt)); - LASSERT(ctxt); - CTXT_CHECK_OP(ctxt, cancel, -EOPNOTSUPP); - rc = CTXTP(ctxt, cancel)(ctxt, lsm, count, cookies, flags); RETURN(rc); } -EXPORT_SYMBOL(llog_cancel); +EXPORT_SYMBOL(obd_llog_cleanup); /* callback func for llog_process in llog_obd_origin_setup */ static int cat_cancel_cb(struct llog_handle *cathandle, @@ -175,7 +145,7 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, { struct llog_ctxt *ctxt; struct llog_handle *handle; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc; ENTRY; @@ -196,93 +166,26 @@ int llog_obd_origin_setup(struct obd_device *obd, int index, *logid = handle->lgh_id; } if (rc) - GOTO(out, rc); + RETURN(rc); ctxt->loc_handle = handle; - push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); rc = llog_init_handle(handle, LLOG_F_IS_CAT, NULL); - pop_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); if (rc) - GOTO(out, rc); + RETURN(rc); rc = llog_process(handle, (llog_cb_t)cat_cancel_cb, NULL, NULL); if (rc) CERROR("llog_process with cat_cancel_cb failed: %d\n", rc); - out: - if (ctxt && rc) { - obd->obd_llog_ctxt[index] = NULL; - OBD_FREE(ctxt, sizeof(*ctxt)); - } RETURN(rc); } EXPORT_SYMBOL(llog_obd_origin_setup); -int llog_obd_origin_cleanup(struct llog_ctxt *ctxt) -{ - struct llog_handle *cathandle, *n, *loghandle; - struct llog_log_hdr *llh; - int rc, 
index; - ENTRY; - - if (!ctxt) - return 0; - - cathandle = ctxt->loc_handle; - if (cathandle) { - list_for_each_entry_safe(loghandle, n, - &cathandle->u.chd.chd_head, - u.phd.phd_entry) { - llh = loghandle->lgh_hdr; - if ((llh->llh_flags & - LLOG_F_ZAP_WHEN_EMPTY) && - (llh->llh_count == 1)) { - rc = llog_destroy(loghandle); - if (rc) - CERROR("failure destroying log during " - "cleanup: %d\n", rc); - LASSERT(rc == 0); - - index = loghandle->u.phd.phd_cookie.lgc_index; - llog_free_handle(loghandle); - - LASSERT(index); - llog_cat_set_first_idx(cathandle, index); - rc = llog_cancel_rec(cathandle, index); - if (rc == 0) - CDEBUG(D_HA, "cancel plain log at index" - " %u of catalog "LPX64"\n", - index,cathandle->lgh_id.lgl_oid); - } - } - llog_cat_put(ctxt->loc_handle); - } - return 0; -} -EXPORT_SYMBOL(llog_obd_origin_cleanup); - -/* add for obdfilter/sz and mds/unlink */ -int llog_obd_origin_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies) -{ - struct llog_handle *cathandle; - int rc; - ENTRY; - - cathandle = ctxt->loc_handle; - LASSERT(cathandle != NULL); - rc = llog_cat_add_rec(cathandle, rec, logcookies, NULL); - if (rc != 1) - CERROR("write one catalog record failed: %d\n", rc); - RETURN(rc); -} -EXPORT_SYMBOL(llog_obd_origin_add); - -int llog_cat_initialize(struct obd_device *obd, int count) +int obd_llog_cat_initialize(struct obd_device *obd, int count, char *name) { struct llog_catid *idarray; int size = sizeof(*idarray) * count; - char name[32] = CATLIST; int rc; ENTRY; @@ -290,7 +193,8 @@ int llog_cat_initialize(struct obd_device *obd, int count) if (!idarray) RETURN(-ENOMEM); - rc = llog_get_cat_list(obd, obd, name, count, idarray); + rc = llog_get_cat_list(&obd->obd_lvfs_ctxt, obd->obd_fsops, + name, count, idarray); if (rc) { CERROR("rc: %d\n", rc); GOTO(out, rc); @@ -302,7 +206,8 @@ int llog_cat_initialize(struct obd_device *obd, int count) GOTO(out, rc); } - rc = 
llog_put_cat_list(obd, obd, name, count, idarray); + rc = llog_put_cat_list(&obd->obd_lvfs_ctxt, obd->obd_fsops, + name, count, idarray); if (rc) { CERROR("rc: %d\n", rc); GOTO(out, rc); @@ -312,7 +217,7 @@ int llog_cat_initialize(struct obd_device *obd, int count) OBD_FREE(idarray, size); RETURN(rc); } -EXPORT_SYMBOL(llog_cat_initialize); +EXPORT_SYMBOL(obd_llog_cat_initialize); int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, int count, struct llog_catid *logid) diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index 049db79..bd50854 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -364,8 +364,6 @@ static int llog_cancel_rec_cb(struct llog_handle *llh, struct llog_rec_hdr *rec, RETURN(0); } - - /* Test log and catalogue processing */ static int llog_test_5(struct obd_device *obd) { @@ -424,6 +422,14 @@ static int llog_test_5(struct obd_device *obd) GOTO(out, rc); } + CWARN("5f: print plain log entries reversely.. expect 6\n"); + rc = llog_cat_reverse_process(llh, plain_print_cb, "foobar"); + if (rc) { + CERROR("5f: reversely process with plain_print_cb failed: %d\n", + rc); + GOTO(out, rc); + } + out: CWARN("5: close re-opened catalog\n"); if (llh) @@ -478,6 +484,10 @@ static int llog_test_6(struct obd_device *obd, char *name) if (rc) CERROR("6: llog_process failed %d\n", rc); + rc = llog_reverse_process(llh, (llog_cb_t)plain_print_cb, NULL, NULL); + if (rc) + CERROR("6: llog_reverse_process failed %d\n", rc); + parse_out: rc = llog_close(llh); if (rc) { @@ -531,14 +541,14 @@ static int llog_test_7(struct obd_device *obd) static int llog_run_tests(struct obd_device *obd) { struct llog_handle *llh; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_TEST_ORIG_CTXT); int rc, err, cleanup_phase = 0; char name[10]; ENTRY; sprintf(name, "%x", llog_test_rand); - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, 
&ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); rc = llog_test_1(obd, name); if (rc) @@ -578,7 +588,7 @@ static int llog_run_tests(struct obd_device *obd) if (!rc) rc = err; case 0: - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); } return rc; @@ -591,7 +601,8 @@ static int llog_test_llog_init(struct obd_device *obd, struct obd_device *tgt, int rc; ENTRY; - rc = llog_setup(obd, LLOG_TEST_ORIG_CTXT, tgt, 0, NULL, &llog_lvfs_ops); + rc = obd_llog_setup(obd, LLOG_TEST_ORIG_CTXT, tgt, 0, NULL, + &llog_lvfs_ops); RETURN(rc); } @@ -600,7 +611,7 @@ static int llog_test_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_TEST_ORIG_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_TEST_ORIG_CTXT)); RETURN(rc); } diff --git a/lustre/obdclass/simple.c b/lustre/obdclass/simple.c index 722de4a..48cf4d23 100644 --- a/lustre/obdclass/simple.c +++ b/lustre/obdclass/simple.c @@ -47,8 +47,8 @@ #endif /* push / pop to root of obd store */ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); ASSERT_CTXT_MAGIC(new_ctx->magic); @@ -78,21 +78,21 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(new_ctx->pwdmnt); if (uc) { - save->ouc.ouc_fsuid = current->fsuid; - save->ouc.ouc_fsgid = current->fsgid; - save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current->groups[0]; - save->ouc.ouc_suppgid2 = current->groups[1]; - - current->fsuid = uc->ouc_fsuid; - current->fsgid = uc->ouc_fsgid; - current->cap_effective = uc->ouc_cap; + save->luc.luc_fsuid = current->fsuid; + save->luc.luc_fsgid = current->fsgid; + save->luc.luc_cap = current->cap_effective; + save->luc.luc_suppgid1 = current->groups[0]; 
+ save->luc.luc_suppgid2 = current->groups[1]; + + current->fsuid = uc->luc_fsuid; + current->fsgid = uc->luc_fsgid; + current->cap_effective = uc->luc_cap; current->ngroups = 0; - if (uc->ouc_suppgid1 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid1; - if (uc->ouc_suppgid2 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid2; + if (uc->luc_suppgid1 != -1) + current->groups[current->ngroups++] = uc->luc_suppgid1; + if (uc->luc_suppgid2 != -1) + current->groups[current->ngroups++] = uc->luc_suppgid2; } set_fs(new_ctx->fs); set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); @@ -110,8 +110,8 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, } EXPORT_SYMBOL(push_ctxt); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *uc) { //printk("pc0"); ASSERT_CTXT_MAGIC(saved->magic); @@ -138,12 +138,12 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, dput(saved->pwd); mntput(saved->pwdmnt); if (uc) { - current->fsuid = saved->ouc.ouc_fsuid; - current->fsgid = saved->ouc.ouc_fsgid; - current->cap_effective = saved->ouc.ouc_cap; + current->fsuid = saved->luc.luc_fsuid; + current->fsgid = saved->luc.luc_fsgid; + current->cap_effective = saved->luc.luc_cap; current->ngroups = saved->ngroups; - current->groups[0] = saved->ouc.ouc_suppgid1; - current->groups[1] = saved->ouc.ouc_suppgid2; + current->groups[0] = saved->luc.luc_suppgid1; + current->groups[1] = saved->luc.luc_suppgid2; } /* diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 2ab2652..cb09e69 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -108,8 +108,8 @@ int filter_finish_transno(struct obd_export *exp, struct obd_trans_info *oti, fcd->fcd_last_xid = 0; off = fed->fed_lr_off; - fsfilt_add_journal_cb(exp->exp_obd, last_rcvd, oti->oti_handle, - 
filter_commit_cb, NULL); + fsfilt_add_journal_cb(exp->exp_obd, filter->fo_sb, last_rcvd, + oti->oti_handle, filter_commit_cb, NULL); err = fsfilt_write_record(exp->exp_obd, filter->fo_rcvd_filp, fcd, sizeof(*fcd), &off, 0); if (err) { @@ -185,7 +185,7 @@ static int filter_client_add(struct obd_device *obd, struct filter_obd *filter, fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid); if (new_client) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; loff_t off = fed->fed_lr_off; int err; void *handle; @@ -193,7 +193,7 @@ static int filter_client_add(struct obd_device *obd, struct filter_obd *filter, CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n", fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd)); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); /* Transaction needed to fix bug 1403 */ handle = fsfilt_start(obd, filter->fo_rcvd_filp->f_dentry->d_inode, @@ -210,7 +210,7 @@ static int filter_client_add(struct obd_device *obd, struct filter_obd *filter, filter->fo_rcvd_filp->f_dentry->d_inode, handle, 1); } - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (err) { CERROR("error writing %s client idx %u: rc %d\n", @@ -227,7 +227,7 @@ static int filter_client_free(struct obd_export *exp, int flags) struct filter_obd *filter = &exp->exp_obd->u.filter; struct obd_device *obd = exp->exp_obd; struct filter_client_data zero_fcd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc; loff_t off; ENTRY; @@ -258,10 +258,10 @@ static int filter_client_free(struct obd_export *exp, int flags) } memset(&zero_fcd, 0, sizeof zero_fcd); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_write_record(obd, filter->fo_rcvd_filp, &zero_fcd, sizeof(zero_fcd), &off, 1); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); CDEBUG(rc == 0 ? 
D_INFO : D_ERROR, "zeroing disconnecting client %s at idx %u (%llu) in %s rc %d\n", @@ -732,14 +732,14 @@ static int filter_prep_groups(struct obd_device *obd) /* setup the object store with correct subdirectories */ static int filter_prep(struct obd_device *obd) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct filter_obd *filter = &obd->u.filter; struct file *file; struct inode *inode; int rc = 0; ENTRY; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); file = filp_open(LAST_RCVD, O_RDWR | O_CREAT | O_LARGEFILE, 0700); if (!file || IS_ERR(file)) { rc = PTR_ERR(file); @@ -772,7 +772,7 @@ static int filter_prep(struct obd_device *obd) GOTO(err_server_data, rc); out: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); return(rc); @@ -789,7 +789,7 @@ static int filter_prep(struct obd_device *obd) /* cleanup the filter: write last used object id to status file */ static void filter_post(struct obd_device *obd) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct filter_obd *filter = &obd->u.filter; int rc, i; @@ -797,7 +797,7 @@ static void filter_post(struct obd_device *obd) * best to start a transaction with h_sync, because we removed this * from lastobjid */ - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = filter_update_server_data(obd, filter->fo_rcvd_filp, filter->fo_fsd, 0); if (rc) @@ -817,7 +817,7 @@ static void filter_post(struct obd_device *obd) filter_cleanup_groups(obd); filter_free_server_data(filter); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); } static void filter_set_last_id(struct filter_obd *filter, struct obdo *oa, @@ -1150,12 +1150,13 @@ static int filter_intent_policy(struct ldlm_namespace *ns, } /* mount the file system (secretly) */ -int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, - char *option) +int 
filter_common_setup(struct obd_device *obd, obd_count len, + void *buf, char *option) { struct lustre_cfg* lcfg = buf; struct filter_obd *filter = &obd->u.filter; struct vfsmount *mnt; + char name[32] = "CATLIST"; int rc = 0; ENTRY; @@ -1194,11 +1195,15 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, filter->fo_fstype = mnt->mnt_sb->s_type->name; CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt); - OBD_SET_CTXT_MAGIC(&obd->obd_ctxt); - obd->obd_ctxt.pwdmnt = mnt; - obd->obd_ctxt.pwd = mnt->mnt_root; - obd->obd_ctxt.fs = get_ds(); - obd->obd_ctxt.cb_ops = filter_lvfs_ops; + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); + obd->obd_lvfs_ctxt.pwdmnt = mnt; + obd->obd_lvfs_ctxt.pwd = mnt->mnt_root; + obd->obd_lvfs_ctxt.fs = get_ds(); + obd->obd_lvfs_ctxt.cb_ops = filter_lvfs_ops; + + rc = fsfilt_setup(obd, mnt->mnt_sb); + if (rc) + GOTO(err_mntput, rc); rc = filter_prep(obd); if (rc) @@ -1227,7 +1232,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, "filter_ldlm_cb_client", &obd->obd_ldlm_client); - rc = llog_cat_initialize(obd, 1); + rc = obd_llog_cat_initialize(obd, 1, name); if (rc) { CERROR("failed to setup llogging subsystems\n"); GOTO(err_post, rc); @@ -1251,32 +1256,13 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; - const char *str = NULL; - char *option = NULL; - int n = 0; int rc; - if (!lcfg->lcfg_inlbuf1 || !lcfg->lcfg_inlbuf2) - RETURN(-EINVAL); - - if (!strcmp(lcfg->lcfg_inlbuf2, "ext3") || - !strcmp(lcfg->lcfg_inlbuf2, "ldiskfs")) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - /* bug 1577: implement async-delete for 2.5 */ - str = "errors=remount-ro,asyncdel"; -#else - str = "errors=remount-ro"; -#endif - n = strlen(str) + 1; - OBD_ALLOC(option, n); - if (option == NULL) - RETURN(-ENOMEM); - strcpy(option, str); - } - - rc = 
filter_common_setup(obd, len, buf, option); - if (option) - OBD_FREE(option, n); + /* all mount options including errors=remount-ro and asyncdel are passed + * using 4th lcfg param. And it is good, finally we have got rid of + * hardcoded fs types in the code. */ + + rc = filter_common_setup(obd, len, buf, lcfg->lcfg_inlbuf4); lprocfs_init_vars(filter, &lvars); if (rc == 0 && lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && @@ -1613,7 +1599,7 @@ static int filter_getattr(struct obd_export *exp, struct obdo *oa, static int filter_setattr(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *md, struct obd_trans_info *oti) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct filter_obd *filter; struct dentry *dentry; struct iattr iattr; @@ -1633,7 +1619,7 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa, iattr_from_obdo(&iattr, oa, oa->o_valid); - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); lock_kernel(); if (iattr.ia_valid & ATTR_SIZE) @@ -1681,7 +1667,7 @@ out_unlock: if (iattr.ia_valid & ATTR_SIZE) up(&dentry->d_inode->i_sem); unlock_kernel(); - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); f_dput(dentry); RETURN(rc); @@ -1945,7 +1931,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { struct obd_device *obd = NULL; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct lov_stripe_md *lsm = NULL; obd_gr group = 0; int rc = 0, diff; @@ -1966,7 +1952,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, } obd = exp->exp_obd; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { @@ -1988,7 +1974,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, } } - pop_ctxt(&saved, 
&obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc && ea != NULL && *ea != lsm) { obd_free_memmd(exp, &lsm); } else if (rc == 0 && ea != NULL) { @@ -2009,7 +1995,7 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, struct obd_device *obd; struct filter_obd *filter; struct dentry *dchild = NULL, *dparent = NULL; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; int rc, rc2, cleanup_phase = 0, have_prepared = 0; @@ -2022,7 +2008,7 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, obd = exp->exp_obd; filter = &obd->u.filter; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); acquire_locks: dparent = filter_parent_lock(obd, group, oa->o_id); @@ -2062,9 +2048,10 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, goto acquire_locks; } - handle = fsfilt_start_log(obd, dparent->d_inode,FSFILT_OP_UNLINK,oti,1); + handle = fsfilt_start_log(obd, dparent->d_inode, FSFILT_OP_UNLINK, oti, 1); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); + cleanup_phase = 3; /* Our MDC connection is established by the MDS to us */ @@ -2081,11 +2068,13 @@ cleanup: case 3: if (fcc != NULL) { if (oti != NULL) - fsfilt_add_journal_cb(obd, 0, oti->oti_handle, + fsfilt_add_journal_cb(obd, filter->fo_sb, 0, + oti->oti_handle, filter_cancel_cookies_cb, fcc); else - fsfilt_add_journal_cb(obd, 0, handle, + fsfilt_add_journal_cb(obd, filter->fo_sb, 0, + handle, filter_cancel_cookies_cb, fcc); } @@ -2101,7 +2090,7 @@ cleanup: case 1: filter_parent_unlock(dparent); case 0: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); break; default: CERROR("invalid cleanup_phase %d\n", cleanup_phase); @@ -2134,7 +2123,7 @@ static int filter_truncate(struct obd_export *exp, struct obdo *oa, static int filter_sync(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_off 
start, obd_off end) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct filter_obd *filter; struct dentry *dentry; struct llog_ctxt *ctxt; @@ -2156,7 +2145,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, if (IS_ERR(dentry)) RETURN(PTR_ERR(dentry)); - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); down(&dentry->d_inode->i_sem); rc = filemap_fdatasync(dentry->d_inode->i_mapping); @@ -2176,7 +2165,7 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, oa->o_valid = OBD_MD_FLID; obdo_from_inode(oa, dentry->d_inode, FILTER_VALID_FLAGS); - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); f_dput(dentry); RETURN(rc); @@ -2311,7 +2300,7 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, } case OBD_IOC_CATLOGLIST: { - rc = llog_catlog_list(obd, 1, data); + rc = llog_catalog_list(obd, 1, data); RETURN(rc); } @@ -2324,9 +2313,9 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, /* struct llog_ctxt *ctxt = NULL; - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &ctxt->loc_ctxt, NULL); rc = llog_ioctl(ctxt, cmd, data); - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &ctxt->loc_ctxt, NULL); RETURN(rc); */ @@ -2342,12 +2331,12 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp, static struct llog_operations filter_unlink_repl_logops; static struct llog_operations filter_size_orig_logops = { lop_setup: llog_obd_origin_setup, - lop_cleanup: llog_obd_origin_cleanup, - lop_add: llog_obd_origin_add + lop_cleanup: llog_catalog_cleanup, + lop_add: llog_catalog_add, }; static int filter_llog_init(struct obd_device *obd, struct obd_device *tgt, - int count, struct llog_catid *logid) + int count, struct llog_catid *catid) { struct llog_ctxt *ctxt; int rc; @@ -2358,16 +2347,17 @@ static int filter_llog_init(struct 
obd_device *obd, struct obd_device *tgt, filter_unlink_repl_logops.lop_connect = llog_repl_connect; filter_unlink_repl_logops.lop_sync = llog_obd_repl_sync; - rc = llog_setup(obd, LLOG_UNLINK_REPL_CTXT, tgt, 0, NULL, - &filter_unlink_repl_logops); + rc = obd_llog_setup(obd, LLOG_UNLINK_REPL_CTXT, tgt, 0, NULL, + &filter_unlink_repl_logops); if (rc) RETURN(rc); /* FIXME - assign unlink_cb for filter's recovery */ ctxt = llog_get_context(obd, LLOG_UNLINK_REPL_CTXT); - ctxt->llog_proc_cb = filter_recov_log_unlink_cb; + ctxt->loc_proc_cb = filter_recov_log_unlink_cb; - rc = llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, NULL, - &filter_size_orig_logops); + /* FIXME - count should be 1 to setup size log */ + rc = obd_llog_setup(obd, LLOG_SIZE_ORIG_CTXT, tgt, 0, &catid->lci_logid, + &filter_size_orig_logops); RETURN(rc); } @@ -2376,11 +2366,11 @@ static int filter_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_UNLINK_REPL_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_UNLINK_REPL_CTXT)); if (rc) RETURN(rc); - rc = llog_cleanup(llog_get_context(obd, LLOG_SIZE_ORIG_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_SIZE_ORIG_CTXT)); RETURN(rc); } diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index ccbbc74..8cf3f59 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -35,18 +35,20 @@ #include #include "filter_internal.h" -static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb) +static int filter_start_page_read(struct obd_device *obd, struct inode *inode, + struct niobuf_local *lnb) { - struct address_space *mapping = inode->i_mapping; struct page *page; unsigned long index = lnb->offset >> PAGE_SHIFT; - int rc; - page = grab_cache_page(mapping, index); /* locked page */ - if (page == NULL) - return lnb->rc = -ENOMEM; + page = fsfilt_getpage(obd, inode, index); + if (IS_ERR(page)) { + CERROR("page index %lu, rc = 
%ld\n", index, PTR_ERR(page)); - LASSERT(page->mapping == mapping); + lnb->page = NULL; + lnb->rc = PTR_ERR(page); + return lnb->rc; + } lnb->page = page; @@ -55,19 +57,6 @@ static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb) else lnb->rc = lnb->len; - if (PageUptodate(page)) { - unlock_page(page); - return 0; - } - - rc = mapping->a_ops->readpage(NULL, page); - if (rc < 0) { - CERROR("page index %lu, rc = %d\n", index, rc); - lnb->page = NULL; - page_cache_release(page); - return lnb->rc = rc; - } - return 0; } @@ -284,7 +273,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, struct obd_trans_info *oti) { struct obd_device *obd = exp->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct obd_ioobj *o; struct niobuf_remote *rnb; struct niobuf_local *lnb = NULL; @@ -320,7 +309,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, memset(res, 0, niocount * sizeof(*res)); - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); for (i = 0, o = obj; i < objcount; i++, o++) { LASSERT(o->ioo_bufcnt); @@ -361,7 +350,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, * easy to detect later. 
*/ break; } else { - rc = filter_start_page_read(inode, lnb); + rc = filter_start_page_read(obd, inode, lnb); } if (rc) { @@ -422,7 +411,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, CERROR("NULL dentry in cleanup -- tell CFS\n"); case 0: OBD_FREE(fso, objcount * sizeof(*fso)); - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); } return rc; } @@ -569,7 +558,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, struct niobuf_local *res, struct obd_trans_info *oti) { - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct niobuf_remote *rnb; struct niobuf_local *lnb; struct fsfilt_objinfo fso; @@ -583,7 +572,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, memset(res, 0, niocount * sizeof(*res)); - push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr, obj->ioo_id); if (IS_ERR(dentry)) @@ -666,7 +655,7 @@ cleanup: spin_unlock(&exp->exp_obd->obd_osfs_lock); default: ; } - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL); return rc; } diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index a3464f3..1839f16 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -155,14 +155,11 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf, if (rc < 0) GOTO(cleanup, rc); - rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks, - 1 << inode->i_blkbits); + rc = fsfilt_send_bio(obd, inode, iobuf); + CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n", iobuf->nr_pages, rc); - if (rc != (1 << inode->i_blkbits) * iobuf->nr_pages * blocks_per_page) - CERROR("short write? 
expected %d, wrote %d\n", - (1 << inode->i_blkbits) * iobuf->nr_pages * - blocks_per_page, rc); + if (rc > 0) rc = 0; @@ -227,7 +224,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, int rc) { struct obd_device *obd = exp->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct niobuf_local *lnb; struct fsfilt_objinfo fso; struct iattr iattr = { 0 }; @@ -287,7 +284,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, iattr.ia_size = this_size; } - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); cleanup_phase = 2; down(&inode->i_sem); @@ -326,7 +323,7 @@ cleanup: switch (cleanup_phase) { case 2: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); LASSERT(current->journal_info == NULL); case 1: free_kiovec(1, &iobuf); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index c90d371..c9d2151 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -103,7 +103,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, struct bio *bio = NULL; int blocks_per_page, err; struct niobuf_local *lnb; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct fsfilt_objinfo fso; struct iattr iattr = { 0 }; struct inode *inode = NULL; @@ -114,6 +114,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, struct obd_device *obd = exp->exp_obd; ENTRY; + LASSERT(oti != NULL); LASSERT(objcount == 1); LASSERT(current->journal_info == NULL); @@ -139,7 +140,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, fso.fso_dentry = res->dentry; fso.fso_bufcnt = obj->ioo_bufcnt; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); cleanup_phase = 2; oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, @@ -199,7 +200,7 @@ int filter_commitrw_write(struct obd_export *exp, struct 
obdo *oa, } } - /* We expect these pages to be in offset order, but we'll + /* we expect these pages to be in offset order, but we'll * be forgiving */ this_size = lnb->offset + lnb->len; if (this_size > iattr.ia_size) @@ -209,7 +210,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, #warning This probably needs filemap_fdatasync() like filter_io_24 (bug 2366) if (bio) { atomic_inc(&dreq->numreqs); - submit_bio(WRITE, bio); + fsfilt_send_bio(obd, inode, bio); } /* time to wait for I/O completion */ @@ -254,7 +255,7 @@ cleanup: switch (cleanup_phase) { case 2: - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); LASSERT(current->journal_info == NULL); case 1: OBD_FREE(dreq, sizeof(*dreq)); diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index eb3df7a..1e1a3d3 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -97,9 +97,11 @@ void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno, void *cb_data, int error) { struct llog_cookie *cookie = cb_data; - llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1), - NULL, 1, cookie, 0); - //NULL, 1, cookie, OBD_LLOG_FL_SENDNOW); + int rc; + rc = llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1), + 1, cookie, 0, NULL); + if (rc) + CERROR("error cancelling log cookies: rc = %d\n", rc); OBD_FREE(cb_data, sizeof(struct llog_cookie)); } @@ -141,7 +143,7 @@ int filter_recov_log_unlink_cb(struct llog_handle *llh, else rc = LLOG_PROC_BREAK; CWARN("fetch generation log, send cookie\n"); - llog_cancel(ctxt, NULL, 1, &cookie, 0); + llog_cancel(ctxt, 1, &cookie, 0, NULL); RETURN(rc); } @@ -159,7 +161,7 @@ int filter_recov_log_unlink_cb(struct llog_handle *llh, obdo_free(oa); if (rc == -ENOENT) { CDEBUG(D_HA, "object already removed, send cookie\n"); - llog_cancel(ctxt, NULL, 1, &cookie, 0); + llog_cancel(ctxt, 1, &cookie, 0, NULL); RETURN(0); } diff --git a/lustre/osc/osc_request.c 
b/lustre/osc/osc_request.c index 458529d..d095a38 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2817,17 +2817,17 @@ static int osc_llog_init(struct obd_device *obd, struct obd_device *tgt, osc_unlink_orig_logops = llog_lvfs_ops; osc_unlink_orig_logops.lop_setup = llog_obd_origin_setup; - osc_unlink_orig_logops.lop_cleanup = llog_obd_origin_cleanup; - osc_unlink_orig_logops.lop_add = llog_obd_origin_add; + osc_unlink_orig_logops.lop_cleanup = llog_catalog_cleanup; + osc_unlink_orig_logops.lop_add = llog_catalog_add; osc_unlink_orig_logops.lop_connect = llog_origin_connect; - rc = llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, count, - &catid->lci_logid, &osc_unlink_orig_logops); + rc = obd_llog_setup(obd, LLOG_UNLINK_ORIG_CTXT, tgt, count, + &catid->lci_logid, &osc_unlink_orig_logops); if (rc) RETURN(rc); - rc = llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, count, NULL, - &osc_size_repl_logops); + rc = obd_llog_setup(obd, LLOG_SIZE_REPL_CTXT, tgt, count, NULL, + &osc_size_repl_logops); RETURN(rc); } @@ -2836,11 +2836,11 @@ static int osc_llog_finish(struct obd_device *obd, int count) int rc; ENTRY; - rc = llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_UNLINK_ORIG_CTXT)); if (rc) RETURN(rc); - rc = llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); + rc = obd_llog_cleanup(llog_get_context(obd, LLOG_SIZE_REPL_CTXT)); RETURN(rc); } diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h index 5a185d7..958889a 100644 --- a/lustre/portals/include/linux/kp30.h +++ b/lustre/portals/include/linux/kp30.h @@ -694,11 +694,11 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); # endif #endif -#ifndef LP_POISON +/*#ifndef LP_POISON # define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) # define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) # define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) -#endif +#endif*/ #if defined(__x86_64__) # define LPU64 
"%Lu" @@ -706,18 +706,33 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); # define LPX64 "%#Lx" # define LPSZ "%lu" # define LPSSZ "%ld" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#endif #elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) # define LPU64 "%Lu" # define LPD64 "%Ld" # define LPX64 "%#Lx" # define LPSZ "%u" # define LPSSZ "%d" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif #elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) # define LPU64 "%lu" # define LPD64 "%ld" # define LPX64 "%#lx" # define LPSZ "%lu" # define LPSSZ "%ld" +#ifndef LP_POISON +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#endif #endif #ifndef LPU64 # error "No word size defined" diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h index 5b1b8a5..1d27768 100644 --- a/lustre/portals/include/linux/libcfs.h +++ b/lustre/portals/include/linux/libcfs.h @@ -79,6 +79,7 @@ extern unsigned int portal_cerror; #define S_PTLROUTER 0x00100000 #define S_COBD 0x00200000 #define S_IBNAL 0x00400000 +#define S_SM 0x00800000 /* If you change these values, please keep portals/utils/debug.c * up to date! 
*/ diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index ae2da55..b6bfec0 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -538,6 +538,7 @@ static struct mod_paths { {"mds", "lustre/mds"}, {"mdc", "lustre/mdc"}, {"llite", "lustre/llite"}, + {"smfs", "lustre/smfs"}, {"obdecho", "lustre/obdecho"}, {"ldlm", "lustre/ldlm"}, {"obdfilter", "lustre/obdfilter"}, @@ -546,6 +547,7 @@ static struct mod_paths { {"fsfilt_ext3", "lustre/lvfs"}, {"fsfilt_extN", "lustre/lvfs"}, {"fsfilt_reiserfs", "lustre/lvfs"}, + {"fsfilt_smfs", "lustre/lvfs"}, {"mds_ext2", "lustre/mds"}, {"mds_ext3", "lustre/mds"}, {"mds_extN", "lustre/mds"}, diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 1098b40..4fcaf37 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -1,7 +1,7 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * Copyright (C) 2001-2004 Cluster File Systems, Inc. * Author: Andreas Dilger * * This file is part of Lustre, http://www.lustre.org. 
@@ -85,12 +85,12 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res, if (rc) GOTO(err_free, rc); - body = lustre_swab_repbuf(req, 0, sizeof(*body), + body = lustre_swab_repbuf(req, 0, sizeof(*body), lustre_swab_llogd_body); - if (body == NULL) { + if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); GOTO(err_free, rc =-EFAULT); - } + } handle->lgh_id = body->lgd_logid; handle->lgh_ctxt = ctxt; @@ -105,6 +105,55 @@ err_free: goto out; } +static int llog_client_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + struct obd_import *imp = loghandle->lgh_ctxt->loc_imp; + struct ptlrpc_request *req = NULL; + struct llogd_body *body; + void * ptr; + int size = sizeof(*body); + int repsize[2] = {sizeof (*body)}; + int rc; + ENTRY; + + req = ptlrpc_prep_req(imp, LLOG_ORIGIN_HANDLE_PREV_BLOCK, 1,&size,NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); + body->lgd_logid = loghandle->lgh_id; + body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1; + body->lgd_llh_flags = loghandle->lgh_hdr->llh_flags; + body->lgd_index = prev_idx; + body->lgd_len = len; + repsize[1] = len; + + req->rq_replen = lustre_msg_size(2, repsize); + rc = ptlrpc_queue_wait(req); + if (rc) + GOTO(out, rc); + + body = lustre_swab_repbuf(req, 0, sizeof(*body), + lustre_swab_llogd_body); + if (body == NULL) { + CERROR ("Can't unpack llogd_body\n"); + GOTO(out, rc =-EFAULT); + } + + ptr = lustre_msg_buf(req->rq_repmsg, 1, len); + if (ptr == NULL) { + CERROR ("Can't unpack bitmap\n"); + GOTO(out, rc =-EFAULT); + } + + memcpy(buf, ptr, len); + +out: + if (req) + ptlrpc_req_finished(req); + RETURN(rc); +} static int llog_client_next_block(struct llog_handle *loghandle, int *cur_idx, int next_idx, @@ -138,18 +187,18 @@ static int llog_client_next_block(struct llog_handle *loghandle, if (rc) GOTO(out, rc); - body = lustre_swab_repbuf(req, 0, sizeof(*body), + body = lustre_swab_repbuf(req, 0, 
sizeof(*body), lustre_swab_llogd_body); - if (body == NULL) { + if (body == NULL) { CERROR ("Can't unpack llogd_body\n"); GOTO(out, rc =-EFAULT); - } + } ptr = lustre_msg_buf(req->rq_repmsg, 1, len); - if (ptr == NULL) { + if (ptr == NULL) { CERROR ("Can't unpack bitmap\n"); GOTO(out, rc =-EFAULT); - } + } *cur_idx = body->lgd_saved_index; *cur_offset = body->lgd_cur_offset; @@ -189,8 +238,8 @@ static int llog_client_read_header(struct llog_handle *handle) if (rc) GOTO(out, rc); - hdr = lustre_swab_repbuf(req, 0, sizeof(*hdr), lustre_swab_llog_hdr); - if (hdr == NULL) { + hdr = lustre_swab_repbuf(req, 0, sizeof(*hdr), lustre_swab_llog_hdr); + if (hdr == NULL) { CERROR ("Can't unpack llog_hdr\n"); GOTO(out, rc =-EFAULT); } @@ -227,6 +276,7 @@ static int llog_client_close(struct llog_handle *handle) struct llog_operations llog_client_ops = { + lop_prev_block: llog_client_prev_block, lop_next_block: llog_client_next_block, lop_read_header: llog_client_read_header, lop_create: llog_client_create, diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 0eb8d41..9e6ae48 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -71,7 +71,7 @@ int llog_origin_connect(struct llog_ctxt *ctxt, int count, lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = sizeof(*lgr); lgr->lgr_hdr.lrh_type = LLOG_GEN_REC; lgr->lgr_gen = ctxt->loc_gen; - rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1); + rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1, NULL); OBD_FREE(lgr, sizeof(*lgr)); if (rc != 1) RETURN(rc); diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index fa53b3c..d064fc8 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -47,10 +47,9 @@ int llog_origin_handle_create(struct ptlrpc_request *req) { struct obd_export *exp = req->rq_export; struct obd_device *obd = exp->exp_obd; - struct obd_device *disk_obd; struct llog_handle *loghandle; struct llogd_body *body; - struct obd_run_ctxt saved; + struct 
lvfs_run_ctxt saved; struct llog_logid *logid = NULL; struct llog_ctxt *ctxt; char * name = NULL; @@ -78,8 +77,7 @@ int llog_origin_handle_create(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); LASSERT(ctxt != NULL); - disk_obd = ctxt->loc_exp->exp_obd; - push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); rc = llog_create(ctxt, &loghandle, logid, name); if (rc) @@ -97,7 +95,73 @@ out_close: if (!rc) rc = rc2; out_pop: - pop_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); +out: + RETURN(rc); +} + +int llog_origin_handle_prev_block(struct ptlrpc_request *req) +{ + struct obd_export *exp = req->rq_export; + struct obd_device *obd = exp->exp_obd; + struct llog_handle *loghandle; + struct llogd_body *body; + struct lvfs_run_ctxt saved; + struct llog_ctxt *ctxt; + __u32 flags; + __u8 *buf; + void * ptr; + int rc, rc2, size[] = {sizeof (*body), LLOG_CHUNK_SIZE}; + ENTRY; + + body = lustre_swab_reqbuf(req, 0, sizeof(*body), + lustre_swab_llogd_body); + if (body == NULL) { + CERROR ("Can't unpack llogd_body\n"); + GOTO(out, rc =-EFAULT); + } + + OBD_ALLOC(buf, LLOG_CHUNK_SIZE); + if (!buf) + GOTO(out, rc = -ENOMEM); + + ctxt = llog_get_context(obd, body->lgd_ctxt_idx); + LASSERT(ctxt != NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); + + rc = llog_create(ctxt, &loghandle, &body->lgd_logid, NULL); + if (rc) + GOTO(out_pop, rc); + + flags = body->lgd_llh_flags; + rc = llog_init_handle(loghandle, flags, NULL); + if (rc) + GOTO(out_close, rc); + + memset(buf, 0, LLOG_CHUNK_SIZE); + rc = llog_prev_block(loghandle, body->lgd_index, buf, LLOG_CHUNK_SIZE); + if (rc) + GOTO(out_close, rc); + + + rc = lustre_pack_reply(req, 2, size, NULL); + if (rc) + GOTO(out_close, rc = -ENOMEM); + + ptr = lustre_msg_buf(req->rq_repmsg, 0, sizeof (body)); + memcpy(ptr, body, sizeof(*body)); + + ptr = lustre_msg_buf(req->rq_repmsg, 1, LLOG_CHUNK_SIZE); + memcpy(ptr, buf, 
LLOG_CHUNK_SIZE); + +out_close: + rc2 = llog_close(loghandle); + if (!rc) + rc = rc2; + +out_pop: + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); + OBD_FREE(buf, LLOG_CHUNK_SIZE); out: RETURN(rc); } @@ -106,10 +170,9 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) { struct obd_export *exp = req->rq_export; struct obd_device *obd = exp->exp_obd; - struct obd_device *disk_obd; struct llog_handle *loghandle; struct llogd_body *body; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct llog_ctxt *ctxt; __u32 flags; __u8 *buf; @@ -132,8 +195,7 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); LASSERT(ctxt != NULL); - disk_obd = ctxt->loc_exp->exp_obd; - push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); rc = llog_create(ctxt, &loghandle, &body->lgd_logid, NULL); if (rc) @@ -168,7 +230,7 @@ out_close: rc = rc2; out_pop: - pop_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); OBD_FREE(buf, LLOG_CHUNK_SIZE); out: RETURN(rc); @@ -178,11 +240,10 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) { struct obd_export *exp = req->rq_export; struct obd_device *obd = exp->exp_obd; - struct obd_device *disk_obd; struct llog_handle *loghandle; struct llogd_body *body; struct llog_log_hdr *hdr; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct llog_ctxt *ctxt; __u32 flags; int size[] = {sizeof (*hdr)}; @@ -198,8 +259,7 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); LASSERT(ctxt != NULL); - disk_obd = ctxt->loc_exp->exp_obd; - push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); rc = llog_create(ctxt, &loghandle, &body->lgd_logid, NULL); if (rc) @@ -224,7 +284,7 @@ out_close: rc = rc2; out_pop: - pop_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + pop_ctxt(&saved, 
ctxt->loc_lvfs_ctxt, NULL); out: RETURN(rc); @@ -242,11 +302,10 @@ int llog_origin_handle_close(struct ptlrpc_request *req) int llog_origin_handle_cancel(struct ptlrpc_request *req) { struct obd_device *obd = req->rq_export->exp_obd; - struct obd_device *disk_obd; struct llog_cookie *logcookies; struct llog_ctxt *ctxt; int num_cookies, rc = 0, err, i; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct llog_handle *cathandle; struct inode *inode; void *handle; @@ -265,15 +324,14 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) RETURN(-ENOENT); } - disk_obd = ctxt->loc_exp->exp_obd; - push_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); for (i = 0; i < num_cookies; i++, logcookies++) { cathandle = ctxt->loc_handle; LASSERT(cathandle != NULL); inode = cathandle->lgh_file->f_dentry->d_inode; - handle = fsfilt_start_log(disk_obd, inode, - FSFILT_OP_CANCEL_UNLINK, NULL, 1); + handle = llog_fsfilt_start(ctxt, inode, + FSFILT_OP_CANCEL_UNLINK, NULL); if (IS_ERR(handle)) { CERROR("fsfilt_start failed: %ld\n", PTR_ERR(handle)); GOTO(pop_ctxt, rc = PTR_ERR(handle)); @@ -281,7 +339,7 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) rc = llog_cat_cancel_records(cathandle, 1, logcookies); - err = fsfilt_commit(disk_obd, inode, handle, 0); + err = llog_fsfilt_commit(ctxt, inode, handle, 0); if (err) { CERROR("error committing transaction: %d\n", err); if (!rc) @@ -290,7 +348,7 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) } } pop_ctxt: - pop_ctxt(&saved, &disk_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); if (rc) CERROR("cancel %d llog-records failed: %d\n", num_cookies, rc); else @@ -305,7 +363,7 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, { struct mds_obd *mds = &obd->u.mds; struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct llog_handle 
*handle = NULL; char name[4][64]; int rc, i, l, remains = buf_len; @@ -314,7 +372,7 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, if (ctxt == NULL || mds == NULL) RETURN(-EOPNOTSUPP); - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); sprintf(name[0], "%s", mds->mds_profile); sprintf(name[1], "%s-clean", mds->mds_profile); @@ -350,7 +408,7 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, break; } out_pop: - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); RETURN(rc); } @@ -422,7 +480,7 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, { struct mds_obd *mds = &obd->u.mds; struct llog_handle *handle; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int size, i, count; struct llog_catid *idarray; struct llog_logid *id; @@ -441,11 +499,12 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, if (!idarray) RETURN(-ENOMEM); - rc = llog_get_cat_list(obd, obd, name, count, idarray); + rc = llog_get_cat_list(&obd->obd_lvfs_ctxt, obd->obd_fsops, + name, count, idarray); if (rc) GOTO(out_free, rc); - push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + push_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); data.ctxt = ctxt; data.out = buf; @@ -483,7 +542,7 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, break; } out_pop: - pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_ctxt, NULL); + pop_ctxt(&saved, ctxt->loc_lvfs_ctxt, NULL); out_free: OBD_FREE(idarray, size); RETURN(rc); @@ -533,6 +592,11 @@ int llog_origin_handle_create(struct ptlrpc_request *req) LBUG(); return 0; } +int llog_origin_handle_prev_block(struct ptlrpc_request *req) +{ + LBUG(); + return 0; +} int llog_origin_handle_next_block(struct ptlrpc_request *req) { LBUG(); diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 
9bfdbb4..94475ee 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -775,7 +775,7 @@ void lustre_assert_wire_constants(void) (long long)REINT_RENAME); LASSERTF(REINT_OPEN == 6, " found %lld\n", (long long)REINT_OPEN); - LASSERTF(REINT_MAX == 6, " found %lld\n", + LASSERTF(REINT_MAX == 8, " found %lld\n", (long long)REINT_MAX); LASSERTF(DISP_IT_EXECD == 1, " found %lld\n", (long long)DISP_IT_EXECD); diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index caadab0..508aa82 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -204,6 +204,7 @@ EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd); /* llogd.c */ EXPORT_SYMBOL(llog_origin_handle_create); +EXPORT_SYMBOL(llog_origin_handle_prev_block); EXPORT_SYMBOL(llog_origin_handle_next_block); EXPORT_SYMBOL(llog_origin_handle_read_header); EXPORT_SYMBOL(llog_origin_handle_close); diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index 490dc36..bacc59b 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -133,9 +133,8 @@ EXPORT_SYMBOL(llcd_send); * log record for the deletion. 
The commit callback calls this * function */ -int llog_obd_repl_cancel(struct llog_ctxt *ctxt, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags) +int llog_obd_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data) { struct llog_canceld_ctxt *llcd; int rc = 0; @@ -204,7 +203,7 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) } up(&ctxt->loc_sem); } else { - rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); + rc = llog_cancel(ctxt, 0, NULL, OBD_LLOG_FL_SENDNOW, NULL); } RETURN(rc); @@ -591,7 +590,7 @@ int llog_repl_connect(struct llog_ctxt *ctxt, int count, ctxt->loc_llcd = llcd; up(&ctxt->loc_sem); - rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid); + rc = llog_recovery_generic(ctxt, ctxt->loc_proc_cb, logid); if (rc != 0) CERROR("error recovery process: %d\n", rc); @@ -601,9 +600,8 @@ EXPORT_SYMBOL(llog_repl_connect); #else /* !__KERNEL__ */ -int llog_obd_repl_cancel(struct llog_ctxt *ctxt, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags) +int llog_obd_repl_cancel(struct llog_ctxt *ctxt, int count, + struct llog_cookie *cookies, int flags, void *data) { return 0; } diff --git a/lustre/smfs/Makefile.in b/lustre/smfs/Makefile.in index a9bda5b..e23f03b 100644 --- a/lustre/smfs/Makefile.in +++ b/lustre/smfs/Makefile.in @@ -1,6 +1,5 @@ MODULES := smfs -smfs-objs := super.o options.o inode.o cache.o dir.o sysctl.o file.o -smfs-objs += symlink.o sm_fs.o kml.o reint.o journal.o journal_ext3.o -smfs-objs += smfs_llog.o +smfs-objs := super.o options.o inode.o cache.o cache_space.o dir.o ioctl.o +smfs-objs += sysctl.o file.o symlink.o sm_fs.o kml.o journal.o smfs_llog.o @INCLUDE_RULES@ diff --git a/lustre/smfs/autoMakefile.am b/lustre/smfs/autoMakefile.am index 3b277a9..952cfa7 100644 --- a/lustre/smfs/autoMakefile.am +++ b/lustre/smfs/autoMakefile.am @@ -8,4 +8,4 @@ modulefs_DATA = smfs$(KMODEXT) endif MOSTLYCLEANFILES = 
*.o *.ko *.mod.c -DIST_SOURCES = $(smfs-objs:%.o=%.c) kml_idl.h smfs_internal.h smfs_support.h +DIST_SOURCES = $(smfs-objs:%.o=%.c) smfs_internal.h diff --git a/lustre/smfs/cache.c b/lustre/smfs/cache.c index d94a0cb..f68a1a8 100644 --- a/lustre/smfs/cache.c +++ b/lustre/smfs/cache.c @@ -1,5 +1,23 @@ -/* - * snapfs/cache.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * */ #define DEBUG_SUBSYSTEM S_SM @@ -11,378 +29,333 @@ #include #include #include +#include +#include +#include +#include -#include "smfs_internal.h" -struct sm_ops smfs_operations; - -extern struct inode_operations smfs_file_iops; -extern struct file_operations smfs_file_fops; -extern struct inode_operations smfs_sym_iops; -extern struct file_operations smfs_sym_fops; -extern struct super_operations smfs_super_ops; -extern struct journal_operations smfs_journal_ops; +#include "smfs_internal.h" +int smfs_init_sm_ops(struct smfs_super_info *smb) +{ + struct sm_operations *sm_ops; /*cache ops for set cache inode ops*/ + OBD_ALLOC(sm_ops, sizeof(struct sm_operations)); + if (!sm_ops) + RETURN(-ENOMEM); -inline struct super_operations *cache_sops(struct sm_ops *smfs_ops) -{ - return &smfs_ops->sm_sb_ops; + smb->sm_ops = sm_ops; + RETURN(0); } -inline struct inode_operations *cache_diops(struct sm_ops *smfs_ops) +void smfs_cleanup_sm_ops(struct smfs_super_info *smb) { - return &smfs_ops->sm_dir_iops; + if (smb->sm_ops) + OBD_FREE(smb->sm_ops, sizeof(struct sm_operations)); } -inline struct inode_operations *cache_fiops(struct sm_ops *smfs_ops) +static void setup_iops(struct inode *cache_inode, + struct inode_operations *iops, + struct inode_operations *cache_iops) { - return &smfs_ops->sm_file_iops; -} -inline struct inode_operations *cache_siops(struct sm_ops *smfs_ops) -{ - return &smfs_ops->sm_sym_iops; + if (cache_inode->i_op && cache_iops && iops) { + if (cache_inode->i_op->create) + iops->create = cache_iops->create; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_inode->i_op->create_it) + iops->create_it = cache_iops->create_it; +#endif + if (cache_inode->i_op->lookup) + iops->lookup = cache_iops->lookup; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_inode->i_op->lookup_raw) + iops->lookup_raw = cache_iops->lookup_raw; + if (cache_inode->i_op->lookup_it) + iops->lookup_it = cache_iops->lookup_it; +#endif + if 
(cache_inode->i_op->link) + iops->link = cache_iops->link; + if (cache_inode->i_op->link_raw) + iops->link_raw = cache_iops->link_raw; + if (cache_inode->i_op->unlink) + iops->unlink = cache_iops->unlink; + if (cache_inode->i_op->unlink_raw) + iops->unlink_raw = cache_iops->unlink_raw; + if (cache_inode->i_op->symlink) + iops->symlink = cache_iops->symlink; + if (cache_inode->i_op->symlink_raw) + iops->symlink_raw = cache_iops->symlink_raw; + if (cache_inode->i_op->mkdir) + iops->mkdir = cache_iops->mkdir; + if (cache_inode->i_op->mkdir_raw) + iops->mkdir_raw = cache_iops->mkdir_raw; + if (cache_inode->i_op->rmdir) + iops->rmdir = cache_iops->rmdir; + if (cache_inode->i_op->rmdir_raw) + iops->rmdir_raw = cache_iops->rmdir_raw; + if (cache_inode->i_op->mknod) + iops->mknod = cache_iops->mknod; + if (cache_inode->i_op->mknod_raw) + iops->mknod_raw = cache_iops->mknod_raw; + if (cache_inode->i_op->rename) + iops->rename = cache_iops->rename; + if (cache_inode->i_op->rename_raw) + iops->rename_raw = cache_iops->rename_raw; + if (cache_inode->i_op->readlink) + iops->readlink = cache_iops->readlink; + if (cache_inode->i_op->follow_link) + iops->follow_link = cache_iops->follow_link; + if (cache_inode->i_op->truncate) + iops->truncate = cache_iops->truncate; + if (cache_inode->i_op->permission) + iops->permission = cache_iops->permission; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_inode->i_op->revalidate) + iops->revalidate = cache_iops->revalidate; + if (cache_inode->i_op->revalidate_it) + iops->revalidate_it = cache_iops->revalidate_it; +#endif + if (cache_inode->i_op->setattr) + iops->setattr = cache_iops->setattr; + if (cache_inode->i_op->setattr_raw) + iops->setattr_raw = cache_iops->setattr_raw; + if (cache_inode->i_op->getattr) + iops->getattr = cache_iops->getattr; + if (cache_inode->i_op->setxattr) + iops->setxattr = cache_iops->setxattr; + if (cache_inode->i_op->getxattr) + iops->getxattr = cache_iops->getxattr; + if 
(cache_inode->i_op->listxattr) + iops->listxattr = cache_iops->listxattr; + if (cache_inode->i_op->removexattr) + iops->removexattr = cache_iops->removexattr; + } } -inline struct file_operations *cache_dfops(struct sm_ops *smfs_ops) +static void setup_fops(struct inode *cache_inode, + struct file_operations *fops, + struct file_operations *cache_fops) { - return &smfs_ops->sm_dir_fops; + if (cache_inode->i_fop && cache_fops && fops) { + if (cache_inode->i_fop->llseek) + fops->llseek = cache_fops->llseek; + if (cache_inode->i_fop->read) + fops->read = cache_fops->read; + if (cache_inode->i_fop->write) + fops->write = cache_fops->write; + if (cache_inode->i_fop->readdir) + fops->readdir = cache_fops->readdir; + if (cache_inode->i_fop->poll) + fops->poll = cache_fops->poll; + if (cache_inode->i_fop->ioctl) + fops->ioctl = cache_fops->ioctl; + if (cache_inode->i_fop->mmap) + fops->mmap = cache_fops->mmap; +// if (cache_inode->i_fop->open) + fops->open = cache_fops->open; + if (cache_inode->i_fop->flush) + fops->flush = cache_fops->flush; + // if (cache_inode->i_fop->release) + fops->release = cache_fops->release; + if (cache_inode->i_fop->fsync) + fops->fsync = cache_fops->fsync; + if (cache_inode->i_fop->fasync) + fops->fasync = cache_fops->fasync; + if (cache_inode->i_fop->lock) + fops->lock = cache_fops->lock; + if (cache_inode->i_fop->readv) + fops->readv = cache_fops->readv; + if (cache_inode->i_fop->writev) + fops->writev = cache_fops->writev; + if (cache_inode->i_fop->sendpage) + fops->sendpage = cache_fops->sendpage; + if (cache_inode->i_fop->get_unmapped_area) + fops->get_unmapped_area = cache_fops->get_unmapped_area; + } } -inline struct file_operations *cache_ffops(struct sm_ops *smfs_ops) +static void setup_sm_file_ops(struct inode *cache_inode, + struct inode *inode, + struct inode_operations *cache_iops, + struct file_operations *cache_fops) { - return &smfs_ops->sm_file_fops; -} + struct smfs_super_info *smb; + struct inode_operations *iops; + struct 
file_operations *fops; -inline struct file_operations *cache_sfops(struct sm_ops *smfs_ops) -{ - return &smfs_ops->sm_sym_fops; -} + smb = S2SMI(inode->i_sb); -inline struct dentry_operations *cache_dops(struct sm_ops *smfs_ops) -{ - return &smfs_ops->sm_dentry_ops; -} + if (smb->ops_check & FILE_OPS_CHECK) + return; + smb->ops_check |= FILE_OPS_CHECK; -inline struct journal_operations *journal_ops(struct sm_ops *smfs_ops) -{ - return &smfs_ops->sm_journal_ops; -} + iops = cache_fiops(smb); + fops = cache_ffops(smb); -void init_smfs_cache() -{ - memset(&smfs_operations, 0, sizeof(struct sm_ops)); -} -void cleanup_smfs_cache() -{ - return; -} + memset(iops , 0 , sizeof (struct inode_operations)); + memset(fops , 0 , sizeof (struct file_operations)); -static void setup_iops(struct inode *cache_inode, - struct inode_operations *iops, - struct inode_operations *cache_iops) -{ + setup_iops(cache_inode, iops, cache_iops); + setup_fops(cache_inode, fops, cache_fops); - if (cache_inode->i_op && cache_iops && iops) { - if (cache_inode->i_op->create) - iops->create = cache_iops->create; - if (cache_inode->i_op->create_it) - iops->create_it = cache_iops->create_it; - if (cache_inode->i_op->lookup) - iops->lookup = cache_iops->lookup; - if (cache_inode->i_op->lookup_raw) - iops->lookup_raw = cache_iops->lookup_raw; - if (cache_inode->i_op->lookup_it) - iops->lookup_it = cache_iops->lookup_it; - if (cache_inode->i_op->link) - iops->link = cache_iops->link; - if (cache_inode->i_op->link_raw) - iops->link_raw = cache_iops->link_raw; - if (cache_inode->i_op->unlink) - iops->unlink = cache_iops->unlink; - if (cache_inode->i_op->unlink_raw) - iops->unlink_raw = cache_iops->unlink_raw; - if (cache_inode->i_op->symlink) - iops->symlink = cache_iops->symlink; - if (cache_inode->i_op->symlink_raw) - iops->symlink_raw = cache_iops->symlink_raw; - if (cache_inode->i_op->mkdir) - iops->mkdir = cache_iops->mkdir; - if (cache_inode->i_op->mkdir_raw) - iops->mkdir_raw = cache_iops->mkdir_raw; 
- if (cache_inode->i_op->rmdir) - iops->rmdir = cache_iops->rmdir; - if (cache_inode->i_op->rmdir_raw) - iops->rmdir_raw = cache_iops->rmdir_raw; - if (cache_inode->i_op->mknod) - iops->mknod = cache_iops->mknod; - if (cache_inode->i_op->mknod_raw) - iops->mknod_raw = cache_iops->mknod_raw; - if (cache_inode->i_op->rename) - iops->rename = cache_iops->rename; - if (cache_inode->i_op->rename_raw) - iops->rename_raw = cache_iops->rename_raw; - if (cache_inode->i_op->readlink) - iops->readlink = cache_iops->readlink; - if (cache_inode->i_op->follow_link) - iops->follow_link = cache_iops->follow_link; - if (cache_inode->i_op->truncate) - iops->truncate = cache_iops->truncate; - if (cache_inode->i_op->permission) - iops->permission = cache_iops->permission; - if (cache_inode->i_op->revalidate) - iops->revalidate = cache_iops->revalidate; - if (cache_inode->i_op->revalidate_it) - iops->revalidate_it = cache_iops->revalidate_it; - if (cache_inode->i_op->setattr) - iops->setattr = cache_iops->setattr; - if (cache_inode->i_op->setattr_raw) - iops->setattr_raw = cache_iops->setattr_raw; - if (cache_inode->i_op->getattr) - iops->getattr = cache_iops->getattr; - if (cache_inode->i_op->setxattr) - iops->setxattr = cache_iops->setxattr; - if (cache_inode->i_op->getxattr) - iops->getxattr = cache_iops->getxattr; - if (cache_inode->i_op->listxattr) - iops->listxattr = cache_iops->listxattr; - if (cache_inode->i_op->removexattr) - iops->removexattr = cache_iops->removexattr; - } + return; } -static void setup_fops(struct inode *cache_inode, - struct file_operations *fops, - struct file_operations *cache_fops) -{ - if (cache_inode->i_fop && cache_fops && fops) { - if (cache_inode->i_fop->llseek) - fops->llseek = cache_fops->llseek; - if (cache_inode->i_fop->read) - fops->read = cache_fops->read; - if (cache_inode->i_fop->write) - fops->write = cache_fops->write; - if (cache_inode->i_fop->readdir) - fops->readdir = cache_fops->readdir; - if (cache_inode->i_fop->poll) - fops->poll = 
cache_fops->poll; - if (cache_inode->i_fop->ioctl) - fops->ioctl = cache_fops->ioctl; - if (cache_inode->i_fop->mmap) - fops->mmap = cache_fops->mmap; - if (cache_inode->i_fop->open) - fops->open = cache_fops->open; - if (cache_inode->i_fop->flush) - fops->flush = cache_fops->flush; - if (cache_inode->i_fop->release) - fops->release = cache_fops->release; - if (cache_inode->i_fop->fsync) - fops->fsync = cache_fops->fsync; - if (cache_inode->i_fop->fasync) - fops->fasync = cache_fops->fasync; - if (cache_inode->i_fop->lock) - fops->lock = cache_fops->lock; - if (cache_inode->i_fop->readv) - fops->readv = cache_fops->readv; - if (cache_inode->i_fop->writev) - fops->writev = cache_fops->writev; - if (cache_inode->i_fop->sendpage) - fops->sendpage = cache_fops->sendpage; - if (cache_inode->i_fop->get_unmapped_area) - fops->get_unmapped_area = cache_fops->get_unmapped_area; - } -} -static void setup_sm_file_ops(struct inode *cache_inode, - struct inode *inode, - struct inode_operations *cache_iops, - struct file_operations *cache_fops) + +static void setup_sm_dir_ops(struct inode *cache_inode, + struct inode *inode, + struct inode_operations *cache_dir_iops, + struct file_operations *cache_dir_fops) { - struct smfs_super_info *smb; - struct inode_operations *iops; - struct file_operations *fops; + struct smfs_super_info *smb; + struct inode_operations *iops; + struct file_operations *fops; + + smb = S2SMI(inode->i_sb); - smb = S2SMI(inode->i_sb); - - if (smb->ops_check & FILE_OPS_CHECK) - return; - smb->ops_check |= FILE_OPS_CHECK; + if (smb->ops_check & DIR_OPS_CHECK) + return; + smb->ops_check |= DIR_OPS_CHECK; - iops = cache_fiops(&smfs_operations); - fops = cache_ffops(&smfs_operations); + iops = cache_diops(smb); + fops = cache_dfops(smb); - memset(iops , 0 , sizeof (struct inode_operations)); - memset(fops , 0 , sizeof (struct file_operations)); + memset(iops, 0, sizeof (struct inode_operations)); + memset(fops, 0, sizeof (struct file_operations)); - 
setup_iops(cache_inode, iops, cache_iops); - setup_fops(cache_inode, fops, cache_fops); + setup_iops(cache_inode, iops, cache_dir_iops); + setup_fops(cache_inode, fops, cache_dir_fops); - return; + return; } -static void setup_sm_dir_ops(struct inode *cache_inode, - struct inode *inode, - struct inode_operations *cache_dir_iops, - struct file_operations *cache_dir_fops) +static void setup_sm_symlink_ops(struct inode *cache_inode, + struct inode *inode, + struct inode_operations *cache_sym_iops, + struct file_operations *cache_sym_fops) { - struct smfs_super_info *smb; - struct inode_operations *iops; - struct file_operations *fops; + struct smfs_super_info *smb; + struct inode_operations *iops; + struct file_operations *fops; + + smb = S2SMI(inode->i_sb); - smb = S2SMI(inode->i_sb); - - if (smb->ops_check & DIR_OPS_CHECK) - return; - smb->ops_check |= DIR_OPS_CHECK; + if (smb->ops_check & SYMLINK_OPS_CHECK) + return; + smb->ops_check |= SYMLINK_OPS_CHECK; - iops = cache_diops(&smfs_operations); - fops = cache_dfops(&smfs_operations); + iops = cache_siops(smb); + fops = cache_sfops(smb); - memset(iops, 0, sizeof (struct inode_operations)); - memset(fops, 0, sizeof (struct file_operations)); + memset(iops , 0 , sizeof (struct inode_operations)); + memset(fops , 0 , sizeof (struct file_operations)); - setup_iops(cache_inode, iops, cache_dir_iops); - setup_fops(cache_inode, fops, cache_dir_fops); + setup_iops(cache_inode, iops, cache_sym_iops); + setup_fops(cache_inode, fops, cache_sym_fops); - return; + return; } -static void setup_sm_symlink_ops(struct inode *cache_inode, - struct inode *inode, - struct inode_operations *cache_sym_iops, - struct file_operations *cache_sym_fops) +static void setup_sm_sb_ops(struct super_block *cache_sb, + struct super_block *sb, + struct super_operations *smfs_sops) { - struct smfs_super_info *smb; - struct inode_operations *iops; - struct file_operations *fops; + struct smfs_super_info *smb; + struct super_operations *sops; - smb = 
S2SMI(inode->i_sb); - - if (smb->ops_check & SYMLINK_OPS_CHECK) - return; - smb->ops_check |= SYMLINK_OPS_CHECK; + ENTRY; - iops = cache_siops(&smfs_operations); - fops = cache_sfops(&smfs_operations); + smb = S2SMI(sb); - memset(iops , 0 , sizeof (struct inode_operations)); - memset(fops , 0 , sizeof (struct file_operations)); + if (smb->ops_check & SB_OPS_CHECK) + return; + smb->ops_check |= SB_OPS_CHECK; + sops = cache_sops(smb); + memset(sops, 0, sizeof (struct super_operations)); - setup_iops(cache_inode, iops, cache_sym_iops); - setup_fops(cache_inode, fops, cache_sym_fops); + if (cache_sb->s_op) { + if (cache_sb->s_op->read_inode) + sops->read_inode = smfs_sops->read_inode; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_sb->s_op->read_inode2) + sops->read_inode2 = smfs_sops->read_inode2; +#endif + if (cache_sb->s_op->dirty_inode) + sops->dirty_inode = smfs_sops->dirty_inode; + if (cache_sb->s_op->write_inode) + sops->write_inode = smfs_sops->write_inode; + if (cache_sb->s_op->put_inode) + sops->put_inode = smfs_sops->put_inode; + if (cache_sb->s_op->delete_inode) + sops->delete_inode = smfs_sops->delete_inode; + if (cache_sb->s_op->put_super) + sops->put_super = smfs_sops->put_super; + if (cache_sb->s_op->write_super) + sops->write_super = smfs_sops->write_super; + if (cache_sb->s_op->write_super_lockfs) + sops->write_super_lockfs =smfs_sops->write_super_lockfs; + if (cache_sb->s_op->unlockfs) + sops->unlockfs = smfs_sops->unlockfs; + if (cache_sb->s_op->statfs) + sops->statfs = smfs_sops->statfs; + if (cache_sb->s_op->remount_fs) + sops->remount_fs = smfs_sops->remount_fs; + if (cache_sb->s_op->umount_begin) + sops->umount_begin = smfs_sops->umount_begin; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_sb->s_op->fh_to_dentry) + sops->fh_to_dentry = smfs_sops->fh_to_dentry; + if (cache_sb->s_op->dentry_to_fh) + sops->dentry_to_fh = smfs_sops->dentry_to_fh; +#endif + if (cache_sb->s_op->show_options) + sops->show_options = 
smfs_sops->show_options; + + /* FIXME-WANGDI: we need this method to clear the cache + * inode. */ + sops->clear_inode = smfs_sops->clear_inode; + } - return; + return; } -static void setup_sm_sb_ops(struct super_block *cache_sb, - struct super_block *sb, - struct super_operations *smfs_sops) -{ - struct smfs_super_info *smb; - struct super_operations *sops; - - ENTRY; - - smb = S2SMI(sb); - - if (smb->ops_check & SB_OPS_CHECK) - return; - smb->ops_check |= SB_OPS_CHECK; - sops = cache_sops(&smfs_operations); - memset(sops, 0, sizeof (struct super_operations)); - - if (cache_sb->s_op) { - if (cache_sb->s_op->read_inode) - sops->read_inode = smfs_sops->read_inode; - if (cache_sb->s_op->read_inode2) - sops->read_inode2 = smfs_sops->read_inode2; - if (cache_sb->s_op->dirty_inode) - sops->dirty_inode = smfs_sops->dirty_inode; - if (cache_sb->s_op->write_inode) - sops->write_inode = smfs_sops->write_inode; - if (cache_sb->s_op->put_inode) - sops->put_inode = smfs_sops->put_inode; - if (cache_sb->s_op->delete_inode) - sops->delete_inode = smfs_sops->delete_inode; - if (cache_sb->s_op->put_super) - sops->put_super = smfs_sops->put_super; - if (cache_sb->s_op->write_super) - sops->write_super = smfs_sops->write_super; - if (cache_sb->s_op->write_super_lockfs) - sops->write_super_lockfs = smfs_sops->write_super_lockfs; - if (cache_sb->s_op->unlockfs) - sops->unlockfs = smfs_sops->unlockfs; - if (cache_sb->s_op->statfs) - sops->statfs = smfs_sops->statfs; - if (cache_sb->s_op->remount_fs) - sops->remount_fs = smfs_sops->remount_fs; - if (cache_sb->s_op->umount_begin) - sops->umount_begin = smfs_sops->umount_begin; - if (cache_sb->s_op->fh_to_dentry) - sops->fh_to_dentry = smfs_sops->fh_to_dentry; - if (cache_sb->s_op->dentry_to_fh) - sops->dentry_to_fh = smfs_sops->dentry_to_fh; - if (cache_sb->s_op->show_options) - sops->show_options = smfs_sops->show_options; - /*FIXME we need this method to clear the cache inode */ - sops->clear_inode = smfs_sops->clear_inode; - } - - 
return; -} void sm_set_inode_ops(struct inode *cache_inode, struct inode *inode) { - /* XXX now set the correct snap_{file,dir,sym}_iops */ + struct smfs_super_info *smb = S2SMI(inode->i_sb); + /* XXX now set the correct sm_{file,dir,sym}_iops */ if (S_ISDIR(inode->i_mode)) { - setup_sm_dir_ops(cache_inode, inode, + setup_sm_dir_ops(cache_inode, inode, &smfs_dir_iops, &smfs_dir_fops); - inode->i_op = cache_diops(&smfs_operations); - inode->i_fop = cache_dfops(&smfs_operations); + inode->i_op = cache_diops(smb); + inode->i_fop = cache_dfops(smb); } else if (S_ISREG(inode->i_mode)) { - setup_sm_file_ops(cache_inode, inode, + setup_sm_file_ops(cache_inode, inode, &smfs_file_iops, &smfs_file_fops); CDEBUG(D_INODE, "inode %lu, i_op at %p\n", inode->i_ino, inode->i_op); - inode->i_fop = cache_ffops(&smfs_operations); - inode->i_op = cache_fiops(&smfs_operations); - - } else if (S_ISLNK(inode->i_mode)) { + inode->i_fop = cache_ffops(smb); + inode->i_op = cache_fiops(smb); + + } else if (S_ISLNK(inode->i_mode)) { setup_sm_symlink_ops(cache_inode, inode, - &smfs_sym_iops, - &smfs_sym_fops); - inode->i_op = cache_siops(&smfs_operations); - inode->i_fop = cache_sfops(&smfs_operations); + &smfs_sym_iops, + &smfs_sym_fops); + inode->i_op = cache_siops(smb); + inode->i_fop = cache_sfops(smb); CDEBUG(D_INODE, "inode %lu, i_op at %p\n", inode->i_ino, inode->i_op); } } -void sm_set_sb_ops (struct super_block *cache_sb, - struct super_block *sb) -{ - struct smfs_super_info *smb; - - smb = S2SMI(sb); - - setup_sm_sb_ops(cache_sb, sb, &smfs_super_ops); - - sb->s_op = cache_sops(&smfs_operations); - return; -} -void setup_sm_journal_ops(char *cache_type) +void sm_set_sb_ops(struct super_block *cache_sb, struct super_block *sb) { - struct journal_operations *jops; - - jops = journal_ops(&smfs_operations); - - if (strlen(cache_type) == strlen("ext3") && - memcmp(cache_type, "ext3", strlen("ext3")) == 0 ) { -#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) - memcpy(jops, 
&smfs_ext3_journal_ops, - sizeof(struct journal_operations)); -#else - memset(jops, 0, sizeof(journal_operations)); -#endif - CDEBUG(D_SUPER, "ops at %p\n", jops); - } -} + struct smfs_super_info *smb; + + smb = S2SMI(sb); + setup_sm_sb_ops(cache_sb, sb, &smfs_super_ops); + + sb->s_op = cache_sops(smb); + return; +} diff --git a/lustre/smfs/cache_space.c b/lustre/smfs/cache_space.c new file mode 100644 index 0000000..a2f3133 --- /dev/null +++ b/lustre/smfs/cache_space.c @@ -0,0 +1,649 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre/smfs/cache_space.c + * A library of functions to manage cache space based on ARC + * (modified LRU) replacement algorithm. + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#define DEBUG_SUBSYSTEM S_SM + +#include +#include + +#include "smfs_internal.h" + +struct cache_purge_param { + int nfract; /* Percentage of cache dirty to activate cpurge */ + int ndirty; /* Maximum number of objects to write out per + wake-cycle */ + int interval; /* jiffies delay between cache purge */ + int nfract_sync;/* Percentage of cache dirty to activate + cpurge synchronously */ + int nfract_stop_cpurge; /* Percentage of cache dirty to stop cpurge */ +} cf_prm = {30, 512, 600 * HZ, 60, 20}; + +static struct cache_purge_queue smfs_cpq; +static struct cache_purge_queue *cpq = &smfs_cpq; + +int cache_space_hook_init(struct super_block *sb) +{ + struct smfs_super_info *smfs_info = S2SMI(sb); + + SMFS_SET_CACHE_HOOK(smfs_info); + return 0; +} +int cache_space_hook_exit(struct super_block *sb) +{ + struct smfs_super_info *smfs_info = S2SMI(sb); + + SMFS_CLEAN_CACHE_HOOK(smfs_info); + return 0; +} +int smfs_cache_hook(struct inode *inode) +{ + struct smfs_super_info *smfs_info = I2CSB(inode); + + if (SMFS_CACHE_HOOK(smfs_info) && SMFS_INIT_REC(smfs_info) + && SMFS_INODE_CACHE_HOOK(inode)) + return 1; + else + return 0; +} + +static int cache_leaf_node(struct dentry *dentry, __u64 *active_entry) +{ + struct inode *inode = dentry->d_inode; + + if (S_ISDIR(inode->i_mode)) { + if (inode->i_nlink != 2) + return 0; + if (!strncmp(dentry->d_name.name, "lost+found", dentry->d_name.len)) + return 0; + LASSERT(active_entry != NULL); + get_active_entry(inode, active_entry); + return(*active_entry > 0 ? 
0 : 1); + } else { + if (inode->i_nlink != 1) + return 0; + if (!strncmp(dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) || + !strncmp(dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len)) + return 0; + return 1; + } +} +static int cache_pre_leaf_node(struct dentry *dentry, __u64 *active_entry, int op) +{ + if (((op == 0 && dentry->d_inode->i_nlink == 0) || + (op == 1 && dentry->d_inode->i_nlink == 2)) && + strncmp(dentry->d_name.name, KML_LOG_NAME, dentry->d_name.len) && + strncmp(dentry->d_name.name, CACHE_LRU_LOG, dentry->d_name.len)) + return 1; + else if ((op == 2 && dentry->d_inode->i_nlink == 0) || + (op == 3 && dentry->d_inode->i_nlink == 3)) { + LASSERT(active_entry != NULL); + get_active_entry(dentry->d_inode, active_entry); + return(*active_entry > 0 ? 0 : 1); + } + return 0; +} + +static int set_lru_logcookie(struct inode *inode, void *handle, + struct llog_cookie *logcookie) +{ + struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt; + int rc; + rc = fsops->fs_set_xattr(inode, handle, XATTR_SMFS_CACHE_LOGCOOKIE, + logcookie, sizeof(*logcookie)); + RETURN(rc); +} +static int get_lru_logcookie(struct inode *inode, struct llog_cookie *logcookie) +{ + struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt; + int rc; + rc = fsops->fs_get_xattr(inode, XATTR_SMFS_CACHE_LOGCOOKIE, + logcookie, sizeof(*logcookie)); + RETURN(rc); +} + +/* Look up the child and parent inodes named by an LRU record and decide + * whether the child may be purged; both inode references are dropped + * before returning. */ +static int try2purge_from_cache(struct ll_fid cfid, struct ll_fid pfid) +{ + struct inode *inode, *parent; + struct super_block *sb = cpq->cpq_sb; + //struct llog_cookie logcookie; + __u32 hoard_priority = 0; + int rc = 0; + ENTRY; + + inode = iget(sb, cfid.id); + /* iget() may hand back NULL (smfs_create checks it that way), so test for NULL before the inode is dereferenced below */ + if (!inode || IS_ERR(inode)) { + CERROR("not existent inode: "LPX64"/%u\n", + cfid.id, cfid.generation); + RETURN(-ENOENT); + } + parent = iget(sb, pfid.id); + if (!parent || IS_ERR(parent)) { + CERROR("not existent inode: "LPX64"/%u\n", + pfid.id, pfid.generation); + iput(inode); + RETURN(-ENOENT); + } + + CWARN("inode/parent %lu:%lu on the lru list\n", + inode->i_ino, 
parent->i_ino); + + rc = get_hoard_priority(inode, &hoard_priority); + if (hoard_priority) { + CWARN("inode %lu set hoard\n", inode->i_ino); + GOTO(out, rc); + } + if (atomic_read(&inode->i_count) > 1 || (inode->i_state & I_DIRTY)) { + CWARN("inode %lu is busy\n", inode->i_ino); + GOTO(out, rc = 0); + } + +out: + iput(inode); + iput(parent); + RETURN(rc); +} + +static int cache_lru_get_rec_cb(struct llog_handle *llh, + struct llog_rec_hdr *rec, void *data) +{ + struct llog_lru_rec *llr; + int count = *(int *)data, rc = 0; + ENTRY; + + if (!(le32_to_cpu(llh->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN)) { + CERROR("log is not plain\n"); + RETURN(-EINVAL); + } + if (rec->lrh_type != CACHE_LRU_REC) { + CERROR("log record type error\n"); + RETURN(-EINVAL); + } + + llr = (struct llog_lru_rec *)rec; + + if (try2purge_from_cache(llr->llr_cfid, llr->llr_pfid)==1){ + CDEBUG(D_INODE, "purge ino/gen "LPX64"/%u from cache\n", + llr->llr_cfid.id, llr->llr_cfid.generation); + count --; + if (count == 0) + rc = LLOG_PROC_BREAK; + *(int *)data = count; + } + + RETURN(rc); +} + +static int cpurge_stop(void) +{ + struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt; + struct obd_statfs osfs; + int rc, free; + + rc = fsops->fs_statfs(cpq->cpq_sb, &osfs); + LASSERT(rc == 0); + + free = osfs.os_bfree * 100; + if (free < cf_prm.nfract_stop_cpurge * osfs.os_blocks) + return 1; + return 0; +} + +static int cache_balance_state(void) +{ + struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt; + struct obd_statfs osfs; + int rc, free; + + rc = fsops->fs_statfs(cpq->cpq_sb, &osfs); + LASSERT(rc == 0); + + free = (osfs.os_blocks - osfs.os_bfree) * 100; + if (free > cf_prm.nfract * osfs.os_blocks) { + if (free < cf_prm.nfract_sync) + return 1; + return 0; + } + return -1; +} + +void wakeup_cpurge(void) +{ + wake_up(&cpq->cpq_waitq); +} + +/* walk the lru llog to purge count number of objects */ +static int purge_some_cache(int *count) +{ + int rc; + ENTRY; + + rc = 
llog_cat_process(cpq->cpq_loghandle, + (llog_cb_t)cache_lru_get_rec_cb, + count); + if (!rc) + CDEBUG(D_INODE, "no enough objects available\n"); + + RETURN(rc); +} + +#define CFLUSH_NR 512 +static void check_cache_space(void) +{ + int state = cache_balance_state(); + ENTRY; + + if (state < 0) + return; + + wakeup_cpurge(); + + if (state > 0) { + int count = CFLUSH_NR; + purge_some_cache(&count); + } +} + +void cache_space_pre(struct inode *inode, int op) +{ + ENTRY; + + /* FIXME have not used op */ + check_cache_space(); +} + +static int cache_space_hook_lru(struct inode *inode, struct inode *parent, + void *handle, int op, int flags) +{ + struct fsfilt_operations *fsops = S2SMI(cpq->cpq_sb)->sm_fsfilt; + struct llog_ctxt *ctxt = cpq->cpq_loghandle->lgh_ctxt; + struct llog_lru_rec *llr = NULL; + struct llog_cookie *logcookie = NULL; + int cookie_size = sizeof(struct llog_cookie); + int rc = 0, err; + ENTRY; + + LASSERT(ctxt != NULL); + + if (op & ~(CACHE_SPACE_DELETE | CACHE_SPACE_INSERT |CACHE_SPACE_COMMIT)) + RETURN(-EINVAL); + + OBD_ALLOC(logcookie, cookie_size); + if (!logcookie) + GOTO(out, rc = -ENOMEM); + + if (op & CACHE_SPACE_DELETE) { + rc = get_lru_logcookie(inode, logcookie); + if (rc < 0) + GOTO(out, rc); + + if (logcookie->lgc_lgl.lgl_oid == 0) { + CWARN("inode %lu/%u is not in lru list\n", + inode->i_ino, inode->i_generation); + GOTO(insert, rc = -ENOENT); + } + if (flags && llog_cat_half_bottom(logcookie, ctxt->loc_handle)) + GOTO(out, rc = 0); + + rc = llog_cancel(ctxt, 1, logcookie, 0, NULL); + if (!rc) { + memset(logcookie, 0, cookie_size); + rc = set_lru_logcookie(inode, handle, logcookie); + if (rc) + GOTO(out, rc); + } else { + CERROR("failed at llog_cancel: %d\n", rc); + GOTO(out, rc); + } + } + +insert: + if (op & CACHE_SPACE_INSERT) { + LASSERT(parent != NULL); + OBD_ALLOC(llr, sizeof(*llr)); + if (llr == NULL) + GOTO(out, rc = -ENOMEM); + + llr->llr_hdr.lrh_len = llr->llr_tail.lrt_len = sizeof(*llr); + llr->llr_hdr.lrh_type = 
CACHE_LRU_REC; + llr->llr_cfid.id = inode->i_ino; + llr->llr_cfid.generation = inode->i_generation; + llr->llr_cfid.f_type = inode->i_mode & S_IFMT; + llr->llr_pfid.id = parent->i_ino; + llr->llr_pfid.generation = parent->i_generation; + llr->llr_pfid.f_type = parent->i_mode & S_IFMT; + + rc = llog_add(ctxt, &llr->llr_hdr, NULL, logcookie, 1, NULL); + if (rc != 1) { + CERROR("failed at llog_add: %d\n", rc); + GOTO(out, rc); + } + rc = set_lru_logcookie(inode, handle, logcookie); + } + + if (op & CACHE_SPACE_COMMIT) { + if (handle) { + err = fsops->fs_commit(inode, handle, 0); + if (err) { + CERROR("error committing transaction: %d\n", err); + if (!rc) + rc = err; + } + } + } +out: + if (logcookie) + OBD_FREE(logcookie, cookie_size); + if (llr) + OBD_FREE(llr, sizeof(*llr)); + RETURN(rc); +} + +/* Cache purge daemon body: repeatedly asks purge_some_cache() for up to + * cf_prm.ndirty objects, sleeps on cpq_waitq (with timeout) when the quota + * was not used up or cpurge_stop() says to stop, and exits once SVC_STOPPING + * is set, signalling cpq_comp on startup and again on exit. */ +static int cache_purge_thread(void *args) +{ + unsigned long flags; + /* cf_prm.interval is already expressed in jiffies (initialised to 600 * HZ), so it must not be scaled by HZ a second time */ + struct l_wait_info lwi = LWI_TIMEOUT(cf_prm.interval, NULL, NULL); + ENTRY; + + lock_kernel(); + kportal_daemonize("wb_cache_purge"); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + unlock_kernel(); + complete(&cpq->cpq_comp); + + while (1) { + int ndirty = cf_prm.ndirty; + + purge_some_cache(&ndirty); + if (ndirty > 0 || cpurge_stop()) + l_wait_event(cpq->cpq_waitq, + cpq->cpq_flags & SVC_STOPPING, + &lwi); + if (cpq->cpq_flags & SVC_STOPPING) { + cpq->cpq_flags &= ~SVC_STOPPING; + EXIT; + break; + } + } + cpq->cpq_flags = SVC_STOPPED; + complete(&cpq->cpq_comp); + return 0; +} + +int cache_space_hook_setup(struct super_block *sb) +{ + struct llog_ctxt *ctxt; + int rc; + ENTRY; + + /* first to initialize the cache lru catalog on local fs */ + rc = llog_catalog_setup(&ctxt, CACHE_LRU_LOG, + S2SMI(sb)->smsi_ctxt, + S2SMI(sb)->sm_fsfilt, + S2SMI(sb)->smsi_logs_dir, + S2SMI(sb)->smsi_objects_dir); + if (rc) { + CERROR("failed to initialize cache lru list catalog %d\n", rc); + RETURN(rc); + } + cpq->cpq_sb = sb; + 
cpq->cpq_loghandle = ctxt->loc_handle; + + /* start cache purge daemon, only one daemon now */ + init_waitqueue_head(&cpq->cpq_waitq); + init_completion(&cpq->cpq_comp); + cpq->cpq_flags = 0; + + rc = kernel_thread(cache_purge_thread, NULL, CLONE_VM | CLONE_FILES); + if (rc < 0) { + CERROR("cannot start thread: %d\n", rc); + GOTO(err_out, rc); + } + wait_for_completion(&cpq->cpq_comp); + + RETURN(0); +err_out: + llog_catalog_cleanup(ctxt); + RETURN(rc); +} + +int cache_space_hook_cleanup(void) +{ + int rc; + ENTRY; + + init_completion(&cpq->cpq_comp); + cpq->cpq_flags = SVC_STOPPING; + wake_up(&cpq->cpq_waitq); + wait_for_completion(&cpq->cpq_comp); + + rc = llog_catalog_cleanup(cpq->cpq_loghandle->lgh_ctxt); + if (rc) + CERROR("failed to clean up cache lru list catalog %d\n", rc); + + RETURN(rc); +} + +static int cache_space_hook_create(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry = 0; + int rc; + + LASSERT(cache_leaf_node(dentry, NULL)); + rc = cache_space_hook_lru(dentry->d_inode, dir, handle, + CACHE_SPACE_INSERT, 0); + if (rc) + RETURN(rc); + if (cache_leaf_node(dentry->d_parent, &active_entry)) { + rc = cache_space_hook_lru(dir,NULL,handle,CACHE_SPACE_DELETE,0); + if (rc) + RETURN(rc); + } + if (!active_entry) + rc = get_active_entry(dir, &active_entry); + active_entry ++; + if (!rc) + rc = set_active_entry(dir, &active_entry, handle); + RETURN(rc); +} +static int cache_space_hook_lookup(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry; + int rc = 0; + + if (cache_leaf_node(dentry, &active_entry)) + rc = cache_space_hook_lru(dentry->d_inode, dir, handle, + CACHE_SPACE_DELETE | CACHE_SPACE_INSERT,1); + RETURN(rc); +} +static int cache_space_hook_link(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry = 0; + 
int rc = 0; + + if (cache_pre_leaf_node(dentry, NULL, 1)) { + rc = cache_space_hook_lru(dentry->d_inode, NULL, + handle, CACHE_SPACE_DELETE, 0); + if (rc) + RETURN(rc); + } + + if (cache_leaf_node(dentry->d_parent, &active_entry)) { + rc = cache_space_hook_lru(dir,NULL,handle,CACHE_SPACE_DELETE,0); + if (rc) + RETURN(rc); + } + + if (!active_entry) + rc = get_active_entry(dir, &active_entry); + active_entry ++; + if (!rc) + rc = set_active_entry(dir, &active_entry, handle); + RETURN(rc); +} +static int cache_space_hook_unlink(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry; + int rc = 0; + + if (cache_pre_leaf_node(dentry, NULL, 0)) + rc = cache_space_hook_lru(dentry->d_inode, NULL, + handle, CACHE_SPACE_DELETE, 0); + else if (cache_leaf_node(dentry, NULL)) + rc = cache_space_hook_lru(dentry->d_inode, dir, + handle, CACHE_SPACE_INSERT,0); + if (rc) + RETURN(rc); + + rc = get_active_entry(dir, &active_entry); + active_entry --; + if (!rc) + rc = set_active_entry(dir, &active_entry, handle); + if (!rc && cache_leaf_node(dentry->d_parent, &active_entry)) + rc = cache_space_hook_lru(dir, + dentry->d_parent->d_parent->d_inode, + handle, CACHE_SPACE_INSERT, 0); + RETURN(rc); +} +static int cache_space_hook_mkdir(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry; + int rc; + + LASSERT(cache_leaf_node(dentry, &active_entry)); + rc = cache_space_hook_lru(dentry->d_inode, dir, handle, + CACHE_SPACE_INSERT,0); + + if (!rc && cache_pre_leaf_node(dentry->d_parent, &active_entry, 3)) + rc = cache_space_hook_lru(dir,NULL,handle,CACHE_SPACE_DELETE,0); + RETURN(rc); +} +static int cache_space_hook_rmdir(void *handle, struct inode *dir, + struct dentry *dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + __u64 active_entry; + int rc; + + LASSERT(cache_pre_leaf_node(dentry, &active_entry, 2)); + rc = 
cache_space_hook_lru(dentry->d_inode, NULL, handle, + CACHE_SPACE_DELETE, 0); + + if (!rc && cache_leaf_node(dentry->d_parent, &active_entry)) + rc = cache_space_hook_lru(dir, + dentry->d_parent->d_parent->d_inode, + handle, CACHE_SPACE_INSERT, 0); + RETURN(rc); +} +/* Post-rename hook: updates the LRU log for an overwritten target, and when + * the entry moved between directories adjusts the active-entry counters and + * LRU membership of the old and new parent directories. */ +static int cache_space_hook_rename(void *handle, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + /* must start at 0: cache_leaf_node() can return without storing to it, and the "if (!active_entry)" test below then reads it (same as the create/link hooks) */ + __u64 active_entry = 0; + int rc = 0; + + if (new_dentry->d_inode) { + if (cache_pre_leaf_node(new_dentry, NULL, 0)) + rc = cache_space_hook_lru(new_dentry->d_inode, NULL, + handle, CACHE_SPACE_DELETE,0); + else if (cache_leaf_node(new_dentry, NULL)) + rc = cache_space_hook_lru(new_dentry->d_inode, + new_dir, handle, + CACHE_SPACE_INSERT,0); + } + + if (rc || old_dir == new_dir) + RETURN(rc); + + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + if (cache_leaf_node(new_dentry->d_parent, &active_entry)) { + rc = cache_space_hook_lru(new_dir, NULL, handle, + CACHE_SPACE_DELETE, 0); + if (rc) + RETURN(rc); + } + if (!active_entry) + rc = get_active_entry(new_dir, &active_entry); + active_entry ++; + if (!rc) + rc = set_active_entry(new_dir, &active_entry, handle); + if (rc) + RETURN(rc); + rc = get_active_entry(old_dir, &active_entry); + active_entry --; + if (!rc) + rc = set_active_entry(old_dir, &active_entry, handle); + } else if (cache_pre_leaf_node(new_dentry->d_parent, &active_entry, 3)) + rc = cache_space_hook_lru(new_dir, NULL, handle, + CACHE_SPACE_DELETE, 0); + + if (!rc && cache_leaf_node(old_dentry->d_parent, &active_entry)) + rc = cache_space_hook_lru(old_dir, + old_dentry->d_parent->d_parent->d_inode, + handle, CACHE_SPACE_INSERT, 0); + RETURN(rc); +} + +typedef int (*cache_hook_op)(void *handle, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry); +static cache_hook_op cache_space_hook_ops[CACHE_HOOK_MAX + 1] = { + [CACHE_HOOK_CREATE] cache_space_hook_create, + [CACHE_HOOK_LOOKUP] 
cache_space_hook_lookup, + [CACHE_HOOK_LINK] cache_space_hook_link, + [CACHE_HOOK_UNLINK] cache_space_hook_unlink, + [CACHE_HOOK_SYMLINK] cache_space_hook_create, + [CACHE_HOOK_MKDIR] cache_space_hook_mkdir, + [CACHE_HOOK_RMDIR] cache_space_hook_rmdir, + [CACHE_HOOK_MKNOD] cache_space_hook_create, + [CACHE_HOOK_RENAME] cache_space_hook_rename, +}; + +int cache_space_post(int op, void *handle, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry) +{ + int rc; + ENTRY; + + LASSERT(op <= CACHE_HOOK_MAX && cache_space_hook_ops[op] != NULL); + + rc = cache_space_hook_ops[op](handle, old_dir, old_dentry, + new_dir, new_dentry); + RETURN(rc); +} diff --git a/lustre/smfs/dir.c b/lustre/smfs/dir.c index 2a99552..ca479a0 100644 --- a/lustre/smfs/dir.c +++ b/lustre/smfs/dir.c @@ -1,7 +1,26 @@ -/* - * dir.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * */ -#define DEBUG_SUBSYSTEM S_SNAP + +#define DEBUG_SUBSYSTEM S_SM #include #include @@ -9,405 +28,578 @@ #include #include #include -#include #include +#include +#include +#include +#include +#include +#include -#include "smfs_internal.h" -#include "kml_idl.h" +#include "smfs_internal.h" #define NAME_ALLOC_LEN(len) ((len+16) & ~15) - -void prepare_parent_dentry(struct dentry *dentry, struct inode *inode) -{ - atomic_set(&dentry->d_count, 1); - dentry->d_vfs_flags = 0; - dentry->d_flags = 0; - dentry->d_inode = inode; - dentry->d_op = NULL; - dentry->d_fsdata = NULL; - dentry->d_mounted = 0; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_LIST_HEAD(&dentry->d_alias); -} - -void d_unalloc(struct dentry *dentry) -{ - - list_del(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_hash); - dput(dentry); /* this will free the dentry memory */ -} -static int smfs_create(struct inode *dir, - struct dentry *dentry, - int mode) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static int smfs_create(struct inode *dir, struct dentry *dentry, + int mode) +#else +static int smfs_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +#endif { - struct inode *cache_dir; - struct inode *cache_inode = NULL, *inode; - struct dentry parent; - struct dentry *cache_dentry = NULL; - int rc; - - ENTRY; - - cache_dir = I2CI(dir); + struct inode *inode = NULL; + struct inode *cache_dir = NULL; + struct dentry *cache_dentry = NULL; + struct dentry *cache_parent = NULL; + void *handle = NULL; + int rc = 0; + + ENTRY; + + cache_dir = I2CI(dir); if (!cache_dir) RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - if (!cache_dentry) { - RETURN(-ENOMEM); - } - - if (cache_dir && cache_dir->i_op->create) - rc = cache_dir->i_op->create(cache_dir, cache_dentry, mode); - - if (rc) - GOTO(exit, rc); - - cache_inode = 
igrab(cache_dentry->d_inode); - - inode = iget(dir->i_sb, cache_inode->i_ino); - - if (!inode) - GOTO(exit, rc = -ENOMEM); - - d_instantiate(dentry, inode); - - sm_set_inode_ops(cache_inode, inode); + + handle = smfs_trans_start(dir, FSFILT_OP_CREATE, NULL); + if (IS_ERR(handle)) + RETURN(-ENOSPC); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_CREATE, handle, dir); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + + lock_kernel(); + if (!cache_dentry || !cache_parent) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dir, cache_dir); + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (cache_dir && cache_dir->i_op->create) + rc = cache_dir->i_op->create(cache_dir, cache_dentry, + mode); +#else + if (cache_dir && cache_dir->i_op->create) + rc = cache_dir->i_op->create(cache_dir, cache_dentry, + mode, nd); +#endif + if (rc) + GOTO(exit, rc); + + inode = iget(dir->i_sb, cache_dentry->d_inode->i_ino); + if (!inode) + GOTO(exit, rc = -ENOMEM); + + d_instantiate(dentry, inode); + sm_set_inode_ops(cache_dentry->d_inode, inode); + post_smfs_inode(dir, cache_dir); + + /* Do KML post hook */ + SMFS_KML_POST(dir, dentry, NULL, NULL, REINT_CREATE, "create", rc, + exit); + SMFS_CACHE_HOOK_POST(CACHE_HOOK_CREATE, handle, dir, dentry, + NULL, NULL, rc, exit); exit: - d_unalloc(cache_dentry); - RETURN(rc); + unlock_kernel(); + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static struct dentry *smfs_lookup(struct inode *dir, - struct dentry *dentry) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static struct dentry *smfs_lookup(struct inode *dir, struct dentry *dentry) +#else +static struct dentry *smfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +#endif { - struct inode *cache_dir; - struct inode *cache_inode = NULL, *inode; - struct dentry parent; - struct dentry *cache_dentry = NULL; - struct dentry *rc = NULL; - - ENTRY; - - cache_dir = 
I2CI(dir); - if (!cache_dir) + struct inode *cache_dir; + struct inode *cache_inode; + struct inode *inode = NULL; + struct dentry *cache_dentry = NULL; + struct dentry *cache_parent = NULL; + struct dentry *rc = NULL; + void *handle = NULL; + int rc2 = 0; + + ENTRY; + + if (!(cache_dir = I2CI(dir))) RETURN(ERR_PTR(-ENOENT)); - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - if(cache_dir && cache_dir->i_op->lookup) - rc = cache_dir->i_op->lookup(cache_dir, cache_dentry); - - if (rc || !cache_dentry->d_inode || - is_bad_inode(cache_dentry->d_inode) || - IS_ERR(cache_dentry->d_inode)) { - GOTO(exit, rc); - } - - cache_inode = igrab(cache_dentry->d_inode); - - inode = iget(dir->i_sb, cache_inode->i_ino); - - d_add(dentry, inode); + + handle = smfs_trans_start(dir, KML_CACHE_NOOP, NULL); + if (IS_ERR(handle)) + RETURN(ERR_PTR(-ENOSPC)); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_LOOKUP, handle, dir); + + /* preparing artificial backing fs dentries. */ + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry->d_parent); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + + if (!cache_dentry || !cache_parent) + GOTO(exit, rc = ERR_PTR(-ENOMEM)); + + /* cache_dir was checked non-NULL above; bail out if the backing fs has no lookup method, since it is called unconditionally below */ + if (!cache_dir->i_op->lookup) + GOTO(exit, rc = ERR_PTR(-ENOENT)); + + /* perform lookup in backing fs. */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + rc = cache_dir->i_op->lookup(cache_dir, cache_dentry); +#else + rc = cache_dir->i_op->lookup(cache_dir, cache_dentry, nd); +#endif + + if (rc && IS_ERR(rc)) + GOTO(exit, rc); + + if ((cache_inode = rc ? 
rc->d_inode : cache_dentry->d_inode)) { + if (IS_ERR(cache_inode)) { + dentry->d_inode = cache_inode; + GOTO(exit, rc = NULL); + } + inode = iget(dir->i_sb, cache_inode->i_ino); + } else { + d_add(dentry, NULL); + GOTO(exit, rc); + } + + d_add(dentry, inode); + rc = NULL; + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_LOOKUP, handle, dir, dentry, + NULL, NULL, rc2, exit); exit: - d_unalloc(cache_dentry); - RETURN(rc); -} + if (rc2) + rc = ERR_PTR(rc2); + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); +} +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) static int smfs_lookup_raw(struct inode *dir, const char *name, int len, ino_t *data) { - struct inode *cache_dir; - int rc = 0; + struct inode *cache_dir; + int rc = 0; - cache_dir = I2CI(dir); + cache_dir = I2CI(dir); - if (!cache_dir) + if (!cache_dir) RETURN(-ENOENT); - - if (cache_dir->i_op->lookup_raw) - rc = cache_dir->i_op->lookup_raw(cache_dir, name, len, data); - - RETURN(rc); + if (cache_dir->i_op->lookup_raw) + rc = cache_dir->i_op->lookup_raw(cache_dir, name, len, data); + RETURN(rc); } +#endif -static int smfs_link(struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) +static int smfs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) { - struct inode *cache_old_inode = NULL; - struct inode *cache_dir = I2CI(dir); - struct inode *inode = NULL; - struct dentry *cache_dentry; - struct dentry *cache_old_dentry; - struct dentry parent; - struct dentry parent_old; - int rc = 0; - - inode = old_dentry->d_inode; - - cache_old_inode = I2CI(inode); - - if (!cache_old_inode || !cache_dir) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - prepare_parent_dentry(&parent_old, cache_dir); - cache_old_dentry = d_alloc(&parent_old, &dentry->d_name); - d_add(cache_old_dentry, cache_old_inode); - pre_smfs_inode(inode, cache_old_dentry->d_inode); - - if (cache_dir->i_op->link) - rc = 
cache_dir->i_op->link(cache_old_dentry, cache_dir, cache_dentry); - - if (rc) - GOTO(exit, rc); - - atomic_inc(&inode->i_count); - post_smfs_inode(inode, cache_old_dentry->d_inode); - d_instantiate(dentry, inode); + struct inode *cache_old_inode = NULL; + struct inode *cache_dir = I2CI(dir); + struct inode *inode = NULL; + struct dentry *cache_dentry = NULL; + struct dentry *cache_old_dentry = NULL; + struct dentry *cache_parent = NULL; + struct dentry *cache_old_parent = NULL; + void *handle = NULL; + int rc = 0; + + inode = old_dentry->d_inode; + + cache_old_inode = I2CI(inode); + + handle = smfs_trans_start(dir, FSFILT_OP_LINK, NULL); + if (IS_ERR(handle)) + RETURN(-ENOSPC); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_LINK, handle, dir); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + lock_kernel(); + if (!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + cache_old_parent = pre_smfs_dentry(NULL, cache_dir, old_dentry); + cache_old_dentry = pre_smfs_dentry(cache_parent, cache_old_inode, + old_dentry); + + if (!cache_old_parent || !cache_old_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(inode, cache_old_dentry->d_inode); + + if (cache_dir->i_op->link) + rc = cache_dir->i_op->link(cache_old_dentry, cache_dir, + cache_dentry); + if (rc) + GOTO(exit, rc); + + atomic_inc(&inode->i_count); + post_smfs_inode(inode, cache_old_dentry->d_inode); + d_instantiate(dentry, inode); + post_smfs_inode(dir, cache_dir); + + SMFS_KML_POST(dir, old_dentry, dentry, NULL, + REINT_LINK, "link", rc, exit); + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_LINK, handle, + dir, old_dentry, NULL, NULL, rc, exit); exit: - if (cache_dentry->d_inode) - igrab(cache_dentry->d_inode); - if (cache_old_dentry->d_inode) - igrab(cache_old_dentry->d_inode); - - d_unalloc(cache_dentry); - d_unalloc(cache_old_dentry); - - RETURN(rc); + unlock_kernel(); + post_smfs_dentry(cache_dentry); + 
post_smfs_dentry(cache_old_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static int smfs_unlink(struct inode * dir, - struct dentry *dentry) +static int smfs_unlink(struct inode *dir, struct dentry *dentry) { - struct inode *cache_dir = I2CI(dir); - struct inode *cache_inode = I2CI(dentry->d_inode); - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; - - if (!cache_dir || !cache_inode) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - d_add(cache_dentry, cache_inode); - - if (cache_dir->i_op->unlink) - rc = cache_dir->i_op->unlink(cache_dir, cache_dentry); - - - post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); - post_smfs_inode(dir, cache_dir); - - igrab(cache_dentry->d_inode); - - d_unalloc(cache_dentry); - RETURN(rc); + struct inode *cache_dir = I2CI(dir); + struct inode *cache_inode = I2CI(dentry->d_inode); + struct dentry *cache_dentry; + struct dentry *cache_parent; + void *handle = NULL; + int rc = 0, flag; + + if (!cache_dir || !cache_inode) + RETURN(-ENOENT); + + handle = smfs_trans_start(dir, FSFILT_OP_UNLINK, NULL); + if (IS_ERR(handle)) + RETURN(-ENOSPC); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_UNLINK, handle, dir); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, cache_inode, dentry); + + if (!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + if (smfs_do_rec(dir) && dentry->d_inode->i_nlink == 1) { + /*For unlink, we do not really unlink this + *inode and just remove it to a tmp dir for + *undo later, the inode will be deleted when + *the corresponding rec will be cancelled*/ + flag = 0; + SMFS_KML_POST(dir, dentry, &flag, NULL, REINT_UNLINK, + "unlink", rc, exit); + } else { + lock_kernel(); + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(dentry->d_inode, cache_inode); + if (cache_dir->i_op->unlink) + rc = cache_dir->i_op->unlink(cache_dir, cache_dentry); + 
post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + post_smfs_inode(dir, cache_dir); + post_smfs_dentry(cache_dentry); + unlock_kernel(); + flag = 1; + SMFS_KML_POST(dir, dentry, &flag, NULL, REINT_UNLINK, + "dec_link", rc, exit); + } + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_UNLINK, handle, dir, dentry, + NULL, NULL, rc, exit); +exit: + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static int smfs_symlink (struct inode * dir, - struct dentry *dentry, - const char * symname) +static int smfs_symlink (struct inode *dir, struct dentry *dentry, + const char *symname) { - struct inode *cache_dir = I2CI(dir); - struct inode *cache_inode = NULL; - struct inode *inode = NULL; - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; - - if (!cache_dir) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - if (cache_dir->i_op->symlink) - rc = cache_dir->i_op->symlink(cache_dir, cache_dentry, symname); - - cache_inode = igrab(cache_dentry->d_inode); - - inode = iget(dir->i_sb, cache_inode->i_ino); - - if (inode) - d_instantiate(dentry, inode); - else - rc = -ENOENT; - - d_unalloc(cache_dentry); - - RETURN(rc); + struct inode *cache_dir = I2CI(dir); + struct inode *inode = NULL; + struct dentry *cache_dentry; + struct dentry *cache_parent; + void *handle = NULL; + int rc = 0; + + if (!cache_dir) + RETURN(-ENOENT); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + + if (!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + handle = smfs_trans_start(dir, FSFILT_OP_SYMLINK, NULL); + if (IS_ERR(handle)) + RETURN(-ENOSPC); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_SYMLINK, handle, dir); + + pre_smfs_inode(dir, cache_dir); + lock_kernel(); + if (cache_dir->i_op->symlink) + rc = cache_dir->i_op->symlink(cache_dir, cache_dentry, symname); + + inode = iget(dir->i_sb, cache_dentry->d_inode->i_ino); + + post_smfs_inode(dir, 
cache_dir); + if (inode) + d_instantiate(dentry, inode); + else + rc = -ENOENT; + + SMFS_KML_POST(dir, dentry, (char*)symname, NULL, REINT_CREATE, + "symlink", rc, exit); + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_SYMLINK, handle, dir, dentry, + NULL, NULL, rc, exit); +exit: + unlock_kernel(); + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static int smfs_mkdir(struct inode * dir, - struct dentry * dentry, - int mode) +static int smfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - struct inode *cache_dir = I2CI(dir); - struct inode *cache_inode = NULL; - struct inode *inode = NULL; - struct dentry *cache_dentry; - struct dentry parent; - void *handle; - int rc = 0; - - if (!cache_dir) - RETURN(-ENOENT); - - handle = smfs_trans_start(cache_dir, KML_OPCODE_MKDIR); - if (IS_ERR(handle) ) { - CERROR("smfs_do_mkdir: no space for transaction\n"); - RETURN(-ENOSPC); - } - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - pre_smfs_inode(dir, cache_dir); - lock_kernel(); - if (cache_dir->i_op->mkdir) - rc = cache_dir->i_op->mkdir(cache_dir, cache_dentry, mode); - - cache_inode = igrab(cache_dentry->d_inode); - - inode = iget(dir->i_sb, cache_inode->i_ino); - - if (!inode) - GOTO(exit, rc = -ENOENT); - - d_instantiate(dentry, inode); - /*Do KML post hook*/ - if (smfs_do_kml(dir)) { - rc = post_kml_mkdir(dir, dentry); - GOTO(exit, rc); - } - post_smfs_inode(dir, cache_dir); + struct inode *cache_dir = I2CI(dir); + struct inode *inode = NULL; + struct dentry *cache_dentry; + struct dentry *cache_parent; + void *handle = NULL; + int rc = 0; + + if (!cache_dir) + RETURN(-ENOENT); + + handle = smfs_trans_start(dir, FSFILT_OP_MKDIR, NULL); + if (IS_ERR(handle)) + RETURN(-ENOSPC); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_MKDIR, handle, dir); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + + lock_kernel(); + if 
(!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dir, cache_dir); + + if (cache_dir->i_op->mkdir) + rc = cache_dir->i_op->mkdir(cache_dir, cache_dentry, mode); + + inode = iget(dir->i_sb, cache_dentry->d_inode->i_ino); + + if (!inode) + GOTO(exit, rc = -ENOENT); + + d_instantiate(dentry, inode); + post_smfs_inode(dir, cache_dir); + + SMFS_KML_POST(dir, dentry, NULL, NULL, REINT_CREATE, "mkdir", rc, exit); + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_MKDIR, handle, + dir, dentry, NULL, NULL, rc, exit); exit: - unlock_kernel(); - smfs_trans_commit(handle); - d_unalloc(cache_dentry); - RETURN(rc); + unlock_kernel(); + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static int smfs_rmdir(struct inode * dir, - struct dentry *dentry) +static int smfs_rmdir(struct inode * dir, struct dentry *dentry) { - struct inode *cache_dir = I2CI(dir); - struct inode *cache_inode = I2CI(dentry->d_inode); - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; - - if (!cache_dir) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - d_add(cache_dentry, cache_inode); - igrab(cache_inode); - - pre_smfs_inode(dir, cache_dir); - pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); - - - if (cache_dir->i_op->rmdir) - rc = cache_dir->i_op->rmdir(cache_dir, cache_dentry); - - post_smfs_inode(dir, cache_dir); - post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); - d_unalloc(cache_dentry); - RETURN(rc); + struct inode *cache_dir = I2CI(dir); + struct inode *cache_inode = I2CI(dentry->d_inode); + struct dentry *cache_dentry = NULL; + struct dentry *cache_parent = NULL; + void *handle = NULL; + int rc = 0, flag; + + if (!cache_dir) + RETURN(-ENOENT); + + handle = smfs_trans_start(dir, FSFILT_OP_RMDIR, NULL); + if (IS_ERR(handle)) { + CERROR("smfs_do_mkdir: no space for transaction\n"); + RETURN(-ENOSPC); + } + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_RMDIR, 
handle, dir); + + if (smfs_do_rec(dir)) { + /*For rmdir, we do not really unlink this + *inode and just remove it to a tmp dir for + *undo later, the inode will be deleted when + *the corresponding rec will be cancelled*/ + if (dentry->d_inode->i_nlink == 2) { + flag = 0; + SMFS_KML_POST(dir, dentry, &flag, NULL, + REINT_UNLINK, "rmdir", rc, exit); + } + } else { + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry); + cache_dentry = pre_smfs_dentry(cache_parent, cache_inode, + dentry); + + lock_kernel(); + if (!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + if (cache_dir->i_op->rmdir) + rc = cache_dir->i_op->rmdir(cache_dir, cache_dentry); + + post_smfs_inode(dir, cache_dir); + post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + unlock_kernel(); + } + SMFS_CACHE_HOOK_POST(CACHE_HOOK_RMDIR, handle, dir, dentry, + NULL, NULL, rc, exit); +exit: + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } -static int smfs_mknod(struct inode * dir, struct dentry *dentry, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static int smfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev) +#else +static int smfs_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +#endif { - struct inode *cache_dir = I2CI(dir); - struct inode *inode = NULL; - struct inode *cache_inode = NULL; - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; - - if (!cache_dir) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent, cache_dir); - cache_dentry = d_alloc(&parent, &dentry->d_name); - - pre_smfs_inode(dir, cache_dir); - pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); - - if (cache_dir->i_op->mknod) - rc = cache_dir->i_op->mknod(cache_dir, cache_dentry, mode, rdev); - - if (cache_dentry->d_inode) - cache_inode = igrab(cache_dentry->d_inode); - if (rc) - GOTO(exit, rc); - - inode = 
iget(dir->i_sb, cache_inode->i_ino); - d_instantiate(dentry, inode); - post_smfs_inode(dir, cache_dir); - post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + struct inode *cache_dir = I2CI(dir); + struct inode *inode = NULL; + struct dentry *cache_dentry = NULL; + struct dentry *cache_parent = NULL; + void *handle = NULL; + int rc = 0; + + if (!cache_dir) + RETURN(-ENOENT); + + handle = smfs_trans_start(dir, FSFILT_OP_MKNOD, NULL); + if (IS_ERR(handle)) { + CERROR("smfs_do_mkdir: no space for transaction\n"); + RETURN(-ENOSPC); + } + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_MKNOD, handle, dir); + + cache_parent = pre_smfs_dentry(NULL, cache_dir, dentry->d_parent); + cache_dentry = pre_smfs_dentry(cache_parent, NULL, dentry); + lock_kernel(); + if (!cache_parent || !cache_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(dir, cache_dir); + pre_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + + if (!cache_dir->i_op->mknod) + RETURN(-ENOENT); + + rc = cache_dir->i_op->mknod(cache_dir, cache_dentry, mode, rdev); + if (rc) + GOTO(exit, rc); + + inode = iget(dir->i_sb, cache_dentry->d_inode->i_ino); + d_instantiate(dentry, inode); + + post_smfs_inode(dir, cache_dir); + post_smfs_inode(dentry->d_inode, cache_dentry->d_inode); + + SMFS_KML_POST(dir, dentry, NULL, NULL, + REINT_CREATE, "mknod", rc, exit); + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_MKNOD, handle, dir, + dentry, NULL, NULL, rc, exit); exit: - d_unalloc(cache_dentry); - RETURN(rc); + unlock_kernel(); + post_smfs_dentry(cache_dentry); + smfs_trans_commit(dir, handle, 0); + RETURN(rc); } + static int smfs_rename(struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) { - struct inode *cache_old_dir = I2CI(old_dir); - struct inode *cache_new_dir = I2CI(new_dir); - struct inode *cache_old_inode = I2CI(old_dentry->d_inode); - struct dentry *cache_old_dentry; - struct dentry *cache_new_dentry; - struct dentry parent_new; - struct dentry parent_old; - int rc = 0; - - if 
(!cache_old_dir || !cache_new_dir || !cache_old_inode) - RETURN(-ENOENT); - - prepare_parent_dentry(&parent_old, cache_old_dir); - cache_old_dentry = d_alloc(&parent_old, &old_dentry->d_name); - d_add(cache_old_dentry, cache_old_inode); - igrab(cache_old_inode); - - prepare_parent_dentry(&parent_new, cache_new_dir); - cache_new_dentry = d_alloc(&parent_new, &new_dentry->d_name); - - pre_smfs_inode(old_dir, cache_old_dir) ; - pre_smfs_inode(new_dir, cache_new_dir); - - if (cache_old_dir->i_op->rename) - rc = cache_old_dir->i_op->rename(cache_old_dir, cache_old_dentry, - cache_new_dir, cache_new_dentry); - - post_smfs_inode(old_dir, cache_old_dir) ; - post_smfs_inode(new_dir, cache_new_dir); - if (cache_new_dentry->d_inode) { - igrab(cache_new_dentry->d_inode); - } - d_unalloc(cache_old_dentry); - d_unalloc(cache_new_dentry); - RETURN(rc); + struct inode *cache_old_dir = I2CI(old_dir); + struct inode *cache_new_dir = I2CI(new_dir); + struct inode *cache_old_inode = I2CI(old_dentry->d_inode); + + struct inode *cache_new_inode = new_dentry->d_inode ? 
+ I2CI(new_dentry->d_inode) : NULL; + + struct dentry *cache_old_dentry = NULL; + struct dentry *cache_new_dentry = NULL; + struct dentry *cache_new_parent = NULL; + struct dentry *cache_old_parent = NULL; + void *handle = NULL; + int rc = 0; + + if (!cache_old_dir || !cache_new_dir || !cache_old_inode) + RETURN(-ENOENT); + + handle = smfs_trans_start(old_dir, FSFILT_OP_RENAME, NULL); + if (IS_ERR(handle)) { + CERROR("smfs_do_mkdir: no space for transaction\n"); + RETURN(-ENOSPC); + } + lock_kernel(); + + SMFS_CACHE_HOOK_PRE(CACHE_HOOK_RENAME, handle, old_dir); + + cache_old_parent = pre_smfs_dentry(NULL, cache_old_dir, old_dentry); + cache_old_dentry = pre_smfs_dentry(cache_old_parent, cache_old_inode, + old_dentry); + if (!cache_old_parent || !cache_old_dentry) + GOTO(exit, rc = -ENOMEM); + + cache_new_parent = pre_smfs_dentry(NULL, cache_new_dir, new_dentry); + cache_new_dentry = pre_smfs_dentry(cache_new_parent, cache_new_inode, + new_dentry); + if (!cache_new_parent || !cache_new_dentry) + GOTO(exit, rc = -ENOMEM); + + pre_smfs_inode(old_dir, cache_old_dir); + pre_smfs_inode(new_dir, cache_new_dir); + + if (cache_old_dir->i_op->rename) + rc = cache_old_dir->i_op->rename(cache_old_dir, cache_old_dentry, + cache_new_dir, cache_new_dentry); + + post_smfs_inode(old_dir, cache_old_dir); + post_smfs_inode(new_dir, cache_new_dir); + + SMFS_KML_POST(old_dir, old_dentry, new_dir, + new_dentry, REINT_RENAME, "rename", rc, exit); + if (new_dentry->d_inode) + post_smfs_inode(new_dentry->d_inode, cache_new_dentry->d_inode); + + SMFS_CACHE_HOOK_POST(CACHE_HOOK_RENAME, handle, old_dir, old_dentry, + new_dir, new_dentry, rc, exit); +exit: + unlock_kernel(); + post_smfs_dentry(cache_old_dentry); + post_smfs_dentry(cache_new_dentry); + smfs_trans_commit(old_dir, handle, 0); + RETURN(rc); } struct inode_operations smfs_dir_iops = { - create: smfs_create, - lookup: smfs_lookup, - lookup_raw: smfs_lookup_raw, /* BKL held */ + create: smfs_create, + lookup: smfs_lookup, +#if 
(LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + lookup_raw: smfs_lookup_raw, /* BKL held */ +#endif link: smfs_link, /* BKL held */ unlink: smfs_unlink, /* BKL held */ symlink: smfs_symlink, /* BKL held */ @@ -421,58 +613,65 @@ struct inode_operations smfs_dir_iops = { removexattr: smfs_removexattr, /* BKL held */ }; -static ssize_t smfs_read_dir(struct file *filp, char *buf, - size_t size, loff_t *ppos) +static ssize_t smfs_read_dir(struct file *filp, char *buf, + size_t size, loff_t *ppos) { - struct dentry *dentry = filp->f_dentry; - struct inode *cache_inode = NULL; - struct file open_file; - struct dentry open_dentry; - int rc = 0; - - cache_inode = I2CI(dentry->d_inode); - - if (!cache_inode) - RETURN(-EINVAL); - - smfs_prepare_cachefile(dentry->d_inode, filp, cache_inode, - &open_file, &open_dentry); - - if (cache_inode->i_fop->read) - rc = cache_inode->i_fop->read(&open_file, buf, size, ppos); - - smfs_update_file(filp, &open_file); - RETURN(rc); + struct dentry *dentry = filp->f_dentry; + struct inode *cache_inode = NULL; + struct smfs_file_info *sfi = NULL; + loff_t tmp_ppos; + loff_t *cache_ppos; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + + if (!cache_inode) + RETURN(-EINVAL); + + sfi = F2SMFI(filp); + if (sfi->magic != SMFS_FILE_MAGIC) BUG(); + + if (ppos != &(filp->f_pos)) + cache_ppos = &tmp_ppos; + else + cache_ppos = &sfi->c_file->f_pos; + *cache_ppos = *ppos; + + if (cache_inode->i_fop->read) + rc = cache_inode->i_fop->read(sfi->c_file, buf, size, + cache_ppos); + + *ppos = *cache_ppos; + duplicate_file(filp, sfi->c_file); + RETURN(rc); } -static int smfs_readdir(struct file * filp, - void * dirent, - filldir_t filldir) +static int smfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; - struct inode *cache_inode = NULL; - struct file open_file; - struct dentry open_dentry; - int rc = 0; - - cache_inode = I2CI(dentry->d_inode); - - if (!cache_inode) - RETURN(-EINVAL); - - 
smfs_prepare_cachefile(dentry->d_inode, filp, cache_inode, - &open_file, &open_dentry); - - if (cache_inode->i_fop->readdir) - rc = cache_inode->i_fop->readdir(&open_file, dirent, filldir); - - smfs_update_file(filp, &open_file); - RETURN(rc); + struct dentry *dentry = filp->f_dentry; + struct inode *cache_inode = NULL; + struct smfs_file_info *sfi = NULL; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-EINVAL); + + sfi = F2SMFI(filp); + if (sfi->magic != SMFS_FILE_MAGIC) BUG(); + + if (cache_inode->i_fop->readdir) + rc = cache_inode->i_fop->readdir(sfi->c_file, dirent, filldir); + + duplicate_file(filp, sfi->c_file); + RETURN(rc); } struct file_operations smfs_dir_fops = { - read: smfs_read_dir, + read: smfs_read_dir, readdir: smfs_readdir, /* BKL held */ ioctl: smfs_ioctl, /* BKL held */ fsync: smfs_fsync, /* BKL held */ + open: smfs_open, + release: smfs_release, }; diff --git a/lustre/smfs/file.c b/lustre/smfs/file.c index eaf1901..b56da39 100644 --- a/lustre/smfs/file.c +++ b/lustre/smfs/file.c @@ -1,5 +1,23 @@ -/* - * file.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * */ #define DEBUG_SUBSYSTEM S_SM @@ -11,448 +29,493 @@ #include #include #include +#include +#include +#include +#include +#include #include -#include "smfs_internal.h" - -/* instantiate a file handle to the cache file */ -void smfs_prepare_cachefile(struct inode *inode, - struct file *file, - struct inode *cache_inode, - struct file *cache_file, - struct dentry *cache_dentry) -{ - ENTRY; - cache_file->f_pos = file->f_pos; - cache_file->f_mode = file->f_mode; - cache_file->f_flags = file->f_flags; - cache_file->f_count = file->f_count; - cache_file->f_owner = file->f_owner; - cache_file->f_error = file->f_error; - cache_file->f_op = inode->i_fop; - cache_file->f_dentry = cache_dentry; - cache_file->f_dentry->d_inode = cache_inode; - cache_file->f_vfsmnt = file->f_vfsmnt; - cache_file->private_data = file->private_data; - cache_file->f_it = file->f_it; - cache_file->f_reada = file->f_reada; - cache_file->f_ramax = file->f_ramax; - cache_file->f_raend = file->f_raend; - cache_file->f_ralen = file->f_ralen; - cache_file->f_rawin = file->f_rawin; - EXIT; -} -/* update file structs*/ -void smfs_update_file(struct file *file, - struct file *cache_file) -{ - ENTRY; - file->f_pos = cache_file->f_pos; - file->f_mode = cache_file->f_mode; - file->f_flags = cache_file->f_flags; - file->f_count = cache_file->f_count; - file->f_owner = cache_file->f_owner; - file->f_reada = cache_file->f_reada; - file->f_ramax = cache_file->f_ramax; - file->f_raend = cache_file->f_raend; - file->f_ralen = cache_file->f_ralen; - file->f_rawin = cache_file->f_rawin; - EXIT; -} +#include +#include -static ssize_t smfs_write (struct file *filp, const char *buf, - size_t count, loff_t *ppos) +#include "smfs_internal.h" + +static ssize_t smfs_write(struct file *filp, const char *buf, size_t count, + loff_t *ppos) { - struct inode *cache_inode; - struct dentry *dentry = filp->f_dentry; - struct inode *inode = dentry->d_inode; - struct file open_file; - struct dentry open_dentry; - loff_t tmp_ppos; 
- loff_t *cache_ppos; - int rc = 0; - - ENTRY; - - cache_inode = I2CI(inode); - + struct inode *cache_inode; + struct smfs_file_info *sfi; + loff_t tmp_ppos; + loff_t *cache_ppos; + int rc = 0; + ENTRY; + + cache_inode = I2CI(filp->f_dentry->d_inode); + if (!cache_inode) RETURN(-ENOENT); - - if (ppos != &(filp->f_pos)) { - cache_ppos = &tmp_ppos; - } else { - cache_ppos = &open_file.f_pos; - } - *cache_ppos = *ppos; - - smfs_prepare_cachefile(inode, filp, cache_inode, - &open_file, &open_dentry); - pre_smfs_inode(inode, cache_inode); - - if (cache_inode->i_fop->write) - rc = cache_inode->i_fop->write(&open_file, buf, count, cache_ppos); - - *ppos = *cache_ppos; - post_smfs_inode(inode, cache_inode); - smfs_update_file(filp, &open_file); - - RETURN(rc); + + sfi = F2SMFI(filp); + + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + if (ppos != &(filp->f_pos)) + cache_ppos = &tmp_ppos; + else + cache_ppos = &sfi->c_file->f_pos; + *cache_ppos = *ppos; + + pre_smfs_inode(filp->f_dentry->d_inode, cache_inode); + + if (cache_inode->i_fop->write) + rc = cache_inode->i_fop->write(sfi->c_file, buf, count, + cache_ppos); + + post_smfs_inode(filp->f_dentry->d_inode, cache_inode); + SMFS_KML_POST(filp->f_dentry->d_inode, filp->f_dentry, + &count ,ppos, REINT_WRITE, "write", rc, exit); +exit: + *ppos = *cache_ppos; + duplicate_file(filp, sfi->c_file); + RETURN(rc); } -int smfs_ioctl(struct inode * inode, struct file * filp, - unsigned int cmd, unsigned long arg) +int smfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, + unsigned long arg) { - struct inode *cache_inode; - struct dentry *dentry = filp->f_dentry; - struct file open_file; - struct dentry open_dentry; - ssize_t rc = 0; - - ENTRY; - - cache_inode = I2CI(dentry->d_inode); + struct inode *cache_inode; + struct smfs_file_info *sfi; + ssize_t rc = 0; + ENTRY; + + cache_inode = I2CI(filp->f_dentry->d_inode); if (!cache_inode) RETURN(-ENOENT); - smfs_prepare_cachefile(inode, filp, cache_inode, - &open_file, 
&open_dentry); - - if (cache_inode->i_fop->ioctl) - rc = cache_inode->i_fop->ioctl(cache_inode, &open_file, cmd, arg); - - post_smfs_inode(inode, cache_inode); - smfs_update_file(filp, &open_file); + sfi = F2SMFI(filp); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + pre_smfs_inode(inode, cache_inode); + + if (cache_inode->i_fop->ioctl) + rc = cache_inode->i_fop->ioctl(cache_inode, sfi->c_file, cmd, + arg); + + post_smfs_inode(inode, cache_inode); + duplicate_file(filp, sfi->c_file); + RETURN(rc); } -static ssize_t smfs_read (struct file *filp, char *buf, - size_t count, loff_t *ppos) +static ssize_t smfs_read(struct file *filp, char *buf, size_t count, + loff_t *ppos) { - struct inode *cache_inode; - struct dentry *dentry = filp->f_dentry; - struct inode *inode = dentry->d_inode; - struct file open_file; - struct dentry open_dentry; - loff_t tmp_ppos; - loff_t *cache_ppos; - ssize_t rc = 0; - - ENTRY; - - cache_inode = I2CI(dentry->d_inode); + struct inode *cache_inode; + struct smfs_file_info *sfi; + loff_t tmp_ppos; + loff_t *cache_ppos; + ssize_t rc = 0; + ENTRY; + + cache_inode = I2CI(filp->f_dentry->d_inode); if (!cache_inode) RETURN(-ENOENT); - if (ppos != &(filp->f_pos)) { - cache_ppos = &tmp_ppos; - } else { - cache_ppos = &open_file.f_pos; - } - *cache_ppos = *ppos; - - - smfs_prepare_cachefile(inode, filp, cache_inode, - &open_file, &open_dentry); - - - pre_smfs_inode(inode, cache_inode); - if (cache_inode->i_fop->read) - rc = cache_inode->i_fop->read(&open_file, buf, count, cache_ppos); - - *ppos = *cache_ppos; - post_smfs_inode(inode, cache_inode); - smfs_update_file(filp, &open_file); - RETURN(rc); + sfi = F2SMFI(filp); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + if (ppos != &(filp->f_pos)) + cache_ppos = &tmp_ppos; + else + cache_ppos = &sfi->c_file->f_pos; + *cache_ppos = *ppos; + + pre_smfs_inode(filp->f_dentry->d_inode, cache_inode); + + if (cache_inode->i_fop->read) + rc = cache_inode->i_fop->read(sfi->c_file, buf, count, + cache_ppos); + 
+ *ppos = *cache_ppos; + post_smfs_inode(filp->f_dentry->d_inode, cache_inode); + duplicate_file(filp, sfi->c_file); + + RETURN(rc); } -static loff_t smfs_llseek(struct file *file, - loff_t offset, - int origin) +static loff_t smfs_llseek(struct file *file, loff_t offset, int origin) { - struct inode *cache_inode; - struct dentry *dentry = file->f_dentry; - struct file open_file; - struct dentry open_dentry; - ssize_t rc = 0; - - ENTRY; - - cache_inode = I2CI(dentry->d_inode); + struct inode *cache_inode; + struct smfs_file_info *sfi; + ssize_t rc = 0; + ENTRY; + + cache_inode = I2CI(file->f_dentry->d_inode); if (!cache_inode) RETURN(-ENOENT); - smfs_prepare_cachefile(dentry->d_inode, file, cache_inode, - &open_file, &open_dentry); - - pre_smfs_inode(dentry->d_inode, cache_inode); - if (cache_inode->i_fop->llseek) - rc = cache_inode->i_fop->llseek(&open_file, offset, origin); + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + pre_smfs_inode(file->f_dentry->d_inode, cache_inode); + + if (cache_inode->i_fop->llseek) + rc = cache_inode->i_fop->llseek(sfi->c_file, offset, origin); + + post_smfs_inode(file->f_dentry->d_inode, cache_inode); + duplicate_file(file, sfi->c_file); - post_smfs_inode(dentry->d_inode, cache_inode); - smfs_update_file(file, &open_file); - RETURN(rc); } -static int smfs_mmap(struct file * file, struct vm_area_struct * vma) +static int smfs_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file->f_dentry->d_inode; + struct smfs_file_info *sfi; struct inode *cache_inode = NULL; - struct file open_file; - struct dentry open_dentry; - int rc = 0; + int rc = 0; + ENTRY; - cache_inode = I2CI(inode); + cache_inode = I2CI(inode); if (!cache_inode) RETURN(-ENOENT); - smfs_prepare_cachefile(inode, file, cache_inode, - &open_file, &open_dentry); - - if (cache_inode->i_mapping == &cache_inode->i_data) + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + if (cache_inode->i_mapping == 
&cache_inode->i_data) inode->i_mapping = cache_inode->i_mapping; - pre_smfs_inode(inode, cache_inode); - if (cache_inode->i_fop->mmap) - rc = cache_inode->i_fop->mmap(&open_file, vma); - - post_smfs_inode(inode, cache_inode); - smfs_update_file(file, &open_file); - - RETURN(rc); + pre_smfs_inode(inode, cache_inode); + if (cache_inode->i_fop->mmap) + rc = cache_inode->i_fop->mmap(sfi->c_file, vma); + + post_smfs_inode(inode, cache_inode); + duplicate_file(file, sfi->c_file); + + RETURN(rc); +} + +static int smfs_init_cache_file(struct inode *inode, struct file *filp) +{ + struct smfs_file_info *sfi = NULL; + struct file *cache_filp = NULL; + struct dentry *cache_dentry = NULL; + int rc = 0; + ENTRY; + + OBD_ALLOC(sfi, sizeof(struct smfs_file_info)); + if (!sfi) + RETURN(-ENOMEM); + + cache_filp = get_empty_filp(); + if (!cache_filp) + GOTO(err_exit, rc = -ENOMEM); + + sfi->magic = SMFS_FILE_MAGIC; + + cache_dentry = pre_smfs_dentry(NULL, I2CI(inode), filp->f_dentry); + if (!cache_dentry) + GOTO(err_exit, rc = -ENOMEM); + + cache_filp->f_vfsmnt = filp->f_vfsmnt; + + cache_filp->f_dentry = cache_dentry; + duplicate_file(cache_filp, filp); + + sfi->c_file = cache_filp; + + if (filp->private_data != NULL) + BUG(); + + filp->private_data = sfi; + + RETURN(rc); +err_exit: + if (sfi) + OBD_FREE(sfi, sizeof(struct smfs_file_info)); + if (cache_filp) + put_filp(cache_filp); + RETURN(rc); } -static int smfs_open(struct inode * inode, struct file * filp) +static int smfs_cleanup_cache_file(struct file *filp) { - struct inode *cache_inode = NULL; - struct file open_file; - struct dentry open_dentry; - int rc = 0; + struct smfs_file_info *sfi = NULL; + int rc = 0; + ENTRY; + + sfi = F2SMFI(filp); + + post_smfs_dentry(sfi->c_file->f_dentry); - cache_inode = I2CI(inode); + put_filp(sfi->c_file); + + OBD_FREE(sfi, sizeof(struct smfs_file_info)); + + filp->private_data = NULL; + + RETURN(rc); +} + +int smfs_open(struct inode *inode, struct file *filp) +{ + struct inode *cache_inode 
= NULL; + int rc = 0, flag = 1; + ENTRY; + + cache_inode = I2CI(inode); if (!cache_inode) RETURN(-ENOENT); - smfs_prepare_cachefile(inode, filp, cache_inode, - &open_file, &open_dentry); - - pre_smfs_inode(inode, cache_inode); - if (cache_inode->i_fop->open) - rc = cache_inode->i_fop->open(cache_inode, &open_file); - - post_smfs_inode(inode, cache_inode); - smfs_update_file(filp, &open_file); - - RETURN(rc); + if ((rc = smfs_init_cache_file(inode, filp))) + RETURN(rc); + + pre_smfs_inode(inode, cache_inode); + if (cache_inode->i_fop->open) + rc = cache_inode->i_fop->open(cache_inode, F2CF(filp)); + + post_smfs_inode(inode, cache_inode); + duplicate_file(filp, F2CF(filp)); + SMFS_KML_POST(filp->f_dentry->d_inode, filp->f_dentry, &flag, NULL, + REINT_OPEN, "open", rc, exit); +exit: + RETURN(rc); } -static int smfs_release(struct inode * inode, struct file * filp) + +int smfs_release(struct inode *inode, struct file *filp) { - struct inode *cache_inode = NULL; - struct file open_file; - struct dentry open_dentry; - int rc = 0; + struct inode *cache_inode = NULL; + struct smfs_file_info *sfi = NULL; + int rc = 0, flag = 0; + ENTRY; - cache_inode = I2CI(inode); + cache_inode = I2CI(inode); if (!cache_inode) RETURN(-ENOENT); - - smfs_prepare_cachefile(inode, filp, cache_inode, - &open_file, &open_dentry); - - pre_smfs_inode(inode, cache_inode); - if (cache_inode->i_fop->release) - rc = cache_inode->i_fop->release(cache_inode, &open_file); - - post_smfs_inode(inode, cache_inode); - smfs_update_file(filp, &open_file); - - RETURN(rc); + + sfi = F2SMFI(filp); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + pre_smfs_inode(inode, cache_inode); + if (cache_inode->i_fop->release) + rc = cache_inode->i_fop->release(cache_inode, sfi->c_file); + + post_smfs_inode(inode, cache_inode); + duplicate_file(filp, sfi->c_file); + + smfs_cleanup_cache_file(filp); + + SMFS_KML_POST(filp->f_dentry->d_inode, filp->f_dentry, &flag, NULL, + REINT_CLOSE, "close", rc, exit); +exit: + RETURN(rc); 
} -int smfs_fsync(struct file * file, - struct dentry *dentry, - int datasync) + +int smfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct inode *inode = dentry->d_inode; - struct inode *cache_inode; - struct file open_file; - struct dentry open_dentry; - int rc = 0; + struct smfs_file_info *sfi = NULL; + struct inode *cache_inode; + int rc = 0; - cache_inode = I2CI(inode); + cache_inode = I2CI(file->f_dentry->d_inode); if (!cache_inode) RETURN(-ENOENT); - - smfs_prepare_cachefile(inode, file, cache_inode, - &open_file, &open_dentry); - - pre_smfs_inode(inode, cache_inode); - if (cache_inode->i_fop->fsync) - rc = cache_inode->i_fop->fsync(&open_file, &open_dentry, datasync); - - post_smfs_inode(inode, cache_inode); - smfs_update_file(file, &open_file); - - RETURN(rc); + + sfi = F2SMFI(file); + if (sfi->magic != SMFS_FILE_MAGIC) + BUG(); + + pre_smfs_inode(file->f_dentry->d_inode, cache_inode); + + if (cache_inode->i_fop->fsync) + rc = cache_inode->i_fop->fsync(sfi->c_file, + sfi->c_file->f_dentry, datasync); + + post_smfs_inode(file->f_dentry->d_inode, cache_inode); + duplicate_file(file, sfi->c_file); + + RETURN(rc); } struct file_operations smfs_file_fops = { - llseek: smfs_llseek, - read: smfs_read, - write: smfs_write, - ioctl: smfs_ioctl, - mmap: smfs_mmap, - open: smfs_open, - release: smfs_release, - fsync: smfs_fsync, + llseek: smfs_llseek, + read: smfs_read, + write: smfs_write, + ioctl: smfs_ioctl, + mmap: smfs_mmap, + open: smfs_open, + release: smfs_release, + fsync: smfs_fsync, }; -static void smfs_prepare_cache_dentry(struct dentry *dentry, struct inode *inode) +static void smfs_truncate(struct inode *inode) { - atomic_set(&dentry->d_count, 1); - dentry->d_vfs_flags = 0; - dentry->d_flags = 0; - dentry->d_inode = inode; - dentry->d_op = NULL; - dentry->d_fsdata = NULL; - dentry->d_mounted = 0; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); - 
INIT_LIST_HEAD(&dentry->d_alias); + struct inode *cache_inode; + + cache_inode = I2CI(inode); + + if (!cache_inode) + return; + + pre_smfs_inode(inode, cache_inode); + if (cache_inode->i_op->truncate) + cache_inode->i_op->truncate(cache_inode); + + post_smfs_inode(inode, cache_inode); } -static void smfs_truncate(struct inode * inode) -{ - struct inode *cache_inode; - - cache_inode = I2CI(inode); - - if (!cache_inode) - return; - - if (cache_inode->i_op->truncate) - cache_inode->i_op->truncate(cache_inode); - - post_smfs_inode(inode, cache_inode); - - return; -} - -int smfs_setattr(struct dentry *dentry, struct iattr *attr) +int smfs_setattr(struct dentry *dentry, struct iattr *attr) { - struct inode *cache_inode; - struct dentry open_dentry; - - int rc = 0; - - cache_inode = I2CI(dentry->d_inode); - - if (!cache_inode) - RETURN(-ENOENT); - smfs_prepare_cache_dentry(&open_dentry, cache_inode); - - pre_smfs_inode(dentry->d_inode, cache_inode); - if (cache_inode->i_op->setattr) - rc = cache_inode->i_op->setattr(&open_dentry, attr); - - post_smfs_inode(dentry->d_inode, cache_inode); - - RETURN(rc); -} - -int smfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) + struct inode *cache_inode; + struct dentry *cache_dentry; + void *handle = NULL; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-ENOENT); + + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + RETURN(-ENOMEM); + + handle = smfs_trans_start(dentry->d_inode, FSFILT_OP_SETATTR, NULL); + if (IS_ERR(handle) ) { + CERROR("smfs_do_mkdir: no space for transaction\n"); + RETURN(-ENOSPC); + } + + pre_smfs_inode(dentry->d_inode, cache_inode); + + if (cache_inode->i_op->setattr) + rc = cache_inode->i_op->setattr(cache_dentry, attr); + + SMFS_KML_POST(dentry->d_inode, dentry, attr, NULL, + REINT_SETATTR, "setattr", rc, exit); +exit: + post_smfs_inode(dentry->d_inode, cache_inode); + 
post_smfs_dentry(cache_dentry); + smfs_trans_commit(dentry->d_inode, handle, 0); + RETURN(rc); +} + +int smfs_setxattr(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) { - struct inode *cache_inode; - struct dentry open_dentry; - int rc = 0; - - cache_inode = I2CI(dentry->d_inode); - - if (!cache_inode) - RETURN(-ENOENT); - - pre_smfs_inode(dentry->d_inode, cache_inode); - smfs_prepare_cache_dentry(&open_dentry, cache_inode); - - if (cache_inode->i_op->setattr) - rc = cache_inode->i_op->setxattr(&open_dentry, name, value, size, flags); - - post_smfs_inode(dentry->d_inode, cache_inode); - RETURN(rc); -} - -int smfs_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct inode *cache_inode; + struct dentry *cache_dentry; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-ENOENT); + + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + RETURN(-ENOMEM); + + pre_smfs_inode(dentry->d_inode, cache_inode); + + if (cache_inode->i_op->setxattr) + rc = cache_inode->i_op->setxattr(cache_dentry, name, value, + size, flags); + + post_smfs_inode(dentry->d_inode, cache_inode); + post_smfs_dentry(cache_dentry); + + RETURN(rc); +} + +int smfs_getxattr(struct dentry *dentry, const char *name, void *buffer, + size_t size) { - struct inode *cache_inode; - struct dentry open_dentry; - int rc = 0; + struct inode *cache_inode; + struct dentry *cache_dentry; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-ENOENT); + + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + RETURN(-ENOMEM); - cache_inode = I2CI(dentry->d_inode); + pre_smfs_inode(dentry->d_inode, cache_inode); - if (!cache_inode) - RETURN(-ENOENT); + if (cache_inode->i_op->getattr) + rc = cache_inode->i_op->getxattr(cache_dentry, name, buffer, + size); - smfs_prepare_cache_dentry(&open_dentry, cache_inode); - 
pre_smfs_inode(dentry->d_inode, cache_inode); - - if (cache_inode->i_op->setattr) - rc = cache_inode->i_op->getxattr(&open_dentry, name, buffer, size); + post_smfs_inode(dentry->d_inode, cache_inode); + post_smfs_dentry(cache_dentry); - post_smfs_inode(dentry->d_inode, cache_inode); - RETURN(rc); + RETURN(rc); } ssize_t smfs_listxattr(struct dentry *dentry, char *buffer, size_t size) { - struct inode *cache_inode; - struct dentry open_dentry; - int rc = 0; + struct inode *cache_inode; + struct dentry *cache_dentry; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-ENOENT); - cache_inode = I2CI(dentry->d_inode); + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + RETURN(-ENOMEM); - if (!cache_inode) - RETURN(-ENOENT); + pre_smfs_inode(dentry->d_inode, cache_inode); - smfs_prepare_cache_dentry(&open_dentry, cache_inode); - pre_smfs_inode(dentry->d_inode, cache_inode); - - if (cache_inode->i_op->listxattr) - rc = cache_inode->i_op->listxattr(&open_dentry, buffer, size); + if (cache_inode->i_op->listxattr) + rc = cache_inode->i_op->listxattr(cache_dentry, buffer, size); - post_smfs_inode(dentry->d_inode, cache_inode); - RETURN(rc); -} + post_smfs_inode(dentry->d_inode, cache_inode); + post_smfs_dentry(cache_dentry); + + RETURN(rc); +} int smfs_removexattr(struct dentry *dentry, const char *name) { - struct inode *cache_inode; - struct dentry open_dentry; - int rc = 0; + struct inode *cache_inode; + struct dentry *cache_dentry; + int rc = 0; + + cache_inode = I2CI(dentry->d_inode); + if (!cache_inode) + RETURN(-ENOENT); - cache_inode = I2CI(dentry->d_inode); + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + RETURN(-ENOMEM); - if (!cache_inode) - RETURN(-ENOENT); + pre_smfs_inode(dentry->d_inode, cache_inode); - smfs_prepare_cache_dentry(&open_dentry, cache_inode); - pre_smfs_inode(dentry->d_inode, cache_inode); - - if (cache_inode->i_op->removexattr) - rc = 
cache_inode->i_op->removexattr(&open_dentry, name); + if (cache_inode->i_op->removexattr) + rc = cache_inode->i_op->removexattr(cache_dentry, name); - post_smfs_inode(dentry->d_inode, cache_inode); - RETURN(rc); + post_smfs_inode(dentry->d_inode, cache_inode); + post_smfs_dentry(cache_dentry); + + RETURN(rc); } struct inode_operations smfs_file_iops = { - truncate: smfs_truncate, /* BKL held */ + truncate: smfs_truncate, /* BKL held */ setattr: smfs_setattr, /* BKL held */ setxattr: smfs_setxattr, /* BKL held */ getxattr: smfs_getxattr, /* BKL held */ listxattr: smfs_listxattr, /* BKL held */ removexattr: smfs_removexattr, /* BKL held */ }; - diff --git a/lustre/smfs/inode.c b/lustre/smfs/inode.c index 117d503..7335d5d 100644 --- a/lustre/smfs/inode.c +++ b/lustre/smfs/inode.c @@ -1,5 +1,22 @@ -/* - * smfs/inode.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ @@ -10,297 +27,274 @@ #include #include #include +#include +#include +#include #include -#include "smfs_internal.h" +#include +#include "smfs_internal.h" -static void duplicate_inode(struct inode *dst_inode, - struct inode *src_inode) +static void smfs_read_inode(struct inode *inode) { - dst_inode->i_mode = src_inode->i_mode; - dst_inode->i_uid = src_inode->i_uid; - dst_inode->i_gid = src_inode->i_gid; - dst_inode->i_nlink = src_inode->i_nlink; - dst_inode->i_size = src_inode->i_size; - dst_inode->i_atime = src_inode->i_atime; - dst_inode->i_ctime = src_inode->i_ctime; - dst_inode->i_mtime = src_inode->i_mtime; - dst_inode->i_blksize = src_inode->i_blksize; - dst_inode->i_blocks = src_inode->i_blocks; - dst_inode->i_version = src_inode->i_version; - dst_inode->i_state = src_inode->i_state; -} + struct super_block *cache_sb; + struct inode *cache_inode; + ENTRY; -void post_smfs_inode(struct inode *inode, - struct inode *cache_inode) -{ - if (inode && cache_inode) { - duplicate_inode(inode, cache_inode); - /*Here we must release the cache_inode, - *Otherwise we will have no chance to - *do it - */ - cache_inode->i_state &=~I_LOCK; - } -} -void pre_smfs_inode(struct inode *inode, - struct inode *cache_inode) -{ - if (inode && cache_inode) { - duplicate_inode(cache_inode, inode); - } -} + if (!inode) + return; -static void smfs_read_inode(struct inode *inode) -{ - struct super_block *cache_sb; - struct inode *cache_inode; - ENTRY; - - if (!inode) - return; - - CDEBUG(D_INODE, "read_inode ino %lu\n", inode->i_ino); - cache_sb = S2CSB(inode->i_sb); - - cache_inode = iget(cache_sb, inode->i_ino); - I2CI(inode) = cache_inode; - - if(cache_sb && cache_sb->s_op->read_inode) - cache_sb->s_op->read_inode(cache_inode); - - post_smfs_inode(inode, cache_inode); - sm_set_inode_ops(cache_inode, inode); - - CDEBUG(D_INODE, "read_inode ino %lu icount %d \n", - inode->i_ino, atomic_read(&inode->i_count)); - - iput(cache_inode); - return; + CDEBUG(D_INODE, "read_inode ino %lu\n", 
inode->i_ino); + cache_sb = S2CSB(inode->i_sb); + + cache_inode = iget(cache_sb, inode->i_ino); + + SMFS_SET_INODE_REC(inode); + SMFS_SET_INODE_CACHE_HOOK(inode); + I2CI(inode) = cache_inode; + + pre_smfs_inode(inode, cache_inode); + if (cache_sb && cache_sb->s_op->read_inode) + cache_sb->s_op->read_inode(cache_inode); + + post_smfs_inode(inode, cache_inode); + sm_set_inode_ops(cache_inode, inode); + + CDEBUG(D_INODE, "read_inode ino %lu icount %d \n", + inode->i_ino, atomic_read(&inode->i_count)); + EXIT; } /* Although some filesystem(such as ext3) do not have - * clear_inode method, but we need it to free the - * cache inode + * clear_inode method, but we need it to free the + * cache inode */ static void smfs_clear_inode(struct inode *inode) { - struct super_block *cache_sb; - struct inode *cache_inode; - - ENTRY; - - if (!inode) return; - - cache_sb = S2CSB(inode->i_sb); - cache_inode = I2CI(inode); - - /*FIXME: because i_count of cache_inode may not - * be 0 or 1 in before smfs_delete inode, So we + struct super_block *cache_sb; + struct inode *cache_inode; + ENTRY; + + if (!inode) + return; + + cache_sb = S2CSB(inode->i_sb); + cache_inode = I2CI(inode); + + /*FIXME: because i_count of cache_inode may not + * be 0 or 1 in before smfs_delete inode, So we * need to dec it to 1 before we call delete_inode * of the bellow cache filesystem Check again latter*/ - if (atomic_read(&cache_inode->i_count) < 1) - BUG(); - - while (atomic_read(&cache_inode->i_count) != 1) { - atomic_dec(&cache_inode->i_count); - } - iput(cache_inode); - - I2CI(inode) = NULL; - return; + if (atomic_read(&cache_inode->i_count) < 1) + BUG(); + + while (atomic_read(&cache_inode->i_count) != 1) + atomic_dec(&cache_inode->i_count); + + iput(cache_inode); + + SMFS_CLEAN_INODE_REC(inode); + I2CI(inode) = NULL; + EXIT; } + static void smfs_delete_inode(struct inode *inode) { - struct inode *cache_inode; - struct super_block *cache_sb; + struct inode *cache_inode; + struct super_block *cache_sb; + 
ENTRY; - ENTRY; - cache_inode = I2CI(inode); - cache_sb = S2CSB(inode->i_sb); + cache_inode = I2CI(inode); + cache_sb = S2CSB(inode->i_sb); - if (!cache_inode || !cache_sb) - return; + if (!cache_inode || !cache_sb) + return; - /*FIXME: because i_count of cache_inode may not - * be 0 or 1 in before smfs_delete inode, So we - * need to dec it to 1 before we call delete_inode - * of the bellow cache filesystem Check again latter*/ + /* FIXME-WANGDI: because i_count of cache_inode may not be 0 or 1 in + * before smfs_delete inode, So we need to dec it to 1 before we call + * delete_inode of the bellow cache filesystem Check again latter. */ + + if (atomic_read(&cache_inode->i_count) < 1) + BUG(); + + while (atomic_read(&cache_inode->i_count) != 1) + atomic_dec(&cache_inode->i_count); + + pre_smfs_inode(inode, cache_inode); - if (atomic_read(&cache_inode->i_count) < 1) - BUG(); - - while (atomic_read(&cache_inode->i_count) != 1) { - atomic_dec(&cache_inode->i_count); - } - - pre_smfs_inode(inode, cache_inode); - - list_del(&cache_inode->i_hash); +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + list_del(&cache_inode->i_hash); INIT_LIST_HEAD(&cache_inode->i_hash); +#else + hlist_del_init(&cache_inode->i_hash); +#endif + list_del(&cache_inode->i_list); INIT_LIST_HEAD(&cache_inode->i_list); - - if (cache_inode->i_data.nrpages) - truncate_inode_pages(&cache_inode->i_data, 0); - - if (cache_sb->s_op->delete_inode) - cache_sb->s_op->delete_inode(cache_inode); - - post_smfs_inode(inode, cache_inode); - - I2CI(inode) = NULL; - return; + + if (cache_inode->i_data.nrpages) + truncate_inode_pages(&cache_inode->i_data, 0); + + if (cache_sb->s_op->delete_inode) + cache_sb->s_op->delete_inode(cache_inode); + + post_smfs_inode(inode, cache_inode); + + I2CI(inode) = NULL; + EXIT; } + static void smfs_write_inode(struct inode *inode, int wait) { - struct inode *cache_inode; - struct super_block *cache_sb; - - ENTRY; - cache_inode = I2CI(inode); - cache_sb = S2CSB(inode->i_sb); - - if 
(!cache_inode || !cache_sb) - return; - - pre_smfs_inode(inode, cache_inode); - - if (cache_sb->s_op->write_inode) - cache_sb->s_op->write_inode(cache_inode, wait); - - post_smfs_inode(inode, cache_inode); - - return; + struct inode *cache_inode; + struct super_block *cache_sb; + ENTRY; + + cache_inode = I2CI(inode); + cache_sb = S2CSB(inode->i_sb); + + if (!cache_inode || !cache_sb) + return; + + pre_smfs_inode(inode, cache_inode); + + if (cache_sb->s_op->write_inode) + cache_sb->s_op->write_inode(cache_inode, wait); + + post_smfs_inode(inode, cache_inode); + EXIT; } + static void smfs_dirty_inode(struct inode *inode) { - struct inode *cache_inode; - struct super_block *cache_sb; - - ENTRY; - cache_inode = I2CI(inode); - cache_sb = S2CSB(inode->i_sb); - - if (!cache_inode || !cache_sb) - return; - - pre_smfs_inode(inode, cache_inode); - if (cache_sb->s_op->dirty_inode) - cache_sb->s_op->dirty_inode(cache_inode); - - post_smfs_inode(inode, cache_inode); - return; -} + struct inode *cache_inode; + struct super_block *cache_sb; + ENTRY; -static void smfs_put_inode(struct inode *inode) -{ - struct inode *cache_inode; - struct super_block *cache_sb; + cache_inode = I2CI(inode); + cache_sb = S2CSB(inode->i_sb); - ENTRY; - cache_inode = I2CI(inode); - cache_sb = S2CSB(inode->i_sb); + if (!cache_inode || !cache_sb) + return; - if (!cache_inode || !cache_sb) - return; - if (cache_sb->s_op->put_inode) - cache_sb->s_op->put_inode(cache_inode); + pre_smfs_inode(inode, cache_inode); + if (cache_sb->s_op->dirty_inode) + cache_sb->s_op->dirty_inode(cache_inode); - return; + post_smfs_inode(inode, cache_inode); + EXIT; +} + +static void smfs_put_inode(struct inode *inode) +{ + struct inode *cache_inode; + struct super_block *cache_sb; + ENTRY; + + cache_inode = I2CI(inode); + cache_sb = S2CSB(inode->i_sb); + + if (!cache_inode || !cache_sb) + return; + if (cache_sb->s_op->put_inode) + cache_sb->s_op->put_inode(cache_inode); + EXIT; } static void smfs_write_super(struct 
super_block *sb) { - struct super_block *cache_sb; + struct super_block *cache_sb; + ENTRY; - ENTRY; - cache_sb = S2CSB(sb); + cache_sb = S2CSB(sb); + if (!cache_sb) + return; - if (!cache_sb) - return; - - if (cache_sb->s_op->write_super) - cache_sb->s_op->write_super(cache_sb); + if (cache_sb->s_op->write_super) + cache_sb->s_op->write_super(cache_sb); - duplicate_sb(cache_sb, sb); - return; + duplicate_sb(sb, cache_sb); + EXIT; } static void smfs_write_super_lockfs(struct super_block *sb) { - struct super_block *cache_sb; + struct super_block *cache_sb; + ENTRY; - ENTRY; - cache_sb = S2CSB(sb); + cache_sb = S2CSB(sb); + if (!cache_sb) + return; - if (!cache_sb) - return; - - if (cache_sb->s_op->write_super_lockfs) - cache_sb->s_op->write_super_lockfs(cache_sb); + if (cache_sb->s_op->write_super_lockfs) + cache_sb->s_op->write_super_lockfs(cache_sb); - duplicate_sb(cache_sb, sb); - return; + duplicate_sb(sb, cache_sb); + EXIT; } static void smfs_unlockfs(struct super_block *sb) { - struct super_block *cache_sb; + struct super_block *cache_sb; + ENTRY; - ENTRY; - cache_sb = S2CSB(sb); + cache_sb = S2CSB(sb); + if (!cache_sb) + return; - if (!cache_sb) - return; - - if (cache_sb->s_op->unlockfs) - cache_sb->s_op->unlockfs(cache_sb); + if (cache_sb->s_op->unlockfs) + cache_sb->s_op->unlockfs(cache_sb); - duplicate_sb(cache_sb, sb); - return; + duplicate_sb(sb, cache_sb); + EXIT; } -static int smfs_statfs(struct super_block * sb, struct statfs * buf) + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static int smfs_statfs(struct super_block *sb, struct statfs *buf) +#else +static int smfs_statfs(struct super_block *sb, struct kstatfs *buf) +#endif { - struct super_block *cache_sb; - int rc = 0; + struct super_block *cache_sb; + int rc = 0; + ENTRY; - ENTRY; - cache_sb = S2CSB(sb); + cache_sb = S2CSB(sb); + if (!cache_sb) + RETURN(-EINVAL); - if (!cache_sb) - RETURN(-EINVAL); - - if (cache_sb->s_op->statfs) - rc = cache_sb->s_op->statfs(cache_sb, buf); + if 
(cache_sb->s_op->statfs) + rc = cache_sb->s_op->statfs(cache_sb, buf); - duplicate_sb(cache_sb, sb); - - return rc; + duplicate_sb(sb, cache_sb); + + RETURN(rc); } -static int smfs_remount(struct super_block * sb, int * flags, char * data) + +static int smfs_remount(struct super_block *sb, int *flags, char *data) { - struct super_block *cache_sb; - int rc = 0; + struct super_block *cache_sb; + int rc = 0; + ENTRY; + + cache_sb = S2CSB(sb); - ENTRY; - cache_sb = S2CSB(sb); + if (!cache_sb) + RETURN(-EINVAL); - if (!cache_sb) - RETURN(-EINVAL); - - if (cache_sb->s_op->remount_fs) - rc = cache_sb->s_op->remount_fs(cache_sb, flags, data); + if (cache_sb->s_op->remount_fs) + rc = cache_sb->s_op->remount_fs(cache_sb, flags, data); - duplicate_sb(cache_sb, sb); - RETURN(rc); + duplicate_sb(sb, cache_sb); + RETURN(rc); } + struct super_operations smfs_super_ops = { - read_inode: smfs_read_inode, - clear_inode: smfs_clear_inode, - put_super: smfs_put_super, - delete_inode: smfs_delete_inode, - write_inode: smfs_write_inode, + read_inode: smfs_read_inode, + clear_inode: smfs_clear_inode, + put_super: smfs_put_super, + delete_inode: smfs_delete_inode, + write_inode: smfs_write_inode, dirty_inode: smfs_dirty_inode, /* BKL not held. We take it */ put_inode: smfs_put_inode, /* BKL not held. Don't need */ @@ -309,10 +303,4 @@ struct super_operations smfs_super_ops = { unlockfs: smfs_unlockfs, /* BKL not held. 
We take it */ statfs: smfs_statfs, /* BKL held */ remount_fs: smfs_remount, /* BKL held */ - }; - - - - - diff --git a/lustre/smfs/ioctl.c b/lustre/smfs/ioctl.c new file mode 100644 index 0000000..c205c43 --- /dev/null +++ b/lustre/smfs/ioctl.c @@ -0,0 +1,164 @@ +#define DEBUG_SUBSYSTEM S_SM + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smfs_internal.h" + + +struct smfs_control_device smfs_dev; + +static int smfs_handle_ioctl(unsigned int cmd, unsigned long arg) +{ + struct obd_ioctl_data *data = NULL; + struct super_block *sb = NULL; + char *buf = NULL, *dir = NULL; + int err = 0, len = 0, count = 0, do_kml = 0; + + if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { + CERROR("OBD ioctl: data error\n"); + GOTO(out, err = -EINVAL); + } + data = (struct obd_ioctl_data *)buf; + + switch (cmd) { + case IOC_SMFS_START: + case IOC_SMFS_STOP: + case IOC_SMFS_REINT: + case IOC_SMFS_UNDO:{ + char *name; + if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { + CERROR("No mountpoint passed!\n"); + GOTO(out, err = -EINVAL); + } + name = (char*) data->ioc_inlbuf1; + sb = smfs_get_sb_by_path(name, data->ioc_inllen1); + if (!sb) { + CERROR("can not find superblock at %s\n", buf); + GOTO(out, err = -EINVAL); + } + /*get cmd count*/ + if (data->ioc_inllen2 && data->ioc_inlbuf2) { + dir = (char *)data->ioc_inlbuf2; + } + if (data->ioc_plen1) + count = *((int*)data->ioc_pbuf1); + if (data->ioc_plen2) + do_kml = *((int*)data->ioc_pbuf2); + break; + } + default: { + CERROR("The command passed in is Invalid\n"); + GOTO(out, err = -EINVAL); + } + } + + switch (cmd) { + case IOC_SMFS_START: + err = smfs_start_rec(sb); + break; + case IOC_SMFS_STOP: + err = smfs_stop_rec(sb); + break; + case IOC_SMFS_REINT: + case IOC_SMFS_UNDO: { + int flags = 0; + if (cmd == IOC_SMFS_REINT) + SET_REC_OP_FLAGS(flags, SMFS_REINT_REC); + else + 
SET_REC_OP_FLAGS(flags, SMFS_UNDO_REC); + if (count == 0) + SET_REC_COUNT_FLAGS(flags, SMFS_REC_ALL); + if (do_kml) + SET_REC_WRITE_KML_FLAGS(flags, SMFS_WRITE_KML); + err = smfs_process_rec(sb, count, dir, flags); + break; + } + } +out: + if (buf) + obd_ioctl_freedata(buf, len); + RETURN(err); +} +static int smfs_psdev_ioctl (struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg) +{ + int rc = 0; + rc = smfs_handle_ioctl(cmd, arg); + RETURN(rc); +} + +/* called when opening /dev/device */ +static int smfs_psdev_open(struct inode * inode, struct file * file) +{ + int dev; + ENTRY; + + if (!inode) + RETURN(-EINVAL); + dev = MINOR(inode->i_rdev); + if (dev != SMFS_PSDEV_MINOR) + RETURN(-ENODEV); + + RETURN(0); +} + +/* called when closing /dev/device */ +static int smfs_psdev_release(struct inode * inode, struct file * file) +{ + int dev; + ENTRY; + + if (!inode) + RETURN(-EINVAL); + dev = MINOR(inode->i_rdev); + if (dev != SMFS_PSDEV_MINOR) + RETURN(-ENODEV); + + RETURN(0); +} + +/* declare character device */ +static struct file_operations smfscontrol_fops = { + ioctl: smfs_psdev_ioctl, /* ioctl */ + open: smfs_psdev_open, /* open */ + release: smfs_psdev_release, /* release */ +}; + +#define SMFS_MINOR 250 +static struct miscdevice smfscontrol_dev = { + minor: SMFS_MINOR, + name: "smfscontrol", + fops: &smfscontrol_fops +}; + +int init_smfs_psdev(void) +{ + printk(KERN_INFO "SMFS psdev driver v0.01, braam@clusterfs.com\n"); + + misc_register(&smfscontrol_dev); + + return 0; +} + +void smfs_cleanup_psdev(void) +{ + ENTRY; + misc_deregister(&smfscontrol_dev); + EXIT; +} diff --git a/lustre/smfs/journal.c b/lustre/smfs/journal.c index 715a61c..040393f 100644 --- a/lustre/smfs/journal.c +++ b/lustre/smfs/journal.c @@ -1,5 +1,22 @@ -/* - * smfs/inode.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ @@ -12,60 +29,61 @@ #include #include #include +#include +#include +#include #include -#include "kml_idl.h" -#include "smfs_internal.h" -extern struct sm_ops smfs_operations; +#include +#include +#include +#include "smfs_internal.h" #define size_round(x) (((x)+3) & ~0x3) -void *smfs_trans_start(struct inode *inode, int op) +void *smfs_trans_start(struct inode *inode, int op, void *desc_private) { + struct fsfilt_operations *fsfilt = S2SMI(inode->i_sb)->sm_fsfilt; + + CDEBUG(D_INFO, "trans start %p\n", fsfilt->fs_start); - CDEBUG(D_INODE, "trans start %p\n", - smfs_operations.sm_journal_ops.tr_start); - if (smfs_operations.sm_journal_ops.tr_start) { - return smfs_operations.sm_journal_ops.tr_start(inode, op); - } - return NULL; + SMFS_TRANS_OP(inode, op); + + /* There are some problem here. 
fs_start in fsfilt is used by lustre + * the journal blocks of write rec are not counted in FIXME later */ + if (fsfilt->fs_start) + return fsfilt->fs_start(inode, op, desc_private, 0); + return NULL; } -void smfs_trans_commit(void *handle) +void smfs_trans_commit(struct inode *inode, void *handle, int force_sync) { - if (smfs_operations.sm_journal_ops.tr_commit) { - smfs_operations.sm_journal_ops.tr_commit(handle); - } - CDEBUG(D_SM, "trans commit %p\n", - smfs_operations.sm_journal_ops.tr_commit); + struct fsfilt_operations *fsfilt = S2SMI(inode->i_sb)->sm_fsfilt; + + CDEBUG(D_INFO, "trans commit %p\n", fsfilt->fs_commit); + + if (fsfilt->fs_commit) + fsfilt->fs_commit(inode, handle, force_sync); } -/*The following function are gotten from intermezzo - * smfs_path - * logit - * journal_log_prefix_with_groups_and_ids - * journal_log_prefix -*/ -static char* smfs_path(struct dentry *dentry, struct dentry *root, - char *buffer, int buflen) + +/*smfs_path is gotten from intermezzo*/ +static char* smfs_path(struct dentry *dentry, struct dentry *root, char *buffer, + int buflen) { char * end = buffer+buflen; + char * name = buffer; + char * buf_end = buffer + buflen; char * retval; - + *--end = '\0'; buflen--; - if (dentry->d_parent != dentry && list_empty(&dentry->d_hash)) { - buflen -= 10; - end -= 10; - memcpy(end, " (deleted)", 10); - } - /* Get '/' right */ retval = end-1; *retval = '/'; - + for (;;) { struct dentry * parent; int namelen; - + if (dentry == root) break; parent = dentry->d_parent; @@ -81,169 +99,432 @@ static char* smfs_path(struct dentry *dentry, struct dentry *root, retval = end; dentry = parent; } + + while (end != buf_end) + *name++ = *end++; + *name = '\0'; return retval; } - -static inline char *logit(char *buf, const void *value, int size) +static int smfs_log_path(struct dentry *root, struct dentry *dentry, + char *buffer, int buffer_len) { - char *ptr = (char *)value; - - memcpy(buf, ptr, size); - buf += size; - return buf; + char *p_name = 
buffer + sizeof(int); + char *name = NULL; + int namelen = 0; + + name = smfs_path(dentry, root, p_name, buffer_len); + namelen = cpu_to_le32(strlen(p_name)); + memcpy(buffer, &namelen, sizeof(int)); + + namelen += sizeof(int); + RETURN(namelen); } -static inline char * -journal_log_prefix_with_groups_and_ids(char *buf, int opcode, - __u32 ngroups, gid_t *groups, - __u32 fsuid, __u32 fsgid) + +static inline int log_it(char *buffer, void *data, int length) { - struct kml_prefix_hdr p; - u32 loggroups[NGROUPS_MAX]; - - int i; - - p.version = KML_MAJOR_VERSION | KML_MINOR_VERSION; - p.pid = cpu_to_le32(current->pid); - p.auid = cpu_to_le32(current->uid); - p.fsuid = cpu_to_le32(fsuid); - p.fsgid = cpu_to_le32(fsgid); - p.ngroups = cpu_to_le32(ngroups); - p.opcode = cpu_to_le32(opcode); - for (i=0 ; i < ngroups ; i++) - loggroups[i] = cpu_to_le32((__u32) groups[i]); - - buf = logit(buf, &p, sizeof(struct kml_prefix_hdr)); - buf = logit(buf, &loggroups, sizeof(__u32) * ngroups); - return buf; + memcpy(buffer, &length, sizeof(int)); + memcpy(buffer + sizeof(int), data, length); + return (sizeof(int) + length); } - -static inline char * -journal_log_prefix(char *buf, int opcode) + +static int smfs_post_rec_create(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - __u32 groups[NGROUPS_MAX]; - int i; - - /* convert 16 bit gid's to 32 bit gid's */ - for (i=0; ingroups; i++) - groups[i] = (__u32) current->groups[i]; - - return journal_log_prefix_with_groups_and_ids(buf, opcode, - (__u32)current->ngroups, - groups, - (__u32)current->fsuid, - (__u32)current->fsgid); + struct smfs_super_info *sinfo; + struct dentry *root; + struct update_record *rec = NULL; + char *buffer = NULL, *p_name; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dentry->d_inode->i_sb); + if (!sinfo) + RETURN(-EINVAL); + + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + rec = (struct update_record*)buffer; + + 
smfs_rec_pack(rec, dentry->d_inode, dir, REINT_CREATE); + + p_name = buffer + sizeof(struct update_record); + + root = dir->i_sb->s_root; + + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + if (data1) { + /*for symlink data is the path of the symname*/ + int data_len = strlen(data1); + + buffer_length += log_it(p_name + buffer_length, + data1, data_len); + } + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + + RETURN(rc); } - -static inline char * -journal_log_prefix_with_groups(char *buf, int opcode, - __u32 ngroups, gid_t *groups) + +static int smfs_post_rec_link(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - return journal_log_prefix_with_groups_and_ids(buf, opcode, - ngroups, groups, - (__u32)current->fsuid, - (__u32)current->fsgid); + struct smfs_super_info *sinfo; + struct dentry *root; + struct dentry *new_dentry = (struct dentry *)data1; + struct update_record *rec = NULL; + char *buffer = NULL, *p_name = NULL; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dir->i_sb); + if (!sinfo) + RETURN(-EINVAL); + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + + rec = (struct update_record*)buffer; + + smfs_rec_pack(rec, dentry->d_inode, NULL, REINT_LINK); + + root = dir->i_sb->s_root; + /*record old_dentry path*/ + p_name = buffer + sizeof(struct update_record); + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + if (rc < 0) + GOTO(exit, rc); + + buffer_length += rc; + p_name += buffer_length; + + /*record new_dentry path*/ + rc = smfs_log_path(root, new_dentry, p_name, + PAGE_SIZE - rc - sizeof(int)); + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + rec->ur_len = sizeof(struct 
update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); + +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + RETURN(rc); } -static inline char *log_dentry_version(char *buf, struct dentry *dentry) +static int smfs_post_rec_unlink(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - struct smfs_version version; - - smfs_getversion(&version, dentry->d_inode); - - version.sm_mtime = HTON__u64(version.sm_mtime); - version.sm_ctime = HTON__u64(version.sm_ctime); - version.sm_size = HTON__u64(version.sm_size); - - return logit(buf, &version, sizeof(version)); + struct smfs_super_info *sinfo; + struct dentry *root; + int flag = *((int*)data1); + struct update_record *rec = NULL; + char *buffer = NULL, *p_name; + int rc = 0, buffer_length = 0; + char fidname[LL_FID_NAMELEN]; + struct dentry *new_child = NULL; + int namelen; + ENTRY; + + sinfo = S2SMI(dentry->d_inode->i_sb); + if (!sinfo) + RETURN(-EINVAL); + + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + rec = (struct update_record*)buffer; + + smfs_rec_pack(rec, dentry->d_inode, dir, REINT_UNLINK); + + p_name = buffer + sizeof(struct update_record); + + root = dir->i_sb->s_root; + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + if (rc < 0) + GOTO(exit, rc); + + buffer_length += rc; + p_name += rc; + + if (!flag) { + /*unlink the inode*/ + namelen = ll_fid2str(fidname, dentry->d_inode->i_ino, + dentry->d_inode->i_generation); + + down(&sinfo->smsi_delete_dir->d_inode->i_sem); + new_child = lookup_one_len(fidname, sinfo->smsi_delete_dir, namelen); + if (new_child->d_inode != NULL) { + CERROR("has been deleted obj dentry %lu:%u!\n", + dentry->d_inode->i_ino, + dentry->d_inode->i_generation); + LBUG(); + } + + /* FIXME-WANGDI: this is ugly, but I do not know how to resolve + * it. 
*/ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + up(&dir->i_zombie); +#endif + lock_kernel(); + SMFS_CLEAN_INODE_REC(dir); + rc = vfs_rename(dir, dentry, sinfo->smsi_delete_dir->d_inode, + new_child); + SMFS_SET_INODE_REC(dir); + unlock_kernel(); + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + down(&dir->i_zombie); +#endif + up(&sinfo->smsi_delete_dir->d_inode->i_sem); + if (rc) + GOTO(exit, rc); + /* in vfs_unlink the inode on the dentry will be deleted, so we + * should delete it from dentry hash. */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + list_del_init(&dentry->d_hash); +#else + hlist_del_init(&dentry->d_hash); +#endif + + /* put the new_file_name to the log. */ + rc = smfs_log_path(root, dentry, p_name, + PAGE_SIZE - buffer_length); + if (rc < 0) + GOTO(exit, rc); + buffer_length += rc; + rc = 0; + } else { + /*only decrease the link count*/ + namelen = sizeof(ino_t); + + buffer_length += log_it(p_name + buffer_length, + &(dentry->d_inode->i_ino), namelen); + SET_REC_DEC_LINK_FLAGS(rec->ur_flags, SMFS_DEC_LINK); + } + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (new_child); + dput(new_child); + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + + RETURN(rc); } - -static inline char *log_version(char *buf, struct smfs_version *pv) + +static int smfs_post_rec_rename(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - struct smfs_version version; - - memcpy(&version, pv, sizeof(version)); - - version.sm_mtime = HTON__u64(version.sm_mtime); - version.sm_ctime = HTON__u64(version.sm_ctime); - version.sm_size = HTON__u64(version.sm_size); - - return logit(buf, &version, sizeof(version)); + struct smfs_super_info *sinfo; + struct dentry *root; + struct inode *new_dir = (struct inode *)data1; + struct dentry *new_dentry = (struct dentry *)data2; + struct update_record *rec = NULL; + char *buffer = NULL, *p_name 
= NULL; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dir->i_sb); + if (!sinfo) + RETURN(-EINVAL); + + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + + rec = (struct update_record*)buffer; + + smfs_rec_pack(rec, dentry->d_inode, dir, REINT_RENAME); + + root = dir->i_sb->s_root; + /*record old_dentry path*/ + p_name = buffer + sizeof(struct update_record); + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + if (rc < 0) + GOTO(exit, rc); + + buffer_length += rc; + p_name += rc; + + root = new_dir->i_sb->s_root; + /*record new_dentry path*/ + rc = smfs_log_path(root, new_dentry, p_name, + PAGE_SIZE - rc - sizeof(int)); + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + RETURN(rc); } -static inline char *journal_log_suffix(char *buf, char *log, - struct dentry *dentry) + +static int smfs_post_rec_setattr(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - struct kml_suffix s; - struct kml_prefix_hdr *p = (struct kml_prefix_hdr *)log; - - s.prevrec = 0; - - /* record number needs to be filled in after reservation - s.recno = cpu_to_le32(rec->recno); */ - s.time = cpu_to_le32(CURRENT_TIME); - s.len = p->len; - return logit(buf, &s, sizeof(s)); -} + struct smfs_super_info *sinfo; + struct dentry *root; + struct iattr *attr = (struct iattr *)data1; + struct update_record *rec = NULL; + char *buffer = NULL, *p_name; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dentry->d_inode->i_sb); + if (!sinfo) + RETURN(-EINVAL); -int smfs_kml_log(struct smfs_super_info *smfs_info, - const char *buf, size_t size, - const char *string1, int len1, - const char *string2, int len2, - const char *string3, int len3) + OBD_ALLOC(buffer, PAGE_SIZE 
+ sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + rec = (struct update_record*)buffer; + + smfs_rec_pack(rec, dentry->d_inode, attr, REINT_SETATTR); + + root = dentry->d_inode->i_sb->s_root; + /*record old_dentry path*/ + p_name = buffer + sizeof(struct update_record); + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + RETURN(rc); +} +static int smfs_post_rec_open_close(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - int rc = 0; - /*should pack the record and dispatch it - *create llog handle write to the log*/ - return rc; + struct smfs_super_info *sinfo; + struct dentry *root; + int open = *(int*)data1; + struct update_record *rec = NULL; + char *buffer = NULL, *p_name; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dentry->d_inode->i_sb); + if (!sinfo) + RETURN(-EINVAL); + + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + rec = (struct update_record*)buffer; + if (open) + smfs_rec_pack(rec, dentry->d_inode, NULL, REINT_OPEN); + else + smfs_rec_pack(rec, dentry->d_inode, NULL, REINT_CLOSE); + root = dentry->d_inode->i_sb->s_root; + /*record old_dentry path*/ + p_name = buffer + sizeof(struct update_record); + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + RETURN(rc); } -int smfs_journal_mkdir(struct dentry *dentry, - struct smfs_version *tgt_dir_ver, - 
struct smfs_version *new_dir_ver, - int mode) +static int smfs_post_rec_write(struct inode *dir, struct dentry *dentry, + void *data1, void *data2) { - int opcode = KML_OPCODE_MKDIR; - char *buffer, *path, *logrecord, record[292]; + struct smfs_record_extents extents; + struct smfs_super_info *sinfo; struct dentry *root; - __u32 uid, gid, lmode, pathlen; - struct smfs_super_info *smfs_info; - struct super_block* sb; - int error, size; - - ENTRY; - - sb = dentry->d_inode->i_sb; - root = sb->s_root; - smfs_info = S2SMI(sb); - - uid = cpu_to_le32(dentry->d_inode->i_uid); - gid = cpu_to_le32(dentry->d_inode->i_gid); - lmode = cpu_to_le32(mode); - - SM_ALLOC(buffer, PAGE_SIZE); - path = smfs_path(dentry, root, buffer, PAGE_SIZE); - pathlen = cpu_to_le32(MYPATHLEN(buffer, path)); - size = sizeof(__u32) * current->ngroups + - sizeof(struct kml_prefix_hdr) + 3 * sizeof(*tgt_dir_ver) + - sizeof(lmode) + sizeof(uid) + sizeof(gid) + sizeof(pathlen) + - sizeof(struct kml_suffix); - - if ( size > sizeof(record) ) - CERROR("InterMezzo: BUFFER OVERFLOW in %s!\n", __FUNCTION__); - - logrecord = journal_log_prefix(record, opcode); - - logrecord = log_version(logrecord, tgt_dir_ver); - logrecord = log_dentry_version(logrecord, dentry->d_parent); - logrecord = log_version(logrecord, new_dir_ver); - logrecord = logit(logrecord, &lmode, sizeof(lmode)); - logrecord = logit(logrecord, &uid, sizeof(uid)); - logrecord = logit(logrecord, &gid, sizeof(gid)); - logrecord = logit(logrecord, &pathlen, sizeof(pathlen)); - logrecord = journal_log_suffix(logrecord, record, dentry); - - error = smfs_kml_log(smfs_info, record, size, - path, size_round(le32_to_cpu(pathlen)), - NULL, 0, NULL, 0); - SM_FREE(buffer, PAGE_SIZE); - RETURN(error); + struct update_record *rec = NULL; + char *buffer = NULL, *p_name; + int extents_length = 0; + int rc = 0, buffer_length = 0; + ENTRY; + + sinfo = S2SMI(dentry->d_inode->i_sb); + if (!sinfo) + RETURN(-EINVAL); + + OBD_ALLOC(buffer, PAGE_SIZE + sizeof(struct 
update_record)); + if (!buffer) + GOTO(exit, rc = -ENOMEM); + rec = (struct update_record*)buffer; + + smfs_rec_pack(rec, dentry->d_inode, NULL, REINT_OPEN); + + root = dentry->d_inode->i_sb->s_root; + /*record old_dentry path*/ + p_name = buffer + sizeof(struct update_record); + rc = smfs_log_path(root, dentry, p_name, PAGE_SIZE); + + if (rc < 0) { + GOTO(exit, rc); + } else { + buffer_length += rc; + rc = 0; + } + /*record the extents of this write*/ + extents.sre_count = *((size_t*)data1); + extents.sre_off = *((loff_t*)data2); + extents_length = sizeof(struct smfs_record_extents); + + buffer_length += log_it(p_name + buffer_length, + &extents, extents_length); + rec->ur_len = sizeof(struct update_record) + buffer_length; + rc = smfs_llog_add_rec(sinfo, (void*)buffer, rec->ur_len); +exit: + if (buffer) + OBD_FREE(buffer, PAGE_SIZE + sizeof(struct update_record)); + RETURN(rc); +} + +typedef int (*post_kml_rec)(struct inode *dir, struct dentry *dentry, + void *data1, void *data2); + +static post_kml_rec smfs_kml_post[REINT_MAX + 1] = { + [REINT_SETATTR] smfs_post_rec_setattr, + [REINT_CREATE] smfs_post_rec_create, + [REINT_LINK] smfs_post_rec_link, + [REINT_UNLINK] smfs_post_rec_unlink, + [REINT_RENAME] smfs_post_rec_rename, + [REINT_OPEN] smfs_post_rec_open_close, + [REINT_CLOSE] smfs_post_rec_open_close, + [REINT_WRITE] smfs_post_rec_write, +}; + +int smfs_post_kml_rec(struct inode *dir, struct dentry *dst_dentry, + void *data1, void *data2, int op) +{ + return smfs_kml_post[op](dir, dst_dentry, data1, data2); } diff --git a/lustre/smfs/journal_ext3.c b/lustre/smfs/journal_ext3.c deleted file mode 100644 index c54effe..0000000 --- a/lustre/smfs/journal_ext3.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * smfs/journal_ext3.c - * - */ - -#define DEBUG_SUBSYSTEM S_SM - -#include -#include -#include -#include -#include -#include -#include -#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) -#include -#include -#include -#endif - -#include "smfs_internal.h" 
-#include "kml_idl.h" - -#if defined(CONFIG_EXT3_FS) || defined (CONFIG_EXT3_FS_MODULE) - -#define MAX_PATH_BLOCKS(inode) (PATH_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) -#define MAX_NAME_BLOCKS(inode) (NAME_MAX >> EXT3_BLOCK_SIZE_BITS((inode)->i_sb)) - -static void *smfs_e3_trans_start(struct inode *inode, - int op) -{ - - int trunc_blks, one_path_blks, extra_path_blks; - int extra_name_blks, lml_blks, jblocks; - __u32 avail_kmlblocks; - handle_t *handle; - - avail_kmlblocks = inode->i_sb->u.ext3_sb.s_es->s_free_blocks_count; - - if ( avail_kmlblocks < 3 ) { - return ERR_PTR(-ENOSPC); - } - - if ((op != KML_OPCODE_UNLINK && op != KML_OPCODE_RMDIR) - && avail_kmlblocks < 6 ) { - return ERR_PTR(-ENOSPC); - } - /* Need journal space for: - at least three writes to KML (two one block writes, one a path) - possibly a second name (unlink, rmdir) - possibly a second path (symlink, rename) - a one block write to the last rcvd file - */ - - trunc_blks = EXT3_DATA_TRANS_BLOCKS + 1; - one_path_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 3; - lml_blks = 4*EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode) + 2; - extra_path_blks = EXT3_DATA_TRANS_BLOCKS + MAX_PATH_BLOCKS(inode); - extra_name_blks = EXT3_DATA_TRANS_BLOCKS + MAX_NAME_BLOCKS(inode); - - /* additional blocks appear for "two pathname" operations - and operations involving the LML records - */ - - switch (op) { - case KML_OPCODE_MKDIR: - jblocks = one_path_blks + trunc_blks - + EXT3_DATA_TRANS_BLOCKS + 4 + 2; - break; - default: - CDEBUG(D_INODE, "invalid operation %d for journal\n", op); - return NULL; - } - - CDEBUG(D_INODE, "creating journal handle (%d blocks)\n", jblocks); - - lock_kernel(); - handle = journal_start(EXT3_JOURNAL(inode), jblocks); - unlock_kernel(); - - return handle; -} - -static void smfs_e3_trans_commit(void *handle) -{ - lock_kernel(); - journal_stop(handle); - unlock_kernel(); -} - -struct journal_operations smfs_ext3_journal_ops = { - .tr_start = smfs_e3_trans_start, - 
.tr_commit = smfs_e3_trans_commit, -}; -#endif - diff --git a/lustre/smfs/kml.c b/lustre/smfs/kml.c index 4151fda..0658283 100644 --- a/lustre/smfs/kml.c +++ b/lustre/smfs/kml.c @@ -1,5 +1,22 @@ -/* - * smfs/kml.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ @@ -9,51 +26,297 @@ #include #include #include +#include +#include +#include #include -#include "smfs_internal.h" -#include "kml_idl.h" +#include +#include +#include "smfs_internal.h" /*FIXME there should be more conditions in this check*/ -int smfs_do_kml(struct inode *dir) +int smfs_do_rec(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct smfs_super_info *smfs_info = S2SMI(sb); + + if (SMFS_DO_REC(smfs_info) && SMFS_INIT_REC(smfs_info) && + SMFS_DO_INODE_REC(inode)) + return 1; + return 0; +} + +int smfs_rec_init(struct super_block *sb) +{ + struct smfs_super_info *smfs_info = S2SMI(sb); + int rc = 0; + + SMFS_SET_REC(smfs_info); + + RETURN(rc); +} + +int smfs_rec_cleanup(struct super_block *sb) { - struct smfs_super_info *smfs_info = S2SMI(dir->i_sb); - - if (smfs_info->flags & SM_DO_KML) { - return 1; - } - return 0; + int rc = 0; + + SMFS_CLEAN_REC(S2SMI(sb)); + RETURN(rc); } -void smfs_getversion(struct smfs_version * smfs_version, - struct inode * inode) + +void reint_rec_free(struct reint_record *reint_rec) { - smfs_version->sm_mtime = (__u64)inode->i_mtime; - smfs_version->sm_ctime = (__u64)inode->i_ctime; - smfs_version->sm_size = (__u64)inode->i_size; + if (reint_rec) { + if (reint_rec->rec_data1) + OBD_FREE(reint_rec->rec_data1, + reint_rec->rec1_size + 1); + if (reint_rec->rec_data2) + OBD_FREE(reint_rec->rec_data2, + reint_rec->rec2_size + 1); + + OBD_FREE(reint_rec, sizeof(struct reint_record)); + } } -int smfs_kml_init(struct super_block *sb) +static inline void copy_inode_attr(struct iattr *iattr, struct inode *inode) { - struct smfs_super_info *smfs_info = S2SMI(sb); - int rc = 0; - smfs_info->flags |= SM_DO_KML; + iattr->ia_mode = inode->i_mode; + iattr->ia_uid = inode->i_uid; + iattr->ia_gid = inode->i_gid; + iattr->ia_atime = inode->i_atime; + iattr->ia_ctime = inode->i_ctime; + iattr->ia_mtime = inode->i_mtime; + iattr->ia_size = inode->i_size; +} - rc = smfs_llog_setup(&smfs_info->kml_llog); +void 
smfs_rec_pack(struct update_record *rec, struct inode *dst, + void *data, int op) +{ + rec->ur_fsuid = current->fsuid; + rec->ur_fsgid = current->fsgid; + rec->ur_rdev = dst->i_rdev; + rec->ur_opcode = op; + copy_inode_attr(&rec->ur_iattr, dst); + if (data) { + switch (op) { + case REINT_CREATE: + case REINT_LINK: + case REINT_UNLINK: + case REINT_RENAME: { + struct inode *dir = (struct inode *)data; + copy_inode_attr(&rec->ur_pattr, dir); + break; + } + case REINT_SETATTR: { + struct iattr *attr = (struct iattr *)data; + memcpy(&rec->ur_pattr, attr, sizeof(struct iattr)); + break; + } + } + } +} - RETURN(rc); +static inline void unpack_attr(struct reint_record *r_rec, + struct update_record *u_rec) +{ + memcpy(&r_rec->u_rec, u_rec, sizeof(struct update_record)); } -int post_kml_mkdir(struct inode *dir, struct dentry *dentry) +static inline int unpack_rec_data(char **p_buffer, int *size, + char *in_data, char *args_data) { - struct smfs_version tgt_dir_ver, new_dir_ver; - int error; + int args_len = 0; + int rc = 0; + ENTRY; + + if (args_data) + args_len = strlen(args_data); + + *size = *((int*)(in_data)); + rc = *size + sizeof(int); - smfs_getversion(&tgt_dir_ver, dir); + OBD_ALLOC(*p_buffer, *size + args_len + 1); + if (!*p_buffer) + RETURN(-ENOMEM); + /*First copy reint dir */ + if (args_data) + memcpy(*p_buffer, args_data, args_len); - smfs_getversion(&new_dir_ver, dentry->d_inode); - - error = smfs_journal_mkdir(dentry, &tgt_dir_ver, - &new_dir_ver, - dentry->d_inode->i_mode); - return error; + /*then copy the node name */ + memcpy(*p_buffer + args_len, + (in_data + sizeof(int)), *size); + + *size += args_len; + + RETURN(rc); } +int smfs_rec_unpack(struct smfs_proc_args *args, struct reint_record *r_rec, + char *rec_buf) +{ + struct update_record *u_rec = (struct update_record *)rec_buf; + int rc = 0, length = 0; + ENTRY; + /*FIXME wangdi, there unpack are so smiliar that + *we will put it together later*/ + + if (SMFS_DO_WRITE_KML(args->sr_flags)) + 
SET_REC_WRITE_KML_FLAGS(r_rec->u_rec.ur_flags, SMFS_WRITE_KML); + unpack_attr(r_rec, u_rec); + length += sizeof(struct update_record); + rc = unpack_rec_data(&r_rec->rec_data1, &r_rec->rec1_size, + (rec_buf + length), args->sr_data); + switch (u_rec->ur_opcode) { + case REINT_OPEN: + case REINT_CLOSE: + /*record src path which will be passed to reint and undo*/ + rc = unpack_rec_data(&r_rec->rec_data2, &r_rec->rec2_size, + (rec_buf + length), NULL); + break; + case REINT_LINK: + case REINT_RENAME: + case REINT_SETATTR: + case REINT_UNLINK: + case REINT_CREATE: + case REINT_WRITE: { + length += rc; + if (length < u_rec->ur_len) { + char *pre_name; + if (u_rec->ur_opcode == REINT_CREATE || + u_rec->ur_opcode == REINT_WRITE || + (u_rec->ur_opcode == REINT_UNLINK && + SMFS_DO_DEC_LINK(r_rec->u_rec.ur_flags))) + pre_name = NULL; + else + pre_name = args->sr_data; + + rc = unpack_rec_data(&r_rec->rec_data2, + &r_rec->rec2_size, + (rec_buf + length), pre_name); + } + break; + } + } + if (rc > 0) + rc = 0; + + RETURN(rc); +} + +int smfs_start_rec(struct super_block *sb) +{ + struct dentry *dentry; + struct lvfs_run_ctxt saved; + int rc = 0; + ENTRY; + + if (SMFS_INIT_REC(S2SMI(sb)) || + (!SMFS_DO_REC(S2SMI(sb)) && !SMFS_CACHE_HOOK(S2SMI(sb)))) + RETURN(rc); + + rc = smfs_llog_setup(sb); + if (rc) + RETURN(rc); + + push_ctxt(&saved, S2SMI(sb)->smsi_ctxt, NULL); + dentry = simple_mkdir(current->fs->pwd, "DELETE", 0777, 1); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create LOGS directory: rc = %d\n", rc); + GOTO(err_exit, rc = -EINVAL); + } +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (list_empty(&dentry->d_hash)) + d_rehash(dentry); +#else + /* FIXME-WANGDI: here should be be something. 
*/ +#endif + + if (!rc) + SMFS_SET_INIT_REC(S2SMI(sb)); + S2SMI(sb)->smsi_delete_dir = dentry; +exit: + pop_ctxt(&saved, S2SMI(sb)->smsi_ctxt, NULL); + RETURN(rc); +err_exit: + if (S2SMI(sb)->smsi_ctxt) + OBD_FREE(S2SMI(sb)->smsi_ctxt, sizeof(struct lvfs_run_ctxt)); + goto exit; +} + +int smfs_stop_rec(struct super_block *sb) +{ + int rc = 0; + ENTRY; + + if (!SMFS_INIT_REC(S2SMI(sb)) || + (!SMFS_DO_REC(S2SMI(sb)) && !SMFS_CACHE_HOOK(S2SMI(sb)))) + RETURN(rc); + + rc = smfs_llog_cleanup(sb); + + SMFS_CLEAN_INIT_REC(S2SMI(sb)); + + if (S2SMI(sb)->smsi_delete_dir) { + l_dput(S2SMI(sb)->smsi_delete_dir); + S2SMI(sb)->smsi_delete_dir = NULL; + } + RETURN(rc); +} + +int smfs_process_rec(struct super_block *sb, int count, char *dir, int flags) +{ + struct llog_ctxt *ctxt; + struct llog_handle *loghandle; + struct smfs_proc_args args; + int rc = 0; + ENTRY; + + if (!SMFS_INIT_REC(S2SMI(sb))) { + CWARN("Did not start up rec server \n"); + RETURN(rc); + } + + memset(&args, 0, sizeof(struct smfs_proc_args)); + args.sr_sb = sb; + args.sr_count = count; + args.sr_data = dir; + args.sr_flags = flags ; + ctxt = S2SMI(sb)->smsi_rec_log; + loghandle = ctxt->loc_handle; + + if (count == 0) { + if (SMFS_DO_REINT_REC(flags)) { + struct llog_gen_rec *lgr; + + /*For reint rec, we need insert + *a gen rec to identify the end + *of the rec.*/ + OBD_ALLOC(lgr, sizeof(*lgr)); + if (!lgr) + RETURN(-ENOMEM); + lgr->lgr_hdr.lrh_len = lgr->lgr_tail.lrt_len = + sizeof(*lgr); + lgr->lgr_hdr.lrh_type = LLOG_GEN_REC; + lgr->lgr_gen = ctxt->loc_gen; + rc = llog_add(ctxt, &lgr->lgr_hdr, NULL, NULL, 1, NULL); + OBD_FREE(lgr, sizeof(*lgr)); + if (rc != 1) + RETURN(rc); + } + } else { + SET_REC_COUNT_FLAGS(args.sr_flags, SMFS_REC_ALL); + } + if (loghandle) { + if (SMFS_DO_REINT_REC(flags)) + rc = llog_cat_process(loghandle, ctxt->loc_proc_cb, + (void *)&args); + else + rc = llog_cat_reverse_process(loghandle, + ctxt->loc_proc_cb, + (void *)&args); + if (rc == LLOG_PROC_BREAK) + rc = 0; + } + 
RETURN(rc); +} diff --git a/lustre/smfs/kml_idl.h b/lustre/smfs/kml_idl.h deleted file mode 100644 index a470bb7..0000000 --- a/lustre/smfs/kml_idl.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * smfs/kml_idl.h - */ - -# define MYPATHLEN(buffer, path) ((buffer) + PAGE_SIZE - (path)) -/*Got these defines from intermezzo*/ -struct kml_log_fd { - rwlock_t fd_lock; - loff_t fd_offset; /* offset where next record should go */ - struct file *fd_file; - int fd_truncating; - unsigned int fd_recno; /* last recno written */ - struct list_head fd_reservations; -}; - -#define KML_MAJOR_VERSION 0x00010000 -#define KML_MINOR_VERSION 0x00000002 -#define KML_OPCODE_NOOP 0 -#define KML_OPCODE_CREATE 1 -#define KML_OPCODE_MKDIR 2 -#define KML_OPCODE_UNLINK 3 -#define KML_OPCODE_RMDIR 4 -#define KML_OPCODE_CLOSE 5 -#define KML_OPCODE_SYMLINK 6 -#define KML_OPCODE_RENAME 7 -#define KML_OPCODE_SETATTR 8 -#define KML_OPCODE_LINK 9 -#define KML_OPCODE_OPEN 10 -#define KML_OPCODE_MKNOD 11 -#define KML_OPCODE_WRITE 12 -#define KML_OPCODE_RELEASE 13 -#define KML_OPCODE_TRUNC 14 -#define KML_OPCODE_SETEXTATTR 15 -#define KML_OPCODE_DELEXTATTR 16 -#define KML_OPCODE_KML_TRUNC 17 -#define KML_OPCODE_GET_FILEID 18 -#define KML_OPCODE_NUM 19 - -#ifdef __KERNEL__ -# define NTOH__u32(var) le32_to_cpu(var) -# define NTOH__u64(var) le64_to_cpu(var) -# define HTON__u32(var) cpu_to_le32(var) -# define HTON__u64(var) cpu_to_le64(var) -#else -# include -# define NTOH__u32(var) GUINT32_FROM_LE(var) -# define NTOH__u64(var) GUINT64_FROM_LE(var) -# define HTON__u32(var) GUINT32_TO_LE(var) -# define HTON__u64(var) GUINT64_TO_LE(var) -#endif - - diff --git a/lustre/smfs/options.c b/lustre/smfs/options.c index 8ff445e..b2a4d3f 100644 --- a/lustre/smfs/options.c +++ b/lustre/smfs/options.c @@ -1,6 +1,25 @@ -/* - * snapfs/options.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * */ + #define DEBUG_SUBSYSTEM S_SM #include @@ -10,88 +29,96 @@ #include #include #include +#include +#include +#include #include -#include "smfs_internal.h" +#include +#include "smfs_internal.h" static struct list_head option_list; -char *options = NULL; -char *opt_left = NULL; +static char *options = NULL; +static char *opt_left = NULL; int init_option(char *data) { - INIT_LIST_HEAD(&option_list); - SM_ALLOC(options, strlen(data) + 1); - if (!options) { - CERROR("Can not allocate memory \n"); - return -ENOMEM; - } - memcpy(options, data, strlen(data)); - opt_left = options; - return 0; + INIT_LIST_HEAD(&option_list); + OBD_ALLOC(options, strlen(data) + 1); + if (!options) { + CERROR("Can not allocate memory \n"); + return -ENOMEM; + } + memcpy(options, data, strlen(data)); + opt_left = options; + return 0; } + /*cleanup options*/ void cleanup_option(void) { - struct option *option; - while (!list_empty(&option_list)) { - option = list_entry(option_list.next, struct option, list); - list_del(&option->list); - SM_FREE(option->opt, strlen(option->opt) + 1); - if (option->value) - SM_FREE(option->value, strlen(option->value) + 1); - SM_FREE(option, sizeof(struct option)); - } - SM_FREE(options, strlen(options) + 1); + struct option *option; + while (!list_empty(&option_list)) { + 
option = list_entry(option_list.next, struct option, list); + list_del(&option->list); + OBD_FREE(option->opt, strlen(option->opt) + 1); + if (option->value) + OBD_FREE(option->value, strlen(option->value) + 1); + OBD_FREE(option, sizeof(struct option)); + } + OBD_FREE(options, strlen(options) + 1); } + int get_opt(struct option **option, char **pos) { - char *name, *value, *left, *tmp; - struct option *tmp_opt; - int length = 0; + char *name, *value, *left, *tmp; + struct option *tmp_opt; + int length = 0; - *pos = opt_left; + *pos = opt_left; - if (! *opt_left) - return -ENODATA; - left = strchr(opt_left, ','); - if (left == opt_left) - return -EINVAL; - if (!left){ - left = opt_left + strlen(opt_left); - } + if (!*opt_left) + return -ENODATA; + left = strchr(opt_left, ','); + if (left == opt_left) + return -EINVAL; + if (!left) + left = opt_left + strlen(opt_left); - SM_ALLOC(tmp_opt, sizeof(struct option)); - tmp_opt->opt = NULL; - tmp_opt->value = NULL; + OBD_ALLOC(tmp_opt, sizeof(struct option)); + tmp_opt->opt = NULL; + tmp_opt->value = NULL; - tmp = opt_left; - while(tmp != left && *tmp != '=') { - length++; - tmp++; - } - SM_ALLOC(name, length + 1); - tmp_opt->opt = name; - memset(name, 0, length + 1); - while (opt_left != tmp) *name++ = *opt_left++; + tmp = opt_left; + while(tmp != left && *tmp != '=') { + length++; + tmp++; + } + OBD_ALLOC(name, length + 1); + tmp_opt->opt = name; + while (opt_left != tmp) *name++ = *opt_left++; - if (*tmp == '=') { - /*this option has value*/ - opt_left ++; /*after '='*/ - if (left == opt_left) { - SM_FREE(tmp_opt->opt, length); - SM_FREE(tmp_opt, sizeof(struct option)); - opt_left = *pos; - return -EINVAL; - } - length = left - opt_left + 1; - SM_ALLOC(value, length); - tmp_opt->value = value; - memset(value, 0, length); - while (opt_left != left) *value++ = *opt_left++; - } - list_add(&tmp_opt->list, &option_list); - if (*opt_left == ',') opt_left ++; /*after ','*/ - *option = tmp_opt; - return 0; + if (*tmp == '=') { 
+ /*this option has value*/ + opt_left ++; /*after '='*/ + if (left == opt_left) { + OBD_FREE(tmp_opt->opt, strlen(tmp_opt->opt) + 1); + OBD_FREE(tmp_opt, sizeof(struct option)); + opt_left = *pos; + return -EINVAL; + } + length = left - opt_left; + OBD_ALLOC(value, length + 1); + if (!value) { + OBD_FREE(tmp_opt->opt, strlen(tmp_opt->opt) + 1); + OBD_FREE(tmp_opt, sizeof(struct option)); + return -ENOMEM; + } + tmp_opt->value = value; + while (opt_left != left) *value++ = *opt_left++; + } + list_add(&tmp_opt->list, &option_list); + if (*opt_left == ',') opt_left ++; /*after ','*/ + *option = tmp_opt; + return 0; } diff --git a/lustre/smfs/reint.c b/lustre/smfs/reint.c deleted file mode 100644 index 50420d7..0000000 --- a/lustre/smfs/reint.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * smfs/kml.c - * - */ - -#define DEBUG_SUBSYSTEM S_SM - -#include -#include -#include -#include -#include -#include -#include "smfs_internal.h" - diff --git a/lustre/smfs/sm_fs.c b/lustre/smfs/sm_fs.c index d07facb..19283ae 100644 --- a/lustre/smfs/sm_fs.c +++ b/lustre/smfs/sm_fs.c @@ -1,12 +1,31 @@ -/* - * fs/smfs/sm_fs.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: * - * A storage management file system. + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * */ -#define EXPORT_SYMTAB + #define DEBUG_SUBSYSTEM S_SM - + +#ifndef EXPORT_SYMTAB +#define EXPORT_SYMTAB +#endif + #include #include #include @@ -14,41 +33,50 @@ #include #include #include -#include -#include "smfs_internal.h" +#include +#include +#include +#include +#include + +#include "smfs_internal.h" int sm_stack = 0; long sm_kmemory = 0; - MODULE_AUTHOR("Peter J. Braam "); MODULE_DESCRIPTION("Smfs file system filters v0.01"); - +MODULE_LICENSE("GPL"); + extern int init_smfs(void); extern int cleanup_smfs(void); -extern int init_snap_sysctl(void); - + static int __init smfs_init(void) { int err; - + + if ( (err = init_smfs_psdev()) ) { + printk("Error initializing smfs_psdev, %d\n", err); + return -EINVAL; + } + if ( (err = init_smfs()) ) { - printk("Error initializing snapfs, %d\n", err); + printk("Error initializing smfs, %d\n", err); return -EINVAL; } - + if ( (err = init_smfs_proc_sys()) ) { - printk("Error initializing snapfs proc sys, %d\n", err); + printk("Error initializing smfs proc sys, %d\n", err); return -EINVAL; } - + return 0; } - + static void __exit smfs_cleanup(void) { - cleanup_smfs(); + cleanup_smfs(); + smfs_cleanup_psdev(); } module_init(smfs_init); module_exit(smfs_cleanup); - diff --git a/lustre/smfs/smfs_internal.h b/lustre/smfs/smfs_internal.h index 3142bab..9614847 100644 --- a/lustre/smfs/smfs_internal.h +++ b/lustre/smfs/smfs_internal.h @@ -1,61 +1,59 @@ #ifndef __LINUX_SMFS_H #define __LINUX_SMFS_H -struct smfs_inode_info { - struct inode *smi_inode; -}; +#include +#define SMFSDEV_NAME "/dev/smfsconf" +#define SMFS_PSDEV_MINOR 250 +#define SMFS_PSDEV_MAJOR 10 -struct smfs_super_info { - struct super_block *smsi_sb; - struct vfsmount *smsi_mnt; /* mount the cache kern with kern_do_mount (like MDS) */ - __u32 flags; /* flags*/ - struct 
llog_ctxt *kml_llog; /*smfs kml llog*/ - int ops_check; +struct option { + char *opt; + char *value; + struct list_head list; }; -#define I2SMI(inode) ((struct smfs_inode_info *) (&(inode->u.generic_ip))) -#define S2SMI(sb) ((struct smfs_super_info *) (&(sb->u.generic_sbp))) -#define S2CSB(sb) (((struct smfs_super_info *) (&(sb->u.generic_sbp)))->smsi_sb) -#define I2CI(inode) (((struct smfs_inode_info*) (&(inode->u.generic_ip)))->smi_inode) - -#define SB_OPS_CHECK 0x1 -#define INODE_OPS_CHECK 0x2 -#define FILE_OPS_CHECK 0x4 -#define DENTRY_OPS_CHECK 0x8 -#define DEV_OPS_CHECK 0x10 -#define SYMLINK_OPS_CHECK 0x20 -#define DIR_OPS_CHECK 0x40 - -#define SM_DO_KML 0x1 - -#include "smfs_support.h" - -struct journal_operations { - void *(*tr_start)(struct inode *, int op); - void (*tr_commit)(void *handle); +struct smfs_control_device { + struct list_head smfs_dev_list; }; -struct sm_ops { - /* operations on the file store */ - struct super_operations sm_sb_ops; - - struct inode_operations sm_dir_iops; - struct inode_operations sm_file_iops; - struct inode_operations sm_sym_iops; - - struct file_operations sm_dir_fops; - struct file_operations sm_file_fops; - struct file_operations sm_sym_fops; - - struct dentry_operations sm_dentry_ops; - struct journal_operations sm_journal_ops; +#define SMFS_TYPE "smfs" +#define IOC_SMFS_START _IOWR('s', 41, long) +#define IOC_SMFS_STOP _IOWR('s', 42, long) +#define IOC_SMFS_REINT _IOWR('s', 43, long) +#define IOC_SMFS_UNDO _IOWR('s', 44, long) +#ifdef __KERNEL__ + +struct smfs_proc_args { + struct super_block *sr_sb; + int sr_count; + int sr_flags; + void *sr_data; }; -struct option { - char *opt; - char *value; - struct list_head list; -}; + + +#define SB_OPS_CHECK 0x1 +#define INODE_OPS_CHECK 0x2 +#define FILE_OPS_CHECK 0x4 +#define DENTRY_OPS_CHECK 0x8 +#define DEV_OPS_CHECK 0x10 +#define SYMLINK_OPS_CHECK 0x20 +#define DIR_OPS_CHECK 0x40 + +#define KML_LOG_NAME "kml_rec" + +#define MYPATHLEN(buffer, path) ((buffer) + PAGE_SIZE - 
(path)) + +#define SMFS_KML_POST(dir, dentry, data1, data2, op, name, rc, label) \ +do { \ + if(smfs_do_rec(dir) && !rc) { \ + CDEBUG(D_INODE, "Do %s kml post for dir %lu \n", \ + name, dir->i_ino); \ + rc = smfs_post_kml_rec(dir, dentry, data1, data2, op); \ + if (rc) \ + GOTO(label, rc); \ + } \ +} while(0) extern int init_smfs_proc_sys(void); /*options.c*/ @@ -63,59 +61,230 @@ extern int get_opt(struct option **option, char **pos); extern void cleanup_option(void); extern int init_option(char *data); /*cache.c*/ -void sm_set_inode_ops(struct inode *cache_inode, struct inode *inode); -void sm_set_sb_ops(struct super_block *cache_sb, struct super_block *sb); -void init_smfs_cache(void); -void cleanup_smfs_cache(void); -void setup_sm_journal_ops(char * cache_type); +extern void sm_set_inode_ops(struct inode *cache_inode, struct inode *inode); +extern void sm_set_sb_ops(struct super_block *cache_sb, struct super_block *sb); +extern void init_smfs_cache(void); +extern void cleanup_smfs_cache(void); +extern void sm_set_journal_ops(struct super_block *sb, char *cache_type); +extern int smfs_init_sm_ops(struct smfs_super_info *smb); +extern void smfs_cleanup_sm_ops(struct smfs_super_info *smb); +static inline struct super_operations *cache_sops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_sb_ops; +} +static inline struct inode_operations *cache_diops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_dir_iops; +} +static inline struct inode_operations *cache_fiops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_file_iops; +} +static inline struct inode_operations *cache_siops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_sym_iops; +} +static inline struct file_operations *cache_dfops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_dir_fops; +} +static inline struct file_operations *cache_ffops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_file_fops; +} +static inline struct file_operations 
*cache_sfops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_sym_fops; +} +static inline struct dentry_operations *cache_dops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_dentry_ops; +} +static inline struct journal_operations *journal_ops(struct smfs_super_info *smb) +{ + return &smb->sm_ops->sm_journal_ops; +} /*super.c*/ extern int init_smfs(void); extern int cleanup_smfs(void); extern void smfs_put_super(struct super_block *sb); -extern void duplicate_sb(struct super_block *csb, struct super_block *sb); +extern struct super_block *smfs_get_sb_by_path(char *path, int len); +extern struct vfsmount* get_vfsmount(struct super_block *sb); /*sysctl.c*/ extern int sm_debug_level; extern int sm_inodes; extern long sm_kmemory; extern int sm_stack; /*dir.c*/ -extern struct inode_operations smfs_dir_iops; -extern struct file_operations smfs_dir_fops; - -extern void d_unalloc(struct dentry *dentry); -/*inode.c*/ -extern void duplicate_inode(struct inode *cache_inode, struct inode *inode); +extern struct inode_operations smfs_dir_iops; +extern struct file_operations smfs_dir_fops; /*file.c*/ -extern void smfs_prepare_cachefile(struct inode *inode, - struct file *file, - struct inode *cache_inode, - struct file *cache_file, - struct dentry *cache_dentry); -extern int smfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg); +extern struct inode_operations smfs_file_iops; +extern struct file_operations smfs_file_fops; +extern int smfs_ioctl(struct inode * inode, struct file * filp, + unsigned int cmd, unsigned long arg); extern int smfs_fsync(struct file * file, struct dentry *dentry, int datasync); -extern int smfs_setattr(struct dentry *dentry, struct iattr *attr); +extern int smfs_setattr(struct dentry *dentry, struct iattr *attr); extern int smfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -extern int smfs_getxattr(struct dentry *dentry, const char *name, - void 
*buffer, size_t size); + const void *value, size_t size, int flags); +extern int smfs_getxattr(struct dentry *dentry, const char *name, void *buffer, + size_t size); extern ssize_t smfs_listxattr(struct dentry *dentry, char *buffer, size_t size); extern int smfs_removexattr(struct dentry *dentry, const char *name); -extern void smfs_update_file(struct file *file, struct file *cache_file); +extern int smfs_open(struct inode * inode, struct file * filp); +extern int smfs_release(struct inode * inode, struct file * filp); +/*inode.c*/ +extern struct super_operations smfs_super_ops; +/*symlink.c*/ +extern struct inode_operations smfs_sym_iops; +extern struct file_operations smfs_sym_fops; /*journal.c */ -extern void *smfs_trans_start(struct inode *inode, int op); -extern void smfs_trans_commit(void *handle); -extern int smfs_journal_mkdir(struct dentry *dentry, - struct smfs_version *tgt_dir_ver, - struct smfs_version *new_dir_ver, - int mode); -/*journal_ext3.c*/ -extern struct journal_operations smfs_ext3_journal_ops; +extern void *smfs_trans_start(struct inode *inode, int op, void *desc_private); +extern void smfs_trans_commit(struct inode *inode, void *handle, + int force_sync); +extern int smfs_post_kml_rec(struct inode *dir, struct dentry *dst_dentry, + void *data1, void *data2, int op); /*kml.c*/ extern int smfs_kml_init(struct super_block *sb); -extern int smfs_do_kml(struct inode *dir); -extern void smfs_getversion(struct smfs_version * smfs_version, struct inode * inode); -extern int post_kml_mkdir(struct inode *dir, struct dentry *dentry); +extern int smfs_do_rec(struct inode *inode); +extern int smfs_rec_cleanup(struct super_block *sb); +extern int smfs_rec_init(struct super_block *sb); +extern int smfs_rec_unpack(struct smfs_proc_args *args, + struct reint_record *u_rec, char *rec_buf); +extern int smfs_start_rec(struct super_block *sb); +extern int smfs_stop_rec(struct super_block *sb); +extern int smfs_process_rec(struct super_block *sb, int count, char 
*dir, + int flags); +void reint_rec_free(struct reint_record *reint_rec); + +extern void smfs_rec_pack(struct update_record *rec, struct inode *dst, + void *data, int op); /*smfs_llog.c*/ -extern int smfs_llog_setup(struct llog_ctxt **ctxt); +extern int smfs_llog_setup(struct super_block *sb); +extern int smfs_llog_cleanup(struct super_block *sb); +extern int smfs_llog_add_rec(struct smfs_super_info * sinfo, void *data, + int data_size); +/*ioctl.c*/ +extern int init_smfs_psdev(void); +extern void smfs_cleanup_psdev(void); + +/* cache_space.c */ +extern int do_cache_manage; +struct cache_purge_queue { + wait_queue_head_t cpq_waitq; + struct super_block *cpq_sb; + struct llog_handle *cpq_loghandle; + __u32 cpq_flags; + struct completion cpq_comp; +}; + +/* opcodes */ +#define CACHE_SPACE_INSERT 0x1 +#define CACHE_SPACE_DELETE 0x2 +#define CACHE_SPACE_COMMIT 0x4 + +#define CACHE_LRU_LOG "CACHE_LRU_LIST" + +extern int smfs_cache_hook(struct inode *inode); +extern void cache_space_pre(struct inode *inode, int op); +extern int cache_space_post(int op, void *handle, struct inode *old_dir, + struct dentry *old_dentry, struct inode *new_dir, + struct dentry *new_dentry); + +extern int cache_space_hook_setup(struct super_block *); +extern int cache_space_hook_cleanup(void); +extern int cache_space_hook_init(struct super_block *); +extern int cache_space_hook_exit(struct super_block *); + +#define XATTR_SMFS_HOARD_MARK "hoard" +#define XATTR_SMFS_CACHE_LOGCOOKIE "cache" +#define XATTR_SMFS_ACTIVE_ENTRY "entry" + +#define SMFS_TRANS_OP(inode, op) \ +{ \ + if (smfs_do_rec(inode)) \ + op = op | 0x10; \ + if (smfs_cache_hook(inode)) \ + op = op | 0x20; \ +} + +static inline int set_hoard_priority(struct inode *inode, void *handle, + __u32 *hoard) +{ + struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt; + int rc; + + rc = fsops->fs_set_xattr(inode, handle, XATTR_SMFS_HOARD_MARK, + hoard, sizeof(__u32)); + RETURN(rc); +} + +static inline int get_hoard_priority(struct inode 
*inode, __u32 *hoard)
+{
+        struct fsfilt_operations *fsops = I2CSB(inode)->sm_fsfilt;
+        int rc;
+
+        /* read the XATTR_SMFS_HOARD_MARK xattr of @inode into *hoard */
+        rc = fsops->fs_get_xattr(inode, XATTR_SMFS_HOARD_MARK,
+                                 hoard, sizeof(__u32));
+        RETURN(rc);
+}
+
+/*
+ * Store *active_entry in the XATTR_SMFS_ACTIVE_ENTRY xattr of @dir as a
+ * little-endian 64-bit value, inside transaction @handle.
+ *
+ * The conversion is done on a local copy so the caller's value stays in
+ * CPU byte order after the call.
+ *
+ * Returns whatever the backing fs_set_xattr() method returns.
+ */
+static inline int set_active_entry(struct inode *dir, __u64 *active_entry,
+                                   void *handle)
+{
+        struct fsfilt_operations *fsops = I2CSB(dir)->sm_fsfilt;
+        __u64 disk_entry = cpu_to_le64(*active_entry);
+        int rc;
+
+        rc = fsops->fs_set_xattr(dir, handle, XATTR_SMFS_ACTIVE_ENTRY,
+                                 &disk_entry, sizeof(disk_entry));
+        RETURN(rc);
+}
+
+/*
+ * Read the XATTR_SMFS_ACTIVE_ENTRY xattr of @dir into *active_entry and
+ * convert it from little-endian to CPU byte order.
+ *
+ * Returns 0 when fs_get_xattr() reports a non-negative result, otherwise
+ * the negative value it returned; *active_entry is only converted on
+ * success.
+ */
+static inline int get_active_entry(struct inode *dir, __u64 *active_entry)
+{
+        struct fsfilt_operations *fsops = I2CSB(dir)->sm_fsfilt;
+        int rc;
+
+        rc = fsops->fs_get_xattr(dir, XATTR_SMFS_ACTIVE_ENTRY,
+                                 active_entry, sizeof(__u64));
+        if (rc < 0)
+                RETURN(rc);
+
+        *active_entry = le64_to_cpu(*active_entry);
+        RETURN(0);
+}
+
+#define CACHE_HOOK_CREATE 1
+#define CACHE_HOOK_LOOKUP 2
+#define CACHE_HOOK_LINK 3
+#define CACHE_HOOK_UNLINK 4
+#define CACHE_HOOK_SYMLINK 5
+#define CACHE_HOOK_MKDIR 6
+#define CACHE_HOOK_RMDIR 7
+#define CACHE_HOOK_MKNOD 8
+#define CACHE_HOOK_RENAME 9
+
+#define CACHE_HOOK_MAX 9
+
+/*
+ * Both hook macros expand to a bare { } block, not do { } while (0):
+ * do not use them as the unbraced body of an if/else.
+ */
+#define SMFS_CACHE_HOOK_PRE(op, handle, dir) \
+{ \
+        if (smfs_cache_hook(dir)) { \
+                LASSERT((handle) != NULL); \
+                CDEBUG(D_INODE, "cache hook pre: op %d, dir %lu\n", \
+                       (op), (dir)->i_ino); \
+                cache_space_pre((dir), (op)); \
+        } \
+}
+
+/* On failure of cache_space_post() the macro jumps to @label with rc set. */
+#define SMFS_CACHE_HOOK_POST(op, handle, old_dir, old_dentry, \
+                             new_dir, new_dentry, rc, label) \
+{ \
+        if (!(rc) && smfs_cache_hook(old_dir)) { \
+                LASSERT((handle) != NULL); \
+                CDEBUG(D_INODE, "cache hook post: op %d, dir %lu\n", \
+                       (op), (old_dir)->i_ino); \
+                (rc) = cache_space_post((op), (handle), (old_dir), \
+                                        (old_dentry), (new_dir), \
+                                        (new_dentry)); \
+                if (rc) \
+                        GOTO(label, rc); \
+        } \
+}
+
+#endif /* __KERNEL__ */
 #endif /* __LINUX_SMFS_H */
diff --git a/lustre/smfs/smfs_llog.c b/lustre/smfs/smfs_llog.c
index a661e56..b2efd68 100644
--- a/lustre/smfs/smfs_llog.c
+++ b/lustre/smfs/smfs_llog.c
@@ -1,27 +1,220 @@
-/*
- * llog.c
+/* -*-
mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
  */
+
 #define DEBUG_SUBSYSTEM S_SM
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
+#include
+#include
 #include "smfs_internal.h"
-int smfs_llog_setup(struct llog_ctxt **p_ctxt)
+/*
+ * llog record callback installed on the record-log context by
+ * smfs_llog_setup(); @data is the struct smfs_proc_args of the current run.
+ *
+ * - a log that is not LLOG_F_IS_PLAIN is rejected with -EINVAL;
+ * - a LLOG_GEN_REC record is cancelled and ends processing
+ *   (LLOG_PROC_BREAK);
+ * - any other record that is not SMFS_UPDATE_REC yields -EINVAL;
+ * - an update record is unpacked and handed to lvfs_reint() or lvfs_undo()
+ *   (selected by SMFS_DO_REINT_REC(args->sr_flags)) inside the run context
+ *   stored in the smfs super info.
+ *
+ * Unless SMFS_DO_REC_ALL() is set, args->sr_count is decremented for each
+ * successfully handled record and LLOG_PROC_BREAK is returned once it
+ * reaches zero.
+ */
+static int smfs_llog_process_rec_cb(struct llog_handle *handle,
+                                    struct llog_rec_hdr *rec, void *data)
 {
-        int rc = 0;
-
+        struct reint_record *reint_rec;
+        char *rec_buf;
+        struct smfs_proc_args *args = (struct smfs_proc_args *)data;
+        struct lvfs_run_ctxt saved;
+        int rc = 0;
+        ENTRY;
+
+        if (!(le32_to_cpu(handle->lgh_hdr->llh_flags) & LLOG_F_IS_PLAIN)) {
+                CERROR("log is not plain\n");
+                RETURN(-EINVAL);
+        }
+
+        if (le32_to_cpu(rec->lrh_type) == LLOG_GEN_REC) {
+                struct llog_cookie cookie;
+
+                cookie.lgc_lgl = handle->lgh_id;
+                cookie.lgc_index = le32_to_cpu(rec->lrh_index);
+
+                llog_cancel(handle->lgh_ctxt, 1, &cookie, 0, NULL);
+                RETURN(LLOG_PROC_BREAK);
+        }
+
+        if (le32_to_cpu(rec->lrh_type) != SMFS_UPDATE_REC)
+                RETURN(-EINVAL);
+
+        /* the record payload follows the llog header directly */
+        rec_buf = (char*) (rec + 1);
+
+        OBD_ALLOC(reint_rec, sizeof(struct reint_record));
+        if (!reint_rec)
+                RETURN(-ENOMEM);
+
+        /* report the unpack error itself instead of masking it as -ENOMEM */
+        rc = smfs_rec_unpack(args, reint_rec, rec_buf);
+        if (rc)
+                GOTO(exit, rc);
+
+        if (!S2SMI(args->sr_sb)->smsi_ctxt)
+                GOTO(exit, rc = -ENODEV);
+
+        push_ctxt(&saved, S2SMI(args->sr_sb)->smsi_ctxt, NULL);
+        if (SMFS_DO_REINT_REC(args->sr_flags))
+                rc = lvfs_reint(args->sr_sb, reint_rec);
+        else
+                rc = lvfs_undo(args->sr_sb, reint_rec);
+
+        if (!rc && !SMFS_DO_REC_ALL(args->sr_flags)) {
+                args->sr_count --;
+                if (args->sr_count == 0)
+                        rc = LLOG_PROC_BREAK;
+        }
+        pop_ctxt(&saved, S2SMI(args->sr_sb)->smsi_ctxt, NULL);
+exit:
+        reint_rec_free(reint_rec);
         RETURN(rc);
-}
+}
+
+/*
+ * Prepare @sb for record logging: build the lvfs run context rooted at the
+ * mount of @sb, create the "LOGS" and "OBJECTS" directories under it, clear
+ * the KML and cache-hook flags on both directory inodes, then set up the
+ * record catalog (when SMFS_DO_REC) and the cache-space hook log (when
+ * SMFS_CACHE_HOOK).
+ *
+ * Returns 0 on success or a negative errno.  When the context or either
+ * directory cannot be set up, everything acquired so far is released again
+ * and S2SMI(sb)->smsi_ctxt is left NULL.
+ */
+int smfs_llog_setup(struct super_block *sb)
+{
+        struct llog_ctxt **ctxt = &(S2SMI(sb)->smsi_rec_log);
+        struct lvfs_run_ctxt saved;
+        struct lvfs_run_ctxt *current_ctxt = NULL;
+        struct vfsmount *mnt;
+        struct dentry *dentry;
+        int rc = 0, rc2;
+        ENTRY;
+
+        /* create OBJECTS and LOGS for writing logs */
+        /* NOTE(review): the result of fs_setup() is not checked here --
+         * confirm whether it can fail. */
+        S2SMI(sb)->sm_cache_fsfilt->fs_setup(sb);
+
+        OBD_ALLOC(current_ctxt, sizeof(*current_ctxt));
+        if (!current_ctxt)
+                RETURN(-ENOMEM);
+
+        mnt = get_vfsmount(sb);
+        if (!mnt)
+                GOTO(err_exit, rc = -EINVAL);
+
+        OBD_SET_CTXT_MAGIC(current_ctxt);
+        current_ctxt->pwdmnt = mnt;
+        current_ctxt->pwd = mnt->mnt_root;
+        current_ctxt->fs = get_ds();
+        S2SMI(sb)->smsi_ctxt = current_ctxt;
+
+        push_ctxt(&saved, current_ctxt, NULL);
+        dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create LOGS directory: rc = %d\n", rc);
+                GOTO(err_pop, rc);
+        }
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        if (list_empty(&dentry->d_hash))
+                d_rehash(dentry);
+#else
+        /* FIXME-WANGDI: here should be something. */
+#endif
+
+        S2SMI(sb)->smsi_logs_dir = dentry;
+        dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
+        if (IS_ERR(dentry)) {
+                rc = PTR_ERR(dentry);
+                CERROR("cannot create OBJECTS directory: rc = %d\n", rc);
+                GOTO(err_pop, rc);
+        }
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        if (list_empty(&dentry->d_hash))
+                d_rehash(dentry);
+#else
+        /* FIXME-WANGDI: here should be something. */
+#endif
+        S2SMI(sb)->smsi_objects_dir = dentry;
+        pop_ctxt(&saved, current_ctxt, NULL);
+
+        /* write log will not write to KML, cleanup kml flags */
+        SMFS_CLEAN_INODE_REC(S2SMI(sb)->smsi_objects_dir->d_inode);
+        SMFS_CLEAN_INODE_REC(S2SMI(sb)->smsi_logs_dir->d_inode);
+
+        /* log create does not call cache hooks, cleanup hook flags */
+        SMFS_CLEAN_INODE_CACHE_HOOK(S2SMI(sb)->smsi_objects_dir->d_inode);
+        SMFS_CLEAN_INODE_CACHE_HOOK(S2SMI(sb)->smsi_logs_dir->d_inode);
+
+        if (SMFS_DO_REC(S2SMI(sb))) {
+                rc = llog_catalog_setup(ctxt, KML_LOG_NAME, current_ctxt,
+                                        S2SMI(sb)->sm_fsfilt,
+                                        S2SMI(sb)->smsi_logs_dir,
+                                        S2SMI(sb)->smsi_objects_dir);
+                /* only touch the context if the catalog really exists */
+                if (rc == 0 && *ctxt != NULL)
+                        (*ctxt)->loc_proc_cb = smfs_llog_process_rec_cb;
+        }
+
+        if (SMFS_CACHE_HOOK(S2SMI(sb))) {
+                rc2 = cache_space_hook_setup(sb);
+                if (!rc && rc2)
+                        rc = rc2;
+        }
+exit:
+        RETURN(rc);
+err_pop:
+        /* leave the pushed context before it is freed, and drop what was
+         * created so far so that nothing points at freed memory */
+        pop_ctxt(&saved, current_ctxt, NULL);
+        if (S2SMI(sb)->smsi_logs_dir) {
+                l_dput(S2SMI(sb)->smsi_logs_dir);
+                S2SMI(sb)->smsi_logs_dir = NULL;
+        }
+        S2SMI(sb)->smsi_ctxt = NULL;
+err_exit:
+        OBD_FREE(current_ctxt, sizeof(*current_ctxt));
+        goto exit;
+}
+
+/*
+ * Undo smfs_llog_setup(): shut down the cache-space hook log and the record
+ * catalog (each only if enabled for @sb), drop the LOGS/OBJECTS dentries and
+ * free the run context.  Returns the first error seen, 0 otherwise.
+ */
+int smfs_llog_cleanup(struct super_block *sb)
+{
+        struct llog_ctxt *ctxt = S2SMI(sb)->smsi_rec_log;
+        int rc = 0, rc2;
+        ENTRY;
+
+        if (SMFS_CACHE_HOOK(S2SMI(sb)))
+                rc = cache_space_hook_cleanup();
+
+        if (SMFS_DO_REC(S2SMI(sb))) {
+                rc2 = llog_catalog_cleanup(ctxt);
+                if (!rc)
+                        rc = rc2;
+        }
+
+        if (S2SMI(sb)->smsi_logs_dir) {
+                l_dput(S2SMI(sb)->smsi_logs_dir);
+                S2SMI(sb)->smsi_logs_dir = NULL;
+        }
+        if (S2SMI(sb)->smsi_objects_dir) {
+                l_dput(S2SMI(sb)->smsi_objects_dir);
+                S2SMI(sb)->smsi_objects_dir = NULL;
+        }
+
+        /* setup may have failed before (or after releasing) the context */
+        if (S2SMI(sb)->smsi_ctxt) {
+                OBD_FREE(S2SMI(sb)->smsi_ctxt, sizeof(struct lvfs_run_ctxt));
+                S2SMI(sb)->smsi_ctxt = NULL;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Append one SMFS_UPDATE_REC record carrying @data (@data_size bytes, the
+ * header length is rounded with size_round()) to the record log of @sinfo.
+ *
+ * Returns 0 when llog_add() reports exactly one record written; otherwise
+ * the negative llog_add() error, or -EINVAL for any other result.
+ */
+int smfs_llog_add_rec(struct
smfs_super_info * sinfo, void *data, int data_size)
+{
+        /* zero the header so no stack garbage reaches the log */
+        struct llog_rec_hdr rec = { 0 };
+        int rc = 0;
+
+        rec.lrh_len = size_round(data_size);
+        rec.lrh_type = SMFS_UPDATE_REC;
+
+        rc = llog_add(sinfo->smsi_rec_log,
+                      &rec, data, NULL, 0, NULL);
+        if (rc != 1) {
+                CERROR("error adding kml rec: %d\n", rc);
+                RETURN(rc < 0 ? rc : -EINVAL);
+        }
+        RETURN(0);
+}
diff --git a/lustre/smfs/smfs_support.h b/lustre/smfs/smfs_support.h
deleted file mode 100644
index 433d350..0000000
--- a/lustre/smfs/smfs_support.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef __LINUX_SMFS_SUPPORT_H
-#define __LINUX_SMFS_SUPPORT_H
-/*Got these defination from lustre. Put here temporaryly*/
-
-#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */
-#define D_INODE (1 << 1)
-#define D_SUPER (1 << 2)
-#define D_EXT2 (1 << 3) /* anything from ext2_debug */
-#define D_MALLOC (1 << 4) /* print malloc, free information */
-#define D_CACHE (1 << 5) /* cache-related items */
-#define D_INFO (1 << 6) /* general information */
-#define D_IOCTL (1 << 7) /* ioctl related information */
-#define D_BLOCKS (1 << 8) /* ext2 block allocation */
-#define D_NET (1 << 9) /* network communications */
-#define D_WARNING (1 << 10) /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS (1 << 11)
-#define D_OTHER (1 << 12)
-#define D_DENTRY (1 << 13)
-#define D_PAGE (1 << 15) /* bulk page handling */
-#define D_DLMTRACE (1 << 16)
-#define D_ERROR (1 << 17) /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG (1 << 18) /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA (1 << 19) /* recovery and failover */
-#define D_RPCTRACE (1 << 20) /* for distributed debugging */
-#define D_VFSTRACE (1 << 21)
-#define D_SM (1 << 22)
-
-
-#ifdef __KERNEL__
-# include /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-# define THREAD_SIZE 8192
-# endif
-#endif
-# include
-# define sm_debug_msg(mask, file, fn, line, stack, format, a...) \
-        printk("%02x (@%lu %s:%s,l.
%d %d %lu): " format, \ - (mask), (long)time(0), file, fn, line, \ - getpid() , stack, ## a); - -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#ifdef __KERNEL__ -# ifdef __ia64__ -# define CDEBUG_STACK (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -# endif - -#define CHECK_STACK(stack) \ - do { \ - if ((stack) > 3*THREAD_SIZE/4 && (stack) > sm_stack) { \ - printk( "maximum lustre stack %u\n", \ - sm_stack = (stack)); \ - } \ - } while (0) -#else /* __KERNEL__ */ -#define CHECK_STACK(stack) do { } while(0) -#define CDEBUG_STACK (0L) -#endif /* __KERNEL__ */ - -#if 1 -#define CDEBUG(mask, format, a...) \ -do { \ - CHECK_STACK(CDEBUG_STACK); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \ - (sm_debug_level & (mask))) { \ - printk("SM_DEBUG: (%s %d):", __FILE__, __LINE__); \ - printk(format, ## a); \ - } \ -} while (0) - -#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) -#define CEMERG(format, a...) 
CDEBUG(D_EMERG, format, ## a) - -#define GOTO(label, rc) \ -do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"SM_DEBUG: (%s %d):Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - __FILE__, __LINE__, #label, (unsigned long)GOTO__ret, \ - (signed long)GOTO__ret, \ - (signed long)GOTO__ret); \ - goto label; \ -} while (0) - -#define RETURN(rc) \ -do { \ - typeof(rc) RETURN__ret = (rc); \ - CDEBUG(D_TRACE, "Process %d leaving %s (rc=%lu : %ld : %lx)\n", \ - current->pid, __FUNCTION__, (long)RETURN__ret, \ - (long)RETURN__ret, (long)RETURN__ret); \ - return RETURN__ret; \ -} while (0) - -#define ENTRY \ -do { \ - CDEBUG(D_TRACE, "SM_DEBUG: (%s %d): Process %d enter %s\n", \ - __FILE__, __LINE__, current->pid, __FUNCTION__); \ -} while (0) - -#define EXIT \ -do { \ - CDEBUG(D_TRACE, "SM_DEBUG: (%s %d): Process %d leaving %s \n", \ - __FILE__, __LINE__, current->pid, __FUNCTION__); \ -} while(0) -#else -#define CDEBUG(mask, format, a...) do { } while (0) -#define CWARN(format, a...) do { } while (0) -#define CERROR(format, a...) printk("<3>" format, ## a) -#define CEMERG(format, a...) 
printk("<0>" format, ## a) -#define GOTO(label, rc) do { (void)(rc); goto label; } while (0) -#define RETURN(rc) return (rc) -#define ENTRY do { } while (0) -#define EXIT do { } while (0) -#endif - -#define SM_ALLOC(ptr, size) \ -do { \ - if (size <= 4096) { \ - ptr = kmalloc((unsigned long) size, GFP_KERNEL); \ - CDEBUG(D_MALLOC, "Proc %d %s:%d kmalloced: %d at %x.\n",\ - current->pid, __FUNCTION__, __LINE__, \ - (int) size, (int) ptr); \ - } else { \ - ptr = vmalloc((unsigned long) size); \ - CDEBUG(D_MALLOC, "Proc %d %s:%d vmalloced: %d at %x.\n",\ - current->pid, __FUNCTION__, __LINE__, \ - (int) size, (int) ptr); \ - } \ - if (ptr == 0) { \ - printk("kernel malloc returns 0 at %s:%d\n", \ - __FILE__, __LINE__); \ - } else { \ - memset(ptr, 0, size); \ - sm_kmemory += size; \ - } \ -} while (0) - -#define SM_FREE(ptr,size) \ -do { \ - sm_kmemory -= size; \ - if (size <= 4096) { \ - CDEBUG(D_MALLOC, "Proc %d %s:%d kfreed: %d at %x.\n", \ - current->pid, __FUNCTION__, __LINE__, \ - (int) size, (int) ptr); \ - kfree((ptr)); \ - } else { \ - CDEBUG(D_MALLOC, "Proc %d %s:%d vfreed: %d at %x.\n", \ - current->pid, __FUNCTION__, __LINE__, \ - (int) size, (int) ptr); \ - vfree((ptr)); \ - } \ -} while (0) - -#endif /*__LINUX_SMFS_SUPPORT_H */ diff --git a/lustre/smfs/super.c b/lustre/smfs/super.c index bfa4247..eb3ab6b 100644 --- a/lustre/smfs/super.c +++ b/lustre/smfs/super.c @@ -1,13 +1,25 @@ -/* - * snap_current +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 1998 Peter J. Braam - * Copyright (C) 2000 Stelias Computing, Inc. - * Copyright (C) 2000 Red Hat, Inc. - * Copyright (C) 2000 Mountain View Data, Inc. + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Author: Peter J. Braam */ + #define DEBUG_SUBSYSTEM S_SM #include @@ -20,316 +32,340 @@ #include #include #include +#include #include #include +#include +#include +#include #include -#include "smfs_internal.h" +#include +#include +#include "smfs_internal.h" -/* Find the options for the clone. These consist of a cache device - and an index in the snaptable associated with that device. 
-*/ -static char *smfs_options(char *options, char **devstr, char **namestr, int *kml) +static char *smfs_options(char *data, char **devstr, char **namestr, + int *kml, int *cache, char **opts, + int *iopen_nopriv) { - struct option *opt_value = NULL; - char *pos; - - while (!(get_opt(&opt_value, &pos))) { - if (!strcmp(opt_value->opt, "dev")) { - if (devstr != NULL) - *devstr = opt_value->value; - } else if (!strcmp(opt_value->opt, "type")) { - if (namestr != NULL) - *namestr = opt_value->value; - } else if (!strcmp(opt_value->opt, "kml")) { - *kml = 1; - } else { - break; - } - } - return pos; + char *pos; + struct option *opt_value = NULL; + + while (!(get_opt(&opt_value, &pos))) { + if (!strcmp(opt_value->opt, "dev")) { + if (devstr != NULL) + *devstr = opt_value->value; + } else if (!strcmp(opt_value->opt, "type")) { + if (namestr != NULL) + *namestr = opt_value->value; + } else if (!strcmp(opt_value->opt, "kml")) { + if (kml) + *kml = 1; + } else if (!strcmp(opt_value->opt, "cache")) { + if (cache) + *cache = 1; + } else if (!strcmp(opt_value->opt, "options")) { + if (opts != NULL) + *opts = opt_value->value; + } else if (!strcmp(opt_value->opt, "iopen_nopriv")) { + if (iopen_nopriv != NULL) + *iopen_nopriv = 1; + } else { + break; + } + } + return pos; } -static int set_loop_fd(char *dev_path, char *loop_dev) +struct vfsmount *get_vfsmount(struct super_block *sb) { - struct loop_info loopinfo; - struct nameidata nd; - struct dentry *dentry; - struct block_device_operations *bd_ops; - struct file *filp; - int fd = 0, error = 0; - - fd = get_unused_fd(); - - if (!fd) RETURN(-EINVAL); - - filp = filp_open(dev_path, FMODE_WRITE, 0); - if (!filp || !S_ISREG(filp->f_dentry->d_inode->i_mode)) - RETURN(-EINVAL); - - fd_install(fd, filp); - - if (path_init(loop_dev, LOOKUP_FOLLOW, &nd)) { - error = path_walk(loop_dev, &nd); - if (error) { - path_release(&nd); - filp_close(filp, current->files); - RETURN(-EINVAL); - } - } else { - path_release(&nd); - filp_close(filp, 
current->files); - RETURN(-EINVAL); - } - dentry = nd.dentry; - bd_ops = get_blkfops(LOOP_MAJOR); - - error = bd_ops->ioctl(dentry->d_inode, filp, LOOP_SET_FD, - (unsigned long)fd); - if (error) { - path_release(&nd); - filp_close(filp, current->files); - RETURN(-EINVAL); - } - memset(&loopinfo, 0, sizeof(struct loop_info)); - - error = bd_ops->ioctl(dentry->d_inode, filp, LOOP_SET_STATUS, - (unsigned long)(&loopinfo)); - path_release(&nd); - RETURN(error); + struct vfsmount *rootmnt, *mnt, *ret = NULL; + struct list_head *end, *list; + + rootmnt = mntget(current->fs->rootmnt); + end = list = &rootmnt->mnt_list; + do { + mnt = list_entry(list, struct vfsmount, mnt_list); + if (mnt->mnt_sb == sb) { + ret = mnt; + break; + } + list = list->next; + } while (end != list); + + mntput(current->fs->rootmnt); + return ret; } -#define SIZE(a) (sizeof(a)/sizeof(a[0])) -static char *find_unused_and_set_loop_device(char *dev_path) +struct super_block *smfs_get_sb_by_path(char *path, int len) { - char *loop_formats[] = { "/dev/loop/%d", "/dev/loop%d"}; - struct loop_info loopinfo; - struct nameidata nd; - struct dentry *dentry; - char *dev = NULL; - int i, j, error; - - for (j = 0; j < SIZE(loop_formats); j++) { - SM_ALLOC(dev, strlen(loop_formats[i]) + 1); - for(i = 0; i < 256; i++) { - struct block_device_operations *bd_ops; - - sprintf(dev, loop_formats[j], i); - - if (path_init(dev, LOOKUP_FOLLOW, &nd)) { - error = path_walk(dev, &nd); - if (error && error != -ENOENT) { - path_release(&nd); - SM_FREE(dev, strlen(loop_formats[i]) + 1); - RETURN(NULL); - } - } else { - SM_FREE(dev, strlen(loop_formats[i]) + 1); - RETURN(NULL); - } - dentry = nd.dentry; - bd_ops = get_blkfops(LOOP_MAJOR); - error = bd_ops->ioctl(dentry->d_inode, NULL, LOOP_GET_STATUS, - (unsigned long)&loopinfo); - path_release(&nd); - - if (error == -ENXIO) { - /*find unused loop and set dev_path to loopdev*/ - error = set_loop_fd(dev_path, dev); - if (error) { - SM_FREE(dev, strlen(loop_formats[i]) + 1); - 
dev = NULL; - } - return dev;/* probably free */ - } - } - SM_FREE(dev, strlen(loop_formats[i]) + 1); - } - RETURN(NULL); + struct super_block *sb; + struct nameidata nd; + int error = 0; + + ENTRY; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (path_init(path, LOOKUP_FOLLOW, &nd)) { +#else + if (path_lookup(path, LOOKUP_FOLLOW, &nd)) { +#endif + error = path_walk(path, &nd); + if (error) { + path_release(&nd); + RETURN(NULL); + } + } else { + RETURN(NULL); + } + + /* FIXME-WANGDI: add some check code here. */ + sb = nd.dentry->d_sb; + path_release(&nd); + RETURN(sb); } -#define MAX_LOOP_DEVICES 256 -static char *parse_path2dev(struct super_block *sb, char *dev_path) +static int smfs_init_fsfilt_ops(struct super_block *sb) { - struct dentry *dentry; - struct nameidata nd; - char *name = NULL; - int error = 0; - - if (path_init(dev_path, LOOKUP_FOLLOW, &nd)) { - error = path_walk(dev_path, &nd); - if (error) { - path_release(&nd); - RETURN(NULL); - } - } else { - RETURN(NULL); - } - dentry = nd.dentry; - - if (!dentry->d_inode || is_bad_inode(dentry->d_inode) || - (!S_ISBLK(dentry->d_inode->i_mode) && - !S_ISREG(dentry->d_inode->i_mode))){ - path_release(&nd); - RETURN(NULL); - } - - if (S_ISREG(dentry->d_inode->i_mode)) { - name = find_unused_and_set_loop_device(dev_path); - path_release(&nd); - RETURN(name); - } - SM_ALLOC(name, strlen(dev_path) + 1); - memcpy(name, dev_path, strlen(dev_path) + 1); - RETURN(name); + ENTRY; + if (!S2SMI(sb)->sm_cache_fsfilt) { + S2SMI(sb)->sm_cache_fsfilt = + fsfilt_get_ops(S2SMI(sb)->cache_fs_type); + if (!S2SMI(sb)->sm_cache_fsfilt) { + CERROR("Can not get %s fsfilt ops needed by kml\n", + S2SMI(sb)->cache_fs_type); + RETURN(-EINVAL); + } + } + if (!S2SMI(sb)->sm_fsfilt) { + S2SMI(sb)->sm_fsfilt = + fsfilt_get_ops(S2SMI(sb)->fs_type); + if (!S2SMI(sb)->sm_fsfilt) { + CERROR("Can not get %s fsfilt ops needed by kml\n", + S2SMI(sb)->fs_type); + RETURN(-EINVAL); + } + } + RETURN(0); } -void duplicate_sb(struct super_block 
*csb,
-                  struct super_block *sb)
+
+void smfs_cleanup_fsfilt_ops(struct super_block *sb)
 {
-        sb->s_blocksize = csb->s_blocksize;
-        sb->s_magic = csb->s_magic;
-        sb->s_blocksize_bits = csb->s_blocksize_bits;
-        sb->s_maxbytes = csb->s_maxbytes;
+        if (S2SMI(sb)->sm_cache_fsfilt)
+                fsfilt_put_ops(S2SMI(sb)->sm_cache_fsfilt);
+        if (S2SMI(sb)->sm_fsfilt)
+                fsfilt_put_ops(S2SMI(sb)->sm_fsfilt);
 }
-extern struct super_operations smfs_super_ops;
-static int sm_mount_cache(struct super_block *sb,
-                          char *devstr,
-                          char *typestr)
+/* defined below; needed by the error path of sm_mount_cache() */
+static int sm_umount_cache(struct super_block *sb);
+
+/*
+ * Mount the backing ("cache") filesystem of type @typestr from @devstr and
+ * attach it to the smfs super block @sb.
+ *
+ * The mount data passed to do_kern_mount() is "iopen_nopriv" (only when
+ * @iopen_nopriv is set) followed by @opts, comma separated; it must fit in
+ * one page.  On success the mount and its super block are recorded in the
+ * smfs super info together with copies of both filesystem type names, and
+ * the fsfilt operations for both types are looked up.
+ *
+ * Returns 0 or a negative errno.  If a step after the mount fails, the
+ * mount is released again through sm_umount_cache().
+ */
+static int sm_mount_cache(struct super_block *sb, char *devstr,
+                          char *typestr, char *opts, int iopen_nopriv)
 {
-        struct vfsmount *mnt;
-        struct smfs_super_info *smb;
-        char *dev_name = NULL;
-        unsigned long page;
-        int err = 0;
-
-        dev_name = parse_path2dev(sb, devstr);
-        if (!dev_name) {
-                GOTO(err_out, err = -ENOMEM);
-        }
-        if (!(page = __get_free_page(GFP_KERNEL))) {
-                GOTO(err_out, err = -ENOMEM);
-        }
+        struct smfs_super_info *smb;
+        int err = 0, typelen;
+        struct vfsmount *mnt;
+        unsigned long page;
+
+        ENTRY;
+
+        typelen = strlen(typestr);
+
+        page = __get_free_page(GFP_KERNEL);
+        if (!page)
+                GOTO(err_out, err = -ENOMEM);
+
         memset((void *)page, 0, PAGE_SIZE);
-        sprintf((char *)page, "iopen_nopriv");
-
-        mnt = do_kern_mount(typestr, 0, dev_name, (void *)page);
+
+        if (iopen_nopriv)
+                sprintf((char *)page, "iopen_nopriv");
+
+        if (opts && strlen(opts)) {
+                int n = strlen((char *)page);
+
+                /* room for an optional ',' and the trailing NUL */
+                if (n + strlen(opts) + 2 > PAGE_SIZE) {
+                        free_page(page);
+                        GOTO(err_out, err = -EINVAL);
+                }
+                /* no leading comma when nothing precedes the options */
+                sprintf((char *)page + n, "%s%s", n ? "," : "", opts);
+        }
+
+        printk("smfs: mounting %s at %s\n", typestr, devstr);
+
+        mnt = do_kern_mount(typestr, 0, devstr, (void *)page);
         free_page(page);
-
-        if (IS_ERR(mnt)) {
+
+        if (IS_ERR(mnt)) {
                 CERROR("do_kern_mount failed: rc = %ld\n", PTR_ERR(mnt));
                 GOTO(err_out, err = PTR_ERR(mnt));
         }
-        smb = S2SMI(sb);
-        smb->smsi_sb = mnt->mnt_sb;
-        smb->smsi_mnt = mnt;
-
-        duplicate_sb(mnt->mnt_sb, sb);
-        sm_set_sb_ops(mnt->mnt_sb, sb);
+        smb = S2SMI(sb);
+        smb->smsi_sb = mnt->mnt_sb;
+        smb->smsi_mnt = mnt;
+
+        /* NOTE(review): smfs_init_sm_ops() is declared to return int but
+         * its result is not checked -- confirm whether it can fail. */
+        smfs_init_sm_ops(smb);
+
+        /* OBD_ALLOC results were previously used unchecked */
+        OBD_ALLOC(smb->cache_fs_type, typelen + 1);
+        if (!smb->cache_fs_type)
+                GOTO(err_umount, err = -ENOMEM);
+        memcpy(smb->cache_fs_type, typestr, typelen);
+
+        OBD_ALLOC(smb->fs_type, strlen(SMFS_TYPE) + 1);
+        if (!smb->fs_type)
+                GOTO(err_umount, err = -ENOMEM);
+        memcpy(smb->fs_type, SMFS_TYPE, strlen(SMFS_TYPE));
+
+        duplicate_sb(sb, mnt->mnt_sb);
+        sm_set_sb_ops(mnt->mnt_sb, sb);
+        err = smfs_init_fsfilt_ops(sb);
+        if (err)
+                GOTO(err_umount, err);
 err_out:
-        if (dev_name)
-                SM_FREE(dev_name, strlen(dev_name) + 2);
-
-        return err;
+        return err;
+err_umount:
+        /* the caller does not unmount on failure, so do it here */
+        sm_umount_cache(sb);
+        goto err_out;
 }
+
+/*
+ * Release everything sm_mount_cache() attached to @sb: the cache mount, the
+ * sm ops, the fsfilt ops and both type-name copies.  Always returns 0.
+ */
 static int sm_umount_cache(struct super_block *sb)
 {
-        struct smfs_super_info *smb = S2SMI(sb);
-
-        mntput(smb->smsi_mnt);
-
-        return 0;
+        struct smfs_super_info *smb = S2SMI(sb);
+        int len;
+
+        mntput(smb->smsi_mnt);
+        smb->smsi_mnt = NULL;
+        smfs_cleanup_sm_ops(smb);
+        smfs_cleanup_fsfilt_ops(sb);
+
+        /* OBD_FREE is a macro: take the length before the buffer is handed
+         * to it, and do not leave stale pointers behind */
+        if (smb->cache_fs_type) {
+                len = strlen(smb->cache_fs_type) + 1;
+                OBD_FREE(smb->cache_fs_type, len);
+                smb->cache_fs_type = NULL;
+        }
+
+        if (smb->fs_type) {
+                len = strlen(smb->fs_type) + 1;
+                OBD_FREE(smb->fs_type, len);
+                smb->fs_type = NULL;
+        }
+
+        return 0;
 }
+
+/*
+ * put_super method: stop the cache-space hook and record logging when they
+ * are enabled for @sb, then unmount the cache filesystem.  A NULL @sb is
+ * ignored.
+ */
 void smfs_put_super(struct super_block *sb)
 {
-        if (sb)
-                sm_umount_cache(sb);
-        return;
+        /* test sb before S2SMI(sb) dereferences it */
+        if (!sb)
+                return;
+
+        if (SMFS_CACHE_HOOK(S2SMI(sb)))
+                cache_space_hook_exit(sb);
+
+        if (SMFS_DO_REC(S2SMI(sb)))
+                smfs_rec_cleanup(sb);
+
+        sm_umount_cache(sb);
 }
-struct super_block *
-smfs_read_super(
-        struct super_block *sb,
-        void *data,
-        int silent)
+static int smfs_fill_super(struct super_block *sb,
+                           void *data, int silent)
 {
-        struct inode *root_inode = NULL;
-        char *devstr = NULL, *typestr = NULL;
-        char *cache_data;
-        ino_t root_ino;
-        int err = 0, kml = 0;
-
-        ENTRY;
-
-        CDEBUG(D_SUPER, "mount opts: %s\n", data ?
(char *)data : "(none)"); - - init_option(data); - /* read and validate options */ - cache_data = smfs_options(data, &devstr, &typestr, &kml); - if (*cache_data) { - CERROR("invalid mount option %s\n", (char*)data); - GOTO(out_err, err=-EINVAL); - } - if (!typestr || !devstr) { - CERROR("mount options name and dev mandatory\n"); - GOTO(out_err, err=-EINVAL); - } - - err = sm_mount_cache(sb, devstr, typestr); - if (err) { - CERROR("Can not mount %s as %s\n", devstr, typestr); - GOTO(out_err, 0); - } - - if (kml) smfs_kml_init(sb); - - setup_sm_journal_ops(typestr); - - dget(S2CSB(sb)->s_root); - root_ino = S2CSB(sb)->s_root->d_inode->i_ino; - root_inode = iget(sb, root_ino); - - CDEBUG(D_SUPER, "readinode %p, root ino %ld, root inode at %p\n", - sb->s_op->read_inode, root_ino, root_inode); - - sb->s_root = d_alloc_root(root_inode); - - if (!sb->s_root) { - GOTO(out_err, err=-EINVAL); - } - - CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n", - (ulong) sb, (ulong) &sb->u.generic_sbp); - + ino_t root_ino; + char *cache_data; + + int iopen_nopriv = 0; + struct inode *root_inode = NULL; + int err = 0, do_rec = 0, cache_hook = 0; + char *devstr = NULL, *typestr = NULL, *opts = NULL; + + ENTRY; + + CDEBUG(D_SUPER, "mount opts: %s\n", data ? + (char *)data : "(none)"); + + init_option(data); + + /* read and validate passed options. 
*/ + cache_data = smfs_options(data, &devstr, &typestr, + &do_rec, &cache_hook, &opts, + &iopen_nopriv); + + if (*cache_data) + CWARN("smfs_fill_super(): options parsing stoped at " + "option %s\n", cache_data); + + if (!typestr || !devstr) { + CERROR("mount options name and dev mandatory\n"); + GOTO(out_err, err = -EINVAL); + } + + err = sm_mount_cache(sb, devstr, typestr, opts, + iopen_nopriv); + + if (err) { + CERROR("Can not mount %s as %s, rc = %d\n", devstr, + typestr, err); + GOTO(out_err, err); + } + + if (do_rec) + smfs_rec_init(sb); + + if (cache_hook) + cache_space_hook_init(sb); + + dget(S2CSB(sb)->s_root); + root_ino = S2CSB(sb)->s_root->d_inode->i_ino; + root_inode = iget(sb, root_ino); + + CDEBUG(D_SUPER, "readinode %p, root ino %ld, root inode at %p\n", + sb->s_op->read_inode, root_ino, root_inode); + + sb->s_root = d_alloc_root(root_inode); + + if (!sb->s_root) { + sm_umount_cache(sb); + GOTO(out_err, err = -EINVAL); + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + CDEBUG(D_SUPER, "sb %lx, &sb->u.generic_sbp: %lx\n", + (ulong)sb, (ulong)&sb->u.generic_sbp); +#else + CDEBUG(D_SUPER, "sb %lx, &sb->s_fs_info: %lx\n", + (ulong)sb, (ulong)&sb->s_fs_info); +#endif + out_err: - cleanup_option(); - if (err) - return NULL; - return sb; + cleanup_option(); + return err; +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static struct super_block *smfs_read_super(struct super_block *sb, + void *data, int silent) +{ + int err; + + err = smfs_fill_super(sb, data, silent); + if (err) + return NULL; + + return sb; +} +#else +struct super_block *smfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_nodev(fs_type, flags, data, smfs_fill_super); } +#endif -static DECLARE_FSTYPE(smfs_type, "smfs", smfs_read_super, 0); +static struct file_system_type smfs_type = { + .owner = THIS_MODULE, + .name = "smfs", +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + .read_super = smfs_read_super, +#else + 
.get_sb = smfs_get_sb, + .kill_sb = kill_anon_super, +#endif +}; int init_smfs(void) { - int err = 0; - - err = register_filesystem(&smfs_type); - if (err) { - CERROR("smfs: failed in register Storage Management filesystem!\n"); - } - init_smfs_cache(); - return err; + int err; + + err = register_filesystem(&smfs_type); + if (err) { + CERROR("register_filesystem() failed, " + "rc = %d\n", err); + } + return err; } int cleanup_smfs(void) { - int err = 0; - - ENTRY; - err = unregister_filesystem(&smfs_type); - if (err) { - CERROR("smfs: failed to unregister Storage Management filesystem!\n"); - } - cleanup_smfs_cache(); - return 0; + int err = 0; + + err = unregister_filesystem(&smfs_type); + if (err) { + CERROR("unregister_filesystem() failed, " + "rc = %d\n", err); + } + return 0; } diff --git a/lustre/smfs/symlink.c b/lustre/smfs/symlink.c index e1ca762..42b6ab6 100644 --- a/lustre/smfs/symlink.c +++ b/lustre/smfs/symlink.c @@ -1,74 +1,86 @@ -/* - * smfs/symlink.c +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ -#define DEBUG_SUBSYSTEM S_SNAP + +#define DEBUG_SUBSYSTEM S_SM #include #include #include #include #include +#include +#include +#include #include +#include -#include "smfs_internal.h" +#include "smfs_internal.h" -static int smfs_readlink(struct dentry * dentry, char * buffer, int buflen) +static int smfs_readlink(struct dentry *dentry, char *buffer, int buflen) { - struct inode *cache_inode = I2CI(dentry->d_inode); - struct inode *cache_dir = NULL; - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; + struct inode *cache_inode = I2CI(dentry->d_inode); + struct dentry *cache_dentry; + int rc = 0; + ENTRY; + + if (!cache_inode) + RETURN(-ENOENT); - if (!cache_inode) - RETURN(-ENOENT); - if (dentry->d_parent && dentry->d_parent->d_inode){ - cache_dir = I2CI(dentry->d_parent->d_inode); - prepare_parent_dentry(&parent, cache_dir); - } - cache_dentry = d_alloc(&parent, &dentry->d_name); - d_add(cache_dentry, cache_inode); - igrab(cache_inode); - - if (cache_inode->i_op && cache_inode->i_op->readlink) - rc = cache_inode->i_op->readlink(cache_dentry, buffer, buflen); - - d_unalloc(cache_dentry); - return rc; + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + GOTO(exit, rc = -ENOMEM); + if (cache_inode->i_op && cache_inode->i_op->readlink) + rc = cache_inode->i_op->readlink(cache_dentry, buffer, buflen); + GOTO(exit, rc); +exit: + post_smfs_dentry(cache_dentry); + return rc; } static int smfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct inode *cache_inode = I2CI(dentry->d_inode); - struct inode *cache_dir = NULL; - struct dentry *cache_dentry; - struct dentry parent; - int rc = 0; - if (!cache_inode) - RETURN(-ENOENT); + struct inode *cache_inode = I2CI(dentry->d_inode); + struct dentry *cache_dentry; + int rc = 0; + ENTRY; - if (dentry->d_parent && dentry->d_parent->d_inode){ - cache_dir = I2CI(dentry->d_parent->d_inode); - prepare_parent_dentry(&parent, cache_dir); - } + if (!cache_inode) + 
RETURN(-ENOENT); - cache_dentry = d_alloc(&parent, &dentry->d_name); + cache_dentry = pre_smfs_dentry(NULL, cache_inode, dentry); + if (!cache_dentry) + GOTO(exit, rc = -ENOMEM); - d_add(cache_dentry, cache_inode); - igrab(cache_inode); - - if (cache_inode->i_op && cache_inode->i_op->follow_link) - rc = cache_inode->i_op->follow_link(cache_dentry, nd); - - d_unalloc(cache_dentry); - return rc; + if (cache_inode->i_op && cache_inode->i_op->follow_link) + rc = cache_inode->i_op->follow_link(cache_dentry, nd); +exit: + post_smfs_dentry(cache_dentry); + return rc; } + struct inode_operations smfs_sym_iops = { - readlink: smfs_readlink, - follow_link: smfs_follow_link, - setxattr: smfs_setxattr, /* BKL held */ + readlink: smfs_readlink, + follow_link: smfs_follow_link, + setxattr: smfs_setxattr, /* BKL held */ getxattr: smfs_getxattr, /* BKL held */ listxattr: smfs_listxattr, /* BKL held */ removexattr: smfs_removexattr, /* BKL held */ diff --git a/lustre/smfs/sysctl.c b/lustre/smfs/sysctl.c index 785b3dd..8c6feab 100644 --- a/lustre/smfs/sysctl.c +++ b/lustre/smfs/sysctl.c @@ -1,10 +1,26 @@ -/* - * Sysctrl entries for Snapfs +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * */ -/* /proc entries */ - -#define DEBUG_SUBSYSTEM S_SNAP +#define DEBUG_SUBSYSTEM S_SM #include #include @@ -14,15 +30,19 @@ #include #include #include +#include +#include +#include #include -#include "smfs_internal.h" +#include + +#include "smfs_internal.h" #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_smfs_root; #endif - /* SYSCTL below */ static struct ctl_table_header *smfs_table_header = NULL; @@ -31,8 +51,8 @@ static struct ctl_table_header *smfs_table_header = NULL; */ #define PSDEV_SMFS (0x130) -#define PSDEV_DEBUG 1 /* control debugging */ -#define PSDEV_TRACE 2 /* control enter/leave pattern */ +#define PSDEV_DEBUG 1 /* control debugging */ +#define PSDEV_TRACE 2 /* control enter/leave pattern */ /* These are global control options */ #define ENTRY_CNT 3 @@ -43,45 +63,46 @@ int sm_debug_level = 0; /* XXX - doesn't seem to be working in 2.2.15 */ static struct ctl_table smfs_ctltable[] = { - {PSDEV_DEBUG, "debug", &sm_debug_level, sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_TRACE, "trace", &sm_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, - {0} + {PSDEV_DEBUG, "debug", &sm_debug_level, sizeof(int), 0644, NULL, + &proc_dointvec}, + {PSDEV_TRACE, "trace", &sm_print_entry, sizeof(int), 0644, NULL, + &proc_dointvec}, + {0} }; static ctl_table smfs_table[2] = { - {PSDEV_SMFS, "smfs", NULL, 0, 0555, smfs_ctltable}, - {0} + {PSDEV_SMFS, "smfs", NULL, 0, 0555, smfs_ctltable}, + {0} }; int __init init_smfs_proc_sys(void) { #ifdef CONFIG_PROC_FS - proc_smfs_root = proc_mkdir("smfs", proc_root_fs); - if (!proc_smfs_root) { - printk(KERN_ERR "SMFS: error registering /proc/fs/smfs\n"); - RETURN(-ENOMEM); - } - proc_smfs_root->owner = THIS_MODULE; + proc_smfs_root = proc_mkdir("smfs", proc_root_fs); + if (!proc_smfs_root) { + printk(KERN_ERR "SMFS: error registering /proc/fs/smfs\n"); + RETURN(-ENOMEM); + } + proc_smfs_root->owner = THIS_MODULE; #endif #ifdef CONFIG_SYSCTL - if ( !smfs_table_header ) - smfs_table_header = - 
register_sysctl_table(smfs_table, 0); + if ( !smfs_table_header ) + smfs_table_header = + register_sysctl_table(smfs_table, 0); #endif - return 0; + return 0; } -void cleanup_smfs_proc_sys(void) +void cleanup_smfs_proc_sys(void) { #ifdef CONFIG_SYSCTL - if ( smfs_table_header ) - unregister_sysctl_table(smfs_table_header); - smfs_table_header = NULL; + if ( smfs_table_header ) + unregister_sysctl_table(smfs_table_header); + smfs_table_header = NULL; #endif #if CONFIG_PROC_FS - remove_proc_entry("smfs", proc_root_fs); + remove_proc_entry("smfs", proc_root_fs); #endif } - diff --git a/lustre/snapfs/options.c b/lustre/snapfs/options.c index fb951fa..b4c3a4d 100644 --- a/lustre/snapfs/options.c +++ b/lustre/snapfs/options.c @@ -17,8 +17,8 @@ static struct list_head option_list; -char *options = NULL; -char *opt_left = NULL; +static char *options = NULL; +static char *opt_left = NULL; int init_option(char *data) { @@ -32,6 +32,7 @@ int init_option(char *data) opt_left = options; return 0; } + /*cleanup options*/ void cleanup_option() { @@ -45,6 +46,7 @@ void cleanup_option() } SNAP_FREE(options, strlen(options) + 1); } + int get_opt(struct option **option, char **pos) { char *name, *value, *left; diff --git a/lustre/tests/cmknod.c b/lustre/tests/cmknod.c index c8659e4..920ee5b 100644 --- a/lustre/tests/cmknod.c +++ b/lustre/tests/cmknod.c @@ -7,6 +7,7 @@ #include #include #include + #define TEST_MINOR 120 #define TEST_MAJOR 25 @@ -16,18 +17,48 @@ void usage(char *prog) exit(1); } +/* UMKA: This stuff inlined here instead of using appropriate header + to avoid linking to symbols which is not present in newer libc. + + Currently this is the case, as UML image contains RedHat 9 and + developers use something newer (Fedora, etc.). 
*/ +inline unsigned int +__gnu_dev_major (unsigned long long int __dev) +{ + return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff); +} + +inline unsigned int +__gnu_dev_minor (unsigned long long int __dev) +{ + return (__dev & 0xff) | ((unsigned int) (__dev >> 12) & ~0xff); +} + +inline unsigned long long int +__gnu_dev_makedev (unsigned int __major, unsigned int __minor) +{ + return ((__minor & 0xff) | ((__major & 0xfff) << 8) + | (((unsigned long long int) (__minor & ~0xff)) << 12) + | (((unsigned long long int) (__major & ~0xfff)) << 32)); +} + +#define __minor(dev) __gnu_dev_minor(dev) +#define __major(dev) __gnu_dev_major(dev) +#define __makedev(maj, min) __gnu_dev_makedev(maj, min) + int main( int argc, char **argv) { char *prog = argv[0]; char *filename = argv[1]; int rc; struct stat st; - dev_t device = makedev(TEST_MAJOR, TEST_MINOR); + dev_t device = __makedev(TEST_MAJOR, TEST_MINOR); if (argc != 2) usage(prog); unlink(filename); + /* First try block devices */ rc = mknod(filename, 0700 | S_IFBLK, device); if ( rc < 0 ) { @@ -42,8 +73,10 @@ int main( int argc, char **argv) prog, filename, errno, strerror(errno)); return 3; } + if ( st.st_rdev != device) { - fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device)); + fprintf(stderr, "%s: created device other than requested: (%u,%u) instead of (%u,%u)\n", + prog, __major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device)); return 4; } if (!S_ISBLK(st.st_mode)) { @@ -73,7 +106,8 @@ int main( int argc, char **argv) return 8; } if ( st.st_rdev != device) { - fprintf(stderr, "%s: created device other than requested: (%d,%d) instead of (%d,%d)\n", prog, major(st.st_rdev),minor(st.st_rdev),major(device),minor(device)); + fprintf(stderr, "%s: created device other than requested: (%u,%u) instead of (%u,%u)\n", + prog, 
__major(st.st_rdev),__minor(st.st_rdev),__major(device),__minor(device)); return 9; } if (!S_ISCHR(st.st_mode)) { diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index 9dd1ece..d8d243b 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -10,11 +10,11 @@ config=$NAME.xml mkconfig=$NAME.sh if [ "$PORTALS" ]; then - portals_opt="--portals=$PORTALS" + portals_opt="--portals=$PORTALS" fi if [ "$LUSTRE" ]; then - lustre_opt="--lustre=$LUSTRE" + lustre_opt="--lustre=$LUSTRE" fi if [ "$LDAPURL" ]; then diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 0f8fe13..44fa4b9 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -7,9 +7,11 @@ config=${1:-local.xml} LMC="${LMC:-lmc} -m $config" TMP=${TMP:-/tmp} +FSTYPE=${FSTYPE:-ext3} + MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} MDSSIZE=${MDSSIZE:-100000} -FSTYPE=${FSTYPE:-ext3} + MOUNT=${MOUNT:-/mnt/lustre} MOUNT2=${MOUNT2:-${MOUNT}2} NETWORKTYPE=${NETWORKTYPE:-tcp} @@ -17,6 +19,26 @@ NETWORKTYPE=${NETWORKTYPE:-tcp} OSTDEV=${OSTDEV:-$TMP/ost1-`hostname`} OSTSIZE=${OSTSIZE:-200000} +MDS_BACKFSTYPE=${MDS_BACKFSTYPE:-ext3} +OST_BACKFSTYPE=${OST_BACKFSTYPE:-ext3} + +MDS_BACKDEV=${MDS_BACKDEV:-$TMP/mds1-`hostname`} +OST_BACKDEV=${OST_BACKDEV:-$TMP/ost1-`hostname`} + +MDS_MOUNT_OPTS=${MDS_MOUNT_OPTS:-"errors=remount-ro"} +OST_MOUNT_OPTS=${OST_MOUNT_OPTS:-"errors=remount-ro"} + +# adding some options needed for ext3 on 2.4.x kernels +if test "x$(uname -r | sed -e 's/-.*//' -e 's/\.[[:digit:]]*$//')" = "x2.4"; then + if test "x$FSTYPE" = "xext3" || test "x$FSTYPE" = "xldiskfs"; then + if test "x$OST_MOUNT_OPTS" = "x"; then + OST_MOUNT_OPTS="asyncdel" + else + OST_MOUNT_OPTS="$OST_MOUNT_OPTS,asyncdel" + fi + fi +fi + # specific journal size for the ost, in MB JSIZE=${JSIZE:-0} [ "$JSIZE" -gt 0 ] && JARG="--journal_size $JSIZE" @@ -33,12 +55,24 @@ ${LMC} --add node --node localhost || exit 10 ${LMC} --add net --node localhost --nid `hostname` --nettype $NETWORKTYPE || exit 11 ${LMC} 
--add net --node client --nid '*' --nettype $NETWORKTYPE || exit 12 +echo "MDS mount options are: $MDS_MOUNT_OPTS" + # configure mds server -${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE $JARG $IARG || exit 20 +${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 \ +--fstype $FSTYPE --backfstype $MDS_BACKFSTYPE --dev $MDSDEV \ +--backdev $MDS_BACKDEV --mountfsoptions $MDS_MOUNT_OPTS \ +--size $MDSSIZE $JARG $IARG || exit 20 + +echo "OST mount options are: $OST_MOUNT_OPTS" # configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 -${LMC} --add ost --nspath /mnt/ost_ns --node localhost --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE $JARG || exit 30 +${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES \ +--stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 + +${LMC} --add ost --nspath /mnt/ost_ns --node localhost --lov lov1 \ +--fstype $FSTYPE --backfstype $OST_BACKFSTYPE --dev $OSTDEV \ +--backdev $OST_BACKDEV --mountfsoptions $OST_MOUNT_OPTS \ +--size $OSTSIZE $JARG || exit 30 # create client config ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 diff --git a/lustre/tests/lsmfs.sh b/lustre/tests/lsmfs.sh new file mode 100755 index 0000000..2d435c4 --- /dev/null +++ b/lustre/tests/lsmfs.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +MDSDEV=smfs OSTDEV=smfs FSTYPE=smfs MDS_MOUNT_OPTS="kml" OST_MOUNT_OPTS="kml" \ +OSTSIZE=100000 MDSSIZE=50000 MDS_BACKFSTYPE=ext3 OST_BACKFSTYPE=ext3 \ +MDS_BACKDEV=/tmp/mds1-$(hostname) OST_BACKDEV=/tmp/ost1-$(hostname) sh llmount.sh diff --git a/lustre/tests/oos.sh b/lustre/tests/oos.sh index 5894c4f..910346e 100755 --- a/lustre/tests/oos.sh +++ b/lustre/tests/oos.sh @@ -34,6 +34,12 @@ fi export LANG=C LC_LANG=C # for "No space left on device" message +# make sure, that log file will be removed. 
Somehow it was possible +# to me, that log file had +a and could not be rewritten, what led +# to test fail. +chattr -ai $LOG >/dev/null 2>&1 +rm -f $LOG >/dev/null 2>&1 + # make sure we stripe over all OSTs to avoid OOS on only a subset of OSTs $LFS setstripe $OOS 65536 0 $STRIPECOUNT if dd if=/dev/zero of=$OOS count=$(($ORIGFREE + 100)) bs=1k 2> $LOG; then @@ -54,6 +60,7 @@ for OSC in /proc/fs/lustre/osc/OSC*MNT*; do GRANT=`cat $OSC/cur_grant_bytes` [ $(($AVAIL - $GRANT / 1024)) -lt 400 ] && OSCFULL=full done + if [ -z "$OSCFULL" ]; then echo "no OSTs are close to full" grep [0-9] /proc/fs/lustre/osc/OSC*MNT*/{kbytesavail,cur*} diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 2f4130b..d74fa4f 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -537,7 +537,7 @@ class LCTLInterface: quit""" % (type, name, uuid) self.run(cmds) - def setup(self, name, setup = ""): + def setup(self, name, setup = ""): cmds = """ cfg_device %s setup %s @@ -783,8 +783,8 @@ def loop_base(): panic ("can't access loop devices") return loop -# find loop device assigned to thefile -def find_loop(file): +# find loop device assigned to the file +def find_assigned_loop(file): loop = loop_base() for n in xrange(0, MAX_LOOP_DEVICES): dev = loop + str(n) @@ -799,19 +799,29 @@ def find_loop(file): return '' # create file if necessary and assign the first free loop device -def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reformat): - dev = find_loop(file) +def init_loop(file, size, fstype, journal_size, inode_size, + mkfsoptions, reformat, backfstype, backfile): + if fstype == 'smfs': + realfile = backfile + realfstype = backfstype + else: + realfile = file + realfstype = fstype + + dev = find_assigned_loop(realfile) if dev: - print 'WARNING file:', file, 'already mapped to', dev + print 'WARNING file:', realfile, 'already mapped to', dev return dev - if reformat or not os.access(file, os.R_OK | os.W_OK): + + if reformat or not os.access(realfile, os.R_OK | 
os.W_OK): if size < 8000: - panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) - (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, - file)) + panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (realfile, size)) + (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, realfile)) if ret: - panic("Unable to create backing store:", file) - mkfs(file, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) + panic("Unable to create backing store:", realfile) + + mkfs(realfile, size, realfstype, journal_size, inode_size, + mkfsoptions, isblock=0) loop = loop_base() # find next free loop @@ -820,7 +830,7 @@ def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reforma if os.access(dev, os.R_OK): (stat, out) = run('losetup', dev) if stat: - run('losetup', dev, file) + run('losetup', dev, realfile) return dev else: print "out of loop devices" @@ -830,7 +840,7 @@ def init_loop(file, size, fstype, journal_size, inode_size, mkfsoptions, reforma # undo loop assignment def clean_loop(file): - dev = find_loop(file) + dev = find_assigned_loop(file) if dev: ret, out = run('losetup -d', dev) if ret: @@ -844,11 +854,13 @@ def need_format(fstype, dev): # initialize a block device if needed def block_dev(dev, size, fstype, reformat, autoformat, journal_size, - inode_size, mkfsoptions): - if config.noexec: return dev - if not is_block(dev): + inode_size, mkfsoptions, backfstype, backdev): + if config.noexec: + return dev + + if fstype == 'smfs' or not is_block(dev): dev = init_loop(dev, size, fstype, journal_size, inode_size, - mkfsoptions, reformat) + mkfsoptions, reformat, backfstype, backdev) elif reformat or (need_format(fstype, dev) and autoformat == 'yes'): mkfs(dev, size, fstype, journal_size, inode_size, mkfsoptions, isblock=0) @@ -1350,11 +1362,14 @@ class MDSDEV(Module): def __init__(self,db): Module.__init__(self, 'MDSDEV', db) self.devpath = 
self.db.get_val('devpath','') + self.backdevpath = self.db.get_val('backdevpath','') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') + self.backfstype = self.db.get_val('backfstype', '') self.nspath = self.db.get_val('nspath', '') self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) @@ -1403,15 +1418,26 @@ class MDSDEV(Module): self.target_dev_uuid = self.uuid self.uuid = target_uuid - # modules + + # loading modules self.add_lustre_module('mdc', 'mdc') self.add_lustre_module('osc', 'osc') self.add_lustre_module('lov', 'lov') self.add_lustre_module('mds', 'mds') + + if self.fstype == 'smfs': + self.add_lustre_module('smfs', 'smfs') + if self.fstype == 'ldiskfs': self.add_lustre_module('ldiskfs', 'ldiskfs') + if self.fstype: self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) + + # if fstype is smfs, then we should also take care about backing + # store fs. 
+ if self.fstype == 'smfs': + self.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype)) def load_module(self): if self.active: @@ -1431,12 +1457,34 @@ class MDSDEV(Module): # never reformat here blkdev = block_dev(self.devpath, self.size, self.fstype, 0, self.format, self.journal_size, self.inode_size, - self.mkfsoptions) + self.mkfsoptions, self.backfstype, self.backdevpath) if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") try: - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, self.name)) + if config.mountfsoptions != None: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions != None: + mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + # we count, that mountfsoptions is always not None for smfs + if self.fstype == 'smfs': + realdev = self.fstype + mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions, + self.backfstype, + blkdev) + else: + realdev = blkdev + + if mountfsoptions != None: + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s %s %s" %(realdev, self.fstype, + self.name, mountfsoptions)) + else: + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s %s" %(realdev, self.fstype, + self.name)) except CommandError, e: if e.rc == 2: panic("MDS is missing the config log. 
Need to run " + @@ -1448,12 +1496,44 @@ class MDSDEV(Module): if is_prepared(self.name): return self.info(self.devpath, self.fstype, self.format) + blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s" %(blkdev, self.fstype)) - + self.inode_size, self.mkfsoptions, self.backfstype, + self.backdevpath) + + if config.mountfsoptions != None: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions != None: + mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + # Even for writing logs we mount mds with supplied mount options + # because it will not mount smfs (if used) otherwise. + + # we count, that mountfsoptions is always not None for smfs + if self.fstype == 'smfs': + realdev = self.fstype + mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions, + self.backfstype, + blkdev) + else: + realdev = blkdev + + # As mount options are passed by 4th param to config tool, we need + # to pass something in 3rd param. But we do not want this 3rd param + # be counted as a profile name for reading log on MDS setup, thus, + # we pass there some predefined sign @dumb, which will be checked + # in MDS code and skipped. 
+ if mountfsoptions != None: + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s %s %s" %(realdev, self.fstype, 'dumb', + mountfsoptions)) + else: + lctl.newdev("mds", self.name, self.uuid, + setup ="%s %s %s" %(realdev, self.fstype, 'dumb')) + # record logs for the MDS lov for uuid in self.filesystem_uuids: log("recording clients for filesystem:", uuid) @@ -1522,7 +1602,11 @@ class MDSDEV(Module): e.dump() cleanup_error(e.rc) Module.cleanup(self) - clean_loop(self.devpath) + + if self.fstype == 'smfs': + clean_loop(self.backdevpath) + else: + clean_loop(self.devpath) def msd_remaining(self): out = lctl.device_list() @@ -1558,18 +1642,25 @@ class MDSDEV(Module): print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) - clean_loop(self.devpath) + + if self.fstype == 'smfs': + clean_loop(self.backdevpath) + else: + clean_loop(self.devpath) class OSD(Module): def __init__(self, db): Module.__init__(self, 'OSD', db) self.osdtype = self.db.get_val('osdtype') self.devpath = self.db.get_val('devpath', '') + self.backdevpath = self.db.get_val('backdevpath', '') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.inode_size = self.db.get_val_int('inodesize', 0) self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') self.fstype = self.db.get_val('fstype', '') + self.backfstype = self.db.get_val('backfstype', '') self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') ost = self.db.lookup(target_uuid) @@ -1594,11 +1685,16 @@ class OSD(Module): self.uuid = target_uuid # modules self.add_lustre_module('ost', 'ost') + if self.fstype == 'smfs': + self.add_lustre_module('smfs', 'smfs') # FIXME: should we default to ext3 here? 
if self.fstype == 'ldiskfs': self.add_lustre_module('ldiskfs', 'ldiskfs') if self.fstype: self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) + if self.fstype == 'smfs': + self.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype)) + self.add_lustre_module(self.osdtype, self.osdtype) def load_module(self): @@ -1622,10 +1718,33 @@ class OSD(Module): else: blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions) - lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, - self.failover_ost)) + self.inode_size, self.mkfsoptions, self.backfstype, + self.backdevpath) + if config.mountfsoptions != None: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions != None: + mountfsoptions = mountfsoptions + ' ' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + # we count, that mountfsoptions is always not None for smfs + if self.fstype == 'smfs': + realdev = self.fstype + mountfsoptions = "%s,type=%s,dev=%s" % (mountfsoptions, + self.backfstype, + blkdev) + else: + realdev = blkdev + + if mountfsoptions != None: + lctl.newdev(self.osdtype, self.name, self.uuid, + setup ="%s %s %s %s" %(realdev, self.fstype, + self.failover_ost, + mountfsoptions)) + else: + lctl.newdev(self.osdtype, self.name, self.uuid, + setup ="%s %s %s" %(realdev, self.fstype, + self.failover_ost)) if not is_prepared('OSS'): lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") @@ -1663,7 +1782,10 @@ class OSD(Module): e.dump() cleanup_error(e.rc) if not self.osdtype == 'obdecho': - clean_loop(self.devpath) + if self.fstype == 'smfs': + clean_loop(self.backdevpath) + else: + clean_loop(self.devpath) def mgmt_uuid_for_fs(mtpt_name): if not mtpt_name: @@ -2532,6 +2654,7 @@ lconf_options = [ ('nosetup', "Skip device setup/cleanup step."), ('reformat', "Reformat all devices (without question)"), ('mkfsoptions', "Additional options for the mk*fs 
command line", PARAM), + ('mountfsoptions', "Additional options for mount fs command line", PARAM), ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 86f20ef..06c9b82 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -253,10 +253,10 @@ command_t cmdlist[] = { " check all records from index 1 by default."}, {"llog_cancel", jt_llog_cancel, 0, "cancel one record in log.\n" - "usage: llog_cancel "}, + "usage: llog_cancel "}, {"llog_remove", jt_llog_remove, 0, - "remove one log from catlog, erase it from disk.\n" - "usage: llog_remove "}, + "remove one log from catalog, erase it from disk.\n" + "usage: llog_remove "}, /* Debug commands */ {"======== debug =========", jt_noop, 0, "debug"}, diff --git a/lustre/utils/lmc b/lustre/utils/lmc index 567689e..37cfab7 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -85,7 +85,9 @@ Object creation command summary: --node node_name --mds mds_name --dev path + --backdev path --fstype extN|ext3 + --backfstype ext3|tmpfs --size size --nspath --journal_size size @@ -103,8 +105,10 @@ Object creation command summary: --ost ost_name --lov lov_name --dev path + --backdev path --size size --fstype extN|ext3 + --backfstype ext3|tmpfs --journal_size size --inode_size size --obdtype obdecho|obdfilter @@ -178,11 +182,14 @@ lmc_options = [ ('failover', "Enable failover support on OSTs or MDS?"), ('group', "", PARAM), ('dev', "Path of the device on local system.", PARAM,""), + ('backdev', "Path of the device for backing storage on local system.", PARAM,""), ('size', "Specify the size of the device if needed.", PARAM,"0"), ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"), ('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"), ('fstype', "Optional argument to specify the filesystem type.", PARAM, 
"ext3"), + ('backfstype', "Optional argument to specify the backing filesystem type.", PARAM, "ext3"), ('mkfsoptions', "Optional argument to mkfs.", PARAM, ""), + ('mountfsoptions', "Optional argument to mount fs.", PARAM, ""), ('ostuuid', "", PARAM,""), ('nspath', "Local mount point of server namespace.", PARAM,""), ('format', ""), @@ -367,13 +374,18 @@ class GenConfig: return ldlm def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, - node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", mkfsoptions=""): + node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", + mkfsoptions="", mountfsoptions="", backfs="", backdevname=""): osd = self.newService("osd", name, uuid) osd.setAttribute('osdtype', osdtype) osd.appendChild(self.ref("target", ost_uuid)) osd.appendChild(self.ref("node", node_uuid)) if fs: self.addElement(osd, "fstype", fs) + if backfs: + self.addElement(osd, "backfstype", backfs) + if backdevname: + self.addElement(osd, "backdevpath", backdevname) if devname: dev = self.addElement(osd, "devpath", devname) self.addElement(osd, "autoformat", format) @@ -385,6 +397,8 @@ class GenConfig: self.addElement(osd, "inodesize", "%s" % (inode_size)) if mkfsoptions: self.addElement(osd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(osd, "mountfsoptions", mountfsoptions) if nspath: self.addElement(osd, "nspath", nspath) return osd @@ -428,10 +442,15 @@ class GenConfig: def mdsdev(self, name, uuid, fs, devname, format, node_uuid, mds_uuid, dev_size=0, journal_size=0, inode_size=256, - nspath="", mkfsoptions=""): + nspath="", mkfsoptions="", mountfsoptions="", backfs="", + backdevname=""): mdd = self.newService("mdsdev", name, uuid) self.addElement(mdd, "fstype", fs) + if backfs: + self.addElement(mdd, "backfstype", backfs) dev = self.addElement(mdd, "devpath", devname) + if backdevname: + self.addElement(mdd, "backdevpath", backdevname) self.addElement(mdd, "autoformat", format) if dev_size: self.addElement(mdd, 
"devsize", "%s" % (dev_size)) @@ -443,6 +462,9 @@ class GenConfig: self.addElement(mdd, "nspath", nspath) if mkfsoptions: self.addElement(mdd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(mdd, "mountfsoptions", mountfsoptions) + mdd.appendChild(self.ref("node", node_uuid)) mdd.appendChild(self.ref("target", mds_uuid)) return mdd @@ -706,12 +728,15 @@ def add_mds(gen, lustre, options): mds.setAttribute('failover', "1") devname = get_option(options, 'dev') + backdevname = get_option(options, 'backdev') size = get_option(options, 'size') fstype = get_option(options, 'fstype') + backfstype = get_option(options, 'backfstype') journal_size = get_option(options, 'journal_size') inode_size = get_option(options, 'inode_size') nspath = get_option(options, 'nspath') mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') node_uuid = name2uuid(lustre, node_name, 'node') @@ -723,7 +748,8 @@ def add_mds(gen, lustre, options): mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, get_format_flag(options), node_uuid, mds_uuid, - size, journal_size, inode_size, nspath, mkfsoptions) + size, journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions, backfstype, backdevname) lustre.appendChild(mdd) @@ -753,19 +779,25 @@ def add_ost(gen, lustre, options): if osdtype == 'obdecho': fstype = '' + backfstype = '' devname = '' + backdevname = '' size = 0 fstype = '' journal_size = '' inode_size = '' mkfsoptions = '' + mountfsoptions = '' else: devname = get_option(options, 'dev') # can be unset for bluearcs + backdevname = get_option(options, 'backdev') size = get_option(options, 'size') fstype = get_option(options, 'fstype') + backfstype = get_option(options, 'backfstype') journal_size = get_option(options, 'journal_size') inode_size = get_option(options, 'inode_size') mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') nspath = get_option(options, 'nspath') 
@@ -801,7 +833,8 @@ def add_ost(gen, lustre, options): osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid, node_uuid, size, - journal_size, inode_size, nspath, mkfsoptions) + journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions, backfstype, backdevname) node = findByName(lustre, node_name, "node") diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index adbc384..3629d0a 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -170,6 +170,7 @@ int jt_lcfg_setup(int argc, char **argv) lcfg.lcfg_inllen3 = strlen(argv[3]) + 1; lcfg.lcfg_inlbuf3 = argv[3]; } + if (argc > 4) { lcfg.lcfg_inllen4 = strlen(argv[4]) + 1; lcfg.lcfg_inlbuf4 = argv[4]; diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 3a4089a..d047810 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -1989,7 +1989,7 @@ int jt_llog_remove(int argc, char **argv) if (argc == 3) fprintf(stdout, "log %s are removed.\n", argv[2]); else - fprintf(stdout, "the log in catlog %s are removed. \n", argv[1]); + fprintf(stdout, "the log in catalog %s are removed. \n", argv[1]); } else fprintf(stderr, "OBD_IOC_LLOG_REMOVE failed: %s\n", strerror(errno)); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index dfec4fa..6968cec 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -132,7 +132,7 @@ void lustre_assert_wire_constants(void) (long long)REINT_RENAME); LASSERTF(REINT_OPEN == 6, " found %lld\n", (long long)REINT_OPEN); - LASSERTF(REINT_MAX == 6, " found %lld\n", + LASSERTF(REINT_MAX == 8, " found %lld\n", (long long)REINT_MAX); LASSERTF(DISP_IT_EXECD == 1, " found %lld\n", (long long)DISP_IT_EXECD);