From 1e7fc4ddb3f805d95345448e7205b539f396ed4a Mon Sep 17 00:00:00 2001 From: scjody Date: Tue, 16 May 2006 10:32:50 +0000 Subject: [PATCH] Merge b1_5 from b1_4 (20060515_1255) --- .../patches/ext3-lookup-dotdot-2.6.9.patch | 63 +++ .../kernel_patches/series/ldiskfs-2.6-rhel4.series | 1 + .../kernel_patches/series/ldiskfs-2.6-suse.series | 1 + .../series/ldiskfs-2.6.12-vanilla.series | 1 + lustre/ChangeLog | 16 +- lustre/autoMakefile.am | 9 +- lustre/include/linux/lustre_compat25.h | 2 - lustre/include/lustre/liblustreapi.h | 7 +- lustre/include/lustre/lustre_idl.h | 4 +- lustre/include/obd.h | 1 + .../patches/ext3-lookup-dotdot-2.4.20.patch | 63 +++ .../patches/ext3-lookup-dotdot-2.6.9.patch | 63 +++ .../kernel_patches/series/ldiskfs-2.6-rhel4.series | 1 + .../kernel_patches/series/ldiskfs-2.6-suse.series | 1 + .../series/ldiskfs-2.6.12-vanilla.series | 1 + lustre/kernel_patches/series/rhel-2.4.21 | 1 + lustre/kernel_patches/targets/2.6-suse.target.in | 2 +- lustre/ldiskfs/lustre_quota_fmt.c | 5 +- lustre/ldlm/ldlm_lib.c | 6 +- lustre/ldlm/ldlm_lockd.c | 10 +- lustre/ldlm/ldlm_request.c | 10 +- lustre/liblustre/namei.c | 3 +- lustre/liblustre/rw.c | 8 +- lustre/llite/dcache.c | 149 +++-- lustre/llite/file.c | 343 +++++++---- lustre/llite/llite_internal.h | 21 +- lustre/llite/llite_lib.c | 21 +- lustre/llite/namei.c | 72 ++- lustre/llite/rw.c | 2 - lustre/lov/lov_internal.h | 6 +- lustre/lov/lov_obd.c | 6 +- lustre/lov/lov_request.c | 8 +- lustre/mdc/mdc_lib.c | 3 +- lustre/mdc/mdc_request.c | 20 +- lustre/mds/handler.c | 31 +- lustre/mds/mds_fs.c | 5 +- lustre/mds/mds_internal.h | 15 +- lustre/mds/mds_join.c | 2 +- lustre/mds/mds_open.c | 56 +- lustre/mds/mds_reint.c | 47 +- lustre/obdclass/llog_ioctl.c | 9 +- lustre/obdfilter/filter.c | 10 +- lustre/obdfilter/filter_io.c | 7 +- lustre/tests/.cvsignore | 2 + lustre/tests/createdestroy.c | 6 +- lustre/tests/ll_dirstripe_verify.c | 42 +- lustre/tests/mmap_sanity.c | 4 +- lustre/tests/recovery-small.sh | 237 +++++++- 
lustre/tests/sanity.sh | 32 +- lustre/tests/small_write.c | 56 +- lustre/tests/utime.c | 2 +- lustre/tests/writemany.c | 12 +- lustre/utils/Makefile.am | 6 +- lustre/utils/lconf | 24 +- lustre/utils/liblustreapi.c | 36 +- lustre/utils/llobdstat.pl | 6 +- lustre/utils/llog_reader.c | 124 ++-- lustre/utils/llstat.pl | 36 +- lustre/utils/llverdev.c | 502 ++++++++++++++++ lustre/utils/llverfs.c | 630 +++++++++++++++++++++ lustre/utils/obd.c | 6 +- lustre/utils/obdio.c | 4 +- lustre/utils/obdiolib.c | 162 +++--- lustre/utils/obdiolib.h | 9 +- lustre/utils/wirecheck.c | 1 + 65 files changed, 2587 insertions(+), 464 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch create mode 100644 lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch create mode 100644 lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch create mode 100644 lustre/utils/llverdev.c create mode 100644 lustre/utils/llverfs.c diff --git a/ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch b/ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch new file mode 100644 index 0000000..f9d4857 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch @@ -0,0 +1,63 @@ +Index: linux-2.6.9-full/fs/ext3/iopen.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/iopen.c 2006-04-25 08:51:11.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/iopen.c 2006-05-06 01:21:11.000000000 +0400 +@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc + assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); + } + +- if (!list_empty(&inode->i_dentry)) { +- alternate = list_entry(inode->i_dentry.next, +- struct dentry, d_alias); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ /* ignore dentries created for ".." 
to preserve ++ * proper dcache hierarchy -- bug 10458 */ ++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED) ++ continue; + dget_locked(alternate); + spin_lock(&alternate->d_lock); + alternate->d_flags |= DCACHE_REFERENCED; +Index: linux-2.6.9-full/fs/ext3/namei.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-05-06 01:21:10.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/namei.c 2006-05-06 01:29:30.000000000 +0400 +@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct + return ERR_PTR(-EACCES); + } + ++ /* ".." shouldn't go into dcache to preserve dcache hierarchy ++ * otherwise we'll get parent being a child of actual child. ++ * see bug 10458 for details -bzzz */ ++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 || ++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) { ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* first, look for an existing dentry - any one is good */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ if (goal == NULL) { ++ /* there is no alias, we need to make current dentry: ++ * a) inaccessible for __d_lookup() ++ * b) inaccessible for iopen */ ++ J_ASSERT(list_empty(&dentry->d_alias)); ++ dentry->d_flags |= DCACHE_NFSFS_RENAMED; ++ /* this is d_instantiate() ... 
*/ ++ list_add(&dentry->d_alias, &inode->i_dentry); ++ dentry->d_inode = inode; ++ } ++ spin_unlock(&dcache_lock); ++ if (goal) ++ iput(inode); ++ return goal; ++ } ++ + return iopen_connect_dentry(dentry, inode, 1); + } + diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series index bab81b9..3661023 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series @@ -10,3 +10,4 @@ ext3-extents-2.6.9-rhel4.patch ext3-mballoc2-2.6.9-rhel4.patch ext3-nlinks-2.6.9.patch ext3-ialloc-2.6.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series index 2584c1d..efa7700 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series @@ -12,3 +12,4 @@ ext3-nlinks-2.6.7.patch ext3-rename-reserve-2.6-suse.patch ext3-htree-dot-2.6.5-suse.patch ext3-ialloc-2.6.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6.12-vanilla.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6.12-vanilla.series index 7d0a383..b44e35e 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6.12-vanilla.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6.12-vanilla.series @@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch ext3-remove-cond_resched-calls-2.6.12.patch ext3-htree-dot-2.6.patch ext3-external-journal-2.6.12.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 7fdebda..0bae13c 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -208,11 +208,25 @@ Details : Check that we actually have objects in a file before doing any Severity : minor Frequency : Rare -Bugzilla : 10484 +Bugzilla : 10484 Description: Request leak when working with deleted CWD Details : Introduce advanced request refcount tracking for requests referenced from 
lustre intent. +Severity : Enhancement +Bugzilla : 10482 +Description: Cache open file handles on client. +Details : MDS now will return special lock along with openhandle, if + requested and client is allowed to hold openhandle, even if unused, + until such a lock is revoked. Helps NFS a lot, since NFS is opening + and closing files for every read/write operation. + +Severity : Enhancement +Bugzilla : 9291 +Description: Cache open negative dentries on client when possible. +Details : Guard negative dentries with UPDATE lock on parent dir, drop + negative dentries on lock revocation. + ------------------------------------------------------------------------------ diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 368c081..be4dae8 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -58,13 +58,14 @@ sources: $(LDISKFS) lvfs-sources obdclass-sources lustre_build_version all-recursive: lustre_build_version +BUILD_VER_H=$(top_builddir)/lustre/include/linux/lustre_build_version.h + lustre_build_version: perl $(top_builddir)/lustre/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver - cmp -s $(top_builddir)/lustre/include/linux/lustre_build_version.h tmpver \ - 2> /dev/null && \ - $(RM) tmpver || \ - mv tmpver $(top_builddir)/lustre/include/linux/lustre_build_version.h + cmp -s $(BUILD_VER_H) tmpver > tmpdiff 2> /dev/null && \ + $(RM) tmpver tmpdiff || \ + mv tmpver $(BUILD_VER_H) CSTK=/tmp/checkstack CSTKO=/tmp/checkstack.orig diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 066cc20..ec22d4a 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -326,12 +326,10 @@ static inline int page_mapped(struct page *page) } #endif /* !HAVE_PAGE_MAPPED */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)) static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry) { 
update_atime(dentry->d_inode); } -#endif static inline void file_accessed(struct file *file) { diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index 08f8786..2473a9a 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -11,9 +11,12 @@ typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args); /* liblustreapi.c */ -extern int llapi_file_create(char *name, long stripe_size, int stripe_offset, - int stripe_count, int stripe_pattern); +extern int llapi_file_create(const char *name, long stripe_size, + int stripe_offset, int stripe_count, + int stripe_pattern); extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum); +#define HAVE_LLAPI_FILE_LOOKUP +extern int llapi_file_lookup(int dirfd, const char *name); extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive, int verbose, int quiet); extern int llapi_obd_statfs(char *path, __u32 type, __u32 index, diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index bd60687..e339db0 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -388,7 +388,8 @@ struct obdo { __u32 o_mds; __u32 o_stripe_idx; /* holds stripe idx */ __u32 o_padding_1; - char o_inline[OBD_INLINESZ]; /* fid in ost writes */ + char o_inline[OBD_INLINESZ]; + /* lustre_handle + llog_cookie */ }; #define o_dirty o_blocks @@ -760,6 +761,7 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */ #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */ #define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file*/ +#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */ #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */ #define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */ diff 
--git a/lustre/include/obd.h b/lustre/include/obd.h index feb2b49..772cbec 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -364,6 +364,7 @@ struct client_obd { struct mdc_rpc_lock *cl_rpc_lock; struct mdc_rpc_lock *cl_setattr_lock; + struct mdc_rpc_lock *cl_close_lock; struct osc_creator cl_oscc; /* mgc datastruct */ diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch new file mode 100644 index 0000000..0329dfe --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch @@ -0,0 +1,63 @@ +Index: linux-2.4.21/fs/ext3/namei.c +=================================================================== +--- linux-2.4.21.orig/fs/ext3/namei.c 2006-04-29 20:48:26.000000000 +0400 ++++ linux-2.4.21/fs/ext3/namei.c 2006-05-06 01:31:51.000000000 +0400 +@@ -955,6 +955,38 @@ static struct dentry *ext3_lookup(struct + } + } + ++ /* ".." shouldn't go into dcache to preserve dcache hierarchy ++ * otherwise we'll get parent being a child of actual child. ++ * see bug 10458 for details -bzzz */ ++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 || ++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) { ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* first, look for an existing dentry - any one is good */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ if (goal == NULL) { ++ /* there is no alias, we need to make current dentry: ++ * a) inaccessible for __d_lookup() ++ * b) inaccessible for iopen */ ++ J_ASSERT(list_empty(&dentry->d_alias)); ++ dentry->d_flags |= DCACHE_NFSFS_RENAMED; ++ /* this is d_instantiate() ... 
*/ ++ list_add(&dentry->d_alias, &inode->i_dentry); ++ dentry->d_inode = inode; ++ } ++ spin_unlock(&dcache_lock); ++ if (goal) ++ iput(inode); ++ return goal; ++ } ++ + return iopen_connect_dentry(dentry, inode, 1); + } + +Index: linux-2.4.21/fs/ext3/iopen.c +=================================================================== +--- linux-2.4.21.orig/fs/ext3/iopen.c 2006-04-29 20:48:23.000000000 +0400 ++++ linux-2.4.21/fs/ext3/iopen.c 2006-04-29 20:59:50.000000000 +0400 +@@ -92,9 +92,12 @@ static struct dentry *iopen_lookup(struc + assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); + } + +- if (!list_empty(&inode->i_dentry)) { +- alternate = list_entry(inode->i_dentry.next, +- struct dentry, d_alias); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ /* ignore dentries created for ".." to preserve ++ * proper dcache hierarchy -- bug 10458 */ ++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED) ++ continue; + dget_locked(alternate); + alternate->d_vfs_flags |= DCACHE_REFERENCED; + iput(inode); diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch new file mode 100644 index 0000000..f9d4857 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch @@ -0,0 +1,63 @@ +Index: linux-2.6.9-full/fs/ext3/iopen.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/iopen.c 2006-04-25 08:51:11.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/iopen.c 2006-05-06 01:21:11.000000000 +0400 +@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc + assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); + } + +- if (!list_empty(&inode->i_dentry)) { +- alternate = list_entry(inode->i_dentry.next, +- struct dentry, d_alias); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ /* ignore dentries created for ".." 
to preserve ++ * proper dcache hierarchy -- bug 10458 */ ++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED) ++ continue; + dget_locked(alternate); + spin_lock(&alternate->d_lock); + alternate->d_flags |= DCACHE_REFERENCED; +Index: linux-2.6.9-full/fs/ext3/namei.c +=================================================================== +--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-05-06 01:21:10.000000000 +0400 ++++ linux-2.6.9-full/fs/ext3/namei.c 2006-05-06 01:29:30.000000000 +0400 +@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct + return ERR_PTR(-EACCES); + } + ++ /* ".." shouldn't go into dcache to preserve dcache hierarchy ++ * otherwise we'll get parent being a child of actual child. ++ * see bug 10458 for details -bzzz */ ++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 || ++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) { ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* first, look for an existing dentry - any one is good */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ if (goal == NULL) { ++ /* there is no alias, we need to make current dentry: ++ * a) inaccessible for __d_lookup() ++ * b) inaccessible for iopen */ ++ J_ASSERT(list_empty(&dentry->d_alias)); ++ dentry->d_flags |= DCACHE_NFSFS_RENAMED; ++ /* this is d_instantiate() ... 
*/ ++ list_add(&dentry->d_alias, &inode->i_dentry); ++ dentry->d_inode = inode; ++ } ++ spin_unlock(&dcache_lock); ++ if (goal) ++ iput(inode); ++ return goal; ++ } ++ + return iopen_connect_dentry(dentry, inode, 1); + } + diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series b/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series index bab81b9..3661023 100644 --- a/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series +++ b/lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series @@ -10,3 +10,4 @@ ext3-extents-2.6.9-rhel4.patch ext3-mballoc2-2.6.9-rhel4.patch ext3-nlinks-2.6.9.patch ext3-ialloc-2.6.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series b/lustre/kernel_patches/series/ldiskfs-2.6-suse.series index 2584c1d..efa7700 100644 --- a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series +++ b/lustre/kernel_patches/series/ldiskfs-2.6-suse.series @@ -12,3 +12,4 @@ ext3-nlinks-2.6.7.patch ext3-rename-reserve-2.6-suse.patch ext3-htree-dot-2.6.5-suse.patch ext3-ialloc-2.6.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series index 7d0a383..b44e35e 100644 --- a/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series +++ b/lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series @@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch ext3-remove-cond_resched-calls-2.6.12.patch ext3-htree-dot-2.6.patch ext3-external-journal-2.6.12.patch +ext3-lookup-dotdot-2.6.9.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index bc6e9f6..dcaff40 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -51,3 +51,4 @@ statfs64-cast-unsigned-2.4-rhel.patch fsprivate-2.4.patch nfsd_iallocsem.patch linux-2.4.24-jbd-handle-EIO-rhel3.patch +ext3-lookup-dotdot-2.4.20.patch diff --git 
a/lustre/kernel_patches/targets/2.6-suse.target.in b/lustre/kernel_patches/targets/2.6-suse.target.in index a0a2633..5e34152 100644 --- a/lustre/kernel_patches/targets/2.6-suse.target.in +++ b/lustre/kernel_patches/targets/2.6-suse.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.5" -lnxrel="7.244" +lnxrel="7.252" KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2 # they include our patches diff --git a/lustre/ldiskfs/lustre_quota_fmt.c b/lustre/ldiskfs/lustre_quota_fmt.c index 9db3f3f..d639b74 100644 --- a/lustre/ldiskfs/lustre_quota_fmt.c +++ b/lustre/ldiskfs/lustre_quota_fmt.c @@ -701,9 +701,8 @@ int lustre_read_dquot(struct lustre_dquot *dquot) struct lustre_disk_dqblk ddquot, empty; int ret = 0; - filp = dquot->dq_info->qi_files[type]; - - if (!filp || !dquot->dq_info) { /* Invalidated quota? */ + /* Invalidated quota? */ + if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) { printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); return -EIO; } diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index a5e1e86..614a91c 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1436,6 +1436,10 @@ int target_handle_qc_callback(struct ptlrpc_request *req) oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl), lustre_swab_obd_quotactl); + if (oqctl == NULL) { + CERROR("Can't unpack obd_quotactl\n"); + RETURN(-EPROTO); + } cli->cl_qchk_stat = oqctl->qc_stat; @@ -1465,7 +1469,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req) qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata), lustre_swab_qdata); if (qdata == NULL) { - CERROR("unpacking request buffer failed!"); + CERROR("Can't unpack qunit_data\n"); RETURN(-EPROTO); } diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 321e578..f2392e5 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -617,11 +617,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) body->lock_flags |= LDLM_FL_AST_SENT; /* We might get 
here prior to ldlm_handle_enqueue setting - LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock into - waiting list, but this is safe and similar code in - ldlm_handle_enqueue will call ldlm_lock_cancel() still, that - would not only cancel the loc, but will also remove it from - waiting list */ + * LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock + * into waiting list, but this is safe and similar code in + * ldlm_handle_enqueue will call ldlm_lock_cancel() still, + * that would not only cancel the lock, but will also remove + * it from waiting list */ if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) { ldlm_lock_cancel(lock); instant_cancel = 1; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 21eee08..d8945a9 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -54,9 +54,10 @@ int ldlm_expired_completion_wait(void *data) if (lock->l_conn_export == NULL) { static cfs_time_t next_dump = 0, last_dump = 0; - LDLM_ERROR(lock, "lock timed out (enq %lus ago); not entering " - "recovery in server code, just going back to sleep", - lock->l_enqueued_time.tv_sec); + LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago); " + "not entering recovery in server code, just going " + "back to sleep", lock->l_enqueued_time.tv_sec, + CURRENT_SECONDS - lock->l_enqueued_time.tv_sec); if (cfs_time_after(cfs_time_current(), next_dump)) { last_dump = next_dump; next_dump = cfs_time_shift(300); @@ -71,8 +72,9 @@ int ldlm_expired_completion_wait(void *data) obd = lock->l_conn_export->exp_obd; imp = obd->u.cli.cl_import; ptlrpc_fail_import(imp, lwd->lwd_conn_cnt); - LDLM_ERROR(lock, "lock timed out (enqueued %lus ago), entering " + LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago), entering " "recovery for %s@%s", lock->l_enqueued_time.tv_sec, + CURRENT_SECONDS - lock->l_enqueued_time.tv_sec, obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid); RETURN(0); diff --git a/lustre/liblustre/namei.c 
b/lustre/liblustre/namei.c index 503480c..f4b19ee 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -340,8 +340,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ - if (!it_disposition(it, DISP_LOOKUP_NEG) || - (it->it_op & IT_CREAT)) { + if (!it_disposition(it, DISP_LOOKUP_NEG) || (it->it_op & IT_CREAT)) { struct lustre_md md; struct llu_inode_info *lli; struct intnl_stat *st; diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index 8917eff..2c5f924 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -349,14 +349,16 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa) oa->o_valid = OBD_MD_FLID; valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME; if (cmd & OBD_BRW_WRITE) - valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME; + valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLUID | OBD_MD_FLGID | + OBD_MD_FLFID | OBD_MD_FLGENER; obdo_from_inode(oa, inode, valid_flags); EXIT; } /* called for each page in a completed rpc.*/ -static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) +static int llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { struct ll_async_page *llap; struct page *page; @@ -371,7 +373,7 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) CERROR("writeback error on page %p index %ld: %d\n", page, page->index, rc); } - EXIT; + RETURN(0); } static struct obd_async_page_ops llu_async_page_ops = { diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 4f0ec58..529f5a7 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -148,6 +148,51 @@ void ll_intent_release(struct lookup_intent *it) EXIT; } +/* Drop dentry if it is not used already, unhash otherwise. + Should be called with dcache lock held! + Returns: 1 if dentry was dropped, 0 if unhashed. 
*/ +int ll_drop_dentry(struct dentry *dentry) +{ + lock_dentry(dentry); + if (atomic_read(&dentry->d_count) == 0) { + CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p " + "inode %p\n", dentry->d_name.len, + dentry->d_name.name, dentry, dentry->d_parent, + dentry->d_inode); + dget_locked(dentry); + __d_drop(dentry); + unlock_dentry(dentry); + spin_unlock(&dcache_lock); + dput(dentry); + spin_lock(&dcache_lock); + return 1; + } + + if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + struct inode *inode = dentry->d_inode; +#endif + CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p " + "inode %p refc %d\n", dentry->d_name.len, + dentry->d_name.name, dentry, dentry->d_parent, + dentry->d_inode, atomic_read(&dentry->d_count)); + /* actually we don't unhash the dentry, rather just + * mark it inaccessible for to __d_lookup(). otherwise + * sys_getcwd() could return -ENOENT -bzzz */ + dentry->d_flags |= DCACHE_LUSTRE_INVALID; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + __d_drop(dentry); + if (inode) { + /* Put positive dentries to orphan list */ + hlist_add_head(&dentry->d_hash, + &ll_i2sbi(inode)->ll_orphan_dentry_list); + } +#endif + } + unlock_dentry(dentry); + return 0; +} + void ll_unhash_aliases(struct inode *inode) { struct list_head *tmp, *head; @@ -162,8 +207,8 @@ void ll_unhash_aliases(struct inode *inode) inode->i_ino, inode->i_generation, inode); head = &inode->i_dentry; -restart: spin_lock(&dcache_lock); +restart: tmp = head; while ((tmp = tmp->next) != head) { struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); @@ -185,35 +230,9 @@ restart: continue; } - - lock_dentry(dentry); - if (atomic_read(&dentry->d_count) == 0) { - CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p " - "inode %p\n", dentry->d_name.len, - dentry->d_name.name, dentry, dentry->d_parent, - dentry->d_inode); - dget_locked(dentry); - __d_drop(dentry); - unlock_dentry(dentry); - spin_unlock(&dcache_lock); - 
dput(dentry); - goto restart; - } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { - CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p " - "inode %p refc %d\n", dentry->d_name.len, - dentry->d_name.name, dentry, dentry->d_parent, - dentry->d_inode, atomic_read(&dentry->d_count)); - /* actually we don't unhash the dentry, rather just - * mark it inaccessible for to __d_lookup(). otherwise - * sys_getcwd() could return -ENOENT -bzzz */ - dentry->d_flags |= DCACHE_LUSTRE_INVALID; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - __d_drop(dentry); - hlist_add_head(&dentry->d_hash, - &ll_i2sbi(inode)->ll_orphan_dentry_list); -#endif - } - unlock_dentry(dentry); + + if (ll_drop_dentry(dentry)) + goto restart; } spin_unlock(&dcache_lock); EXIT; @@ -282,7 +301,6 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, struct lookup_intent *it) { int rc; - struct it_cb_data icbd; struct mdc_op_data op_data; struct ptlrpc_request *req = NULL; struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; @@ -292,13 +310,25 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name, LL_IT2STR(it)); - /* Cached negative dentries are unsafe for now - look them up again */ - if (de->d_inode == NULL) - RETURN(0); + if (de->d_inode == NULL) { + /* We can only use negative dentries if this is stat or lookup, + for opens and stuff we do need to query server. */ + /* If there is IT_CREAT in intent op set, then we must throw + away this negative dentry and actually do the request to + kernel to create whatever needs to be created (if possible)*/ + if (it && (it->it_op & IT_CREAT)) + RETURN(0); + + if (de->d_flags & DCACHE_LUSTRE_INVALID) + RETURN(0); + + rc = ll_have_md_lock(de->d_parent->d_inode, + MDS_INODELOCK_UPDATE); + + RETURN(rc); + } exp = ll_i2mdcexp(de->d_inode); - icbd.icbd_parent = de->d_parent->d_inode; - icbd.icbd_childp = &de; /* Never execute intents for mount points. 
* Attributes will be fixed up in ll_inode_revalidate_it */ @@ -312,6 +342,53 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode, de->d_name.name, de->d_name.len, 0); + if ((it->it_op == IT_OPEN) && de->d_inode) { + struct inode *inode = de->d_inode; + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_client_handle **och_p; + __u64 *och_usecount; + /* We used to check for MDS_INODELOCK_OPEN here, but in fact + * just having LOOKUP lock is enough to justify inode is the + * same. And if inode is the same and we have suitable + * openhandle, then there is no point in doing another OPEN RPC + * just to throw away newly received openhandle. + * There are no security implications too, if file owner or + * access mode is changed, LOOKUP lock is revoked */ + + it->it_create_mode &= ~current->fs->umask; + + if (it->it_flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (it->it_flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + /* Check for the proper lock. */ + if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP)) + goto do_lock; + down(&lli->lli_och_sem); + if (*och_p) { /* Everything is open already, do nothing */ + /*(*och_usecount)++; Do not let them steal our open + handle from under us */ + /* XXX The code above was my original idea, but in case + we have the handle, but we cannot use it due to later + checks (e.g. O_CREAT|O_EXCL flags set), nobody + would decrement counter increased here. So we just + hope the lock won't be invalidated in between. 
But + if it would be, we'll reopen the open request to + MDS later during file open path */ + up(&lli->lli_och_sem); + RETURN(1); + } else { + up(&lli->lli_och_sem); + } + } + +do_lock: rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags, &req, ll_mdc_blocking_ast, 0); /* If req is NULL, then mdc_intent_lock only tried to do a lock match; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 31b2fde..9a5a294 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -51,8 +51,25 @@ static int ll_close_inode_openhandle(struct inode *inode, struct obd_client_handle *och) { struct ptlrpc_request *req = NULL; + struct obd_device *obd; struct obdo *oa; int rc; + ENTRY; + + obd = class_exp2obd(ll_i2mdcexp(inode)); + if (obd == NULL) { + CERROR("Invalid MDC connection handle "LPX64"\n", + ll_i2mdcexp(inode)->exp_handle.h_cookie); + GOTO(out, rc = 0); + } + + /* + * here we check if this is forced umount. If so this is called on + * canceling "open lock" and we do not call mdc_close() in this case, as + * it will not be successful, as import is already deactivated. 
+ */ + if (obd->obd_no_recov) + GOTO(out, rc = 0); oa = obdo_alloc(); if (!oa) @@ -89,8 +106,52 @@ static int ll_close_inode_openhandle(struct inode *inode, inode->i_ino, rc); } - mdc_clear_open_replay_data(och); ptlrpc_req_finished(req); /* This is close request */ + EXIT; +out: + mdc_clear_open_replay_data(och); + + return rc; +} + +int ll_mdc_real_close(struct inode *inode, int flags) +{ + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; + struct obd_client_handle **och_p; + struct obd_client_handle *och; + __u64 *och_usecount; + + ENTRY; + + if (flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + LASSERT(flags & FMODE_READ); + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + + down(&lli->lli_och_sem); + if (*och_usecount) { /* There are still users of this handle, so + skip freeing it. */ + up(&lli->lli_och_sem); + RETURN(0); + } + och=*och_p; + *och_p = NULL; + up(&lli->lli_och_sem); + + if (och) { /* There might be a race and somebody have freed this och + already */ + rc = ll_close_inode_openhandle(inode, och); + och->och_fh.cookie = DEAD_HANDLE_MAGIC; + OBD_FREE(och, sizeof *och); + } RETURN(rc); } @@ -99,8 +160,8 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, struct file *file) { struct ll_file_data *fd = LUSTRE_FPRIVATE(file); - struct obd_client_handle *och = &fd->fd_mds_och; - int rc; + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; ENTRY; /* clear group lock, if present */ @@ -110,9 +171,45 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh); } + + /* Let's see if we have good enough OPEN lock on the file and if + we can skip talking to MDS */ + if (file->f_dentry->d_inode) { /* Can this ever be false? 
*/ + int lockmode; + int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK; + struct lustre_handle lockh; + struct inode *inode = file->f_dentry->d_inode; + struct ldlm_res_id file_res_id = {.name={inode->i_ino, + inode->i_generation}}; + ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}}; + + down(&lli->lli_och_sem); + if (fd->fd_omode & FMODE_WRITE) { + lockmode = LCK_CW; + LASSERT(lli->lli_open_fd_write_count); + lli->lli_open_fd_write_count--; + } else if (fd->fd_omode & FMODE_EXEC) { + lockmode = LCK_PR; + LASSERT(lli->lli_open_fd_exec_count); + lli->lli_open_fd_exec_count--; + } else { + lockmode = LCK_CR; + LASSERT(lli->lli_open_fd_read_count); + lli->lli_open_fd_read_count--; + } + up(&lli->lli_och_sem); + + if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags, + &file_res_id, LDLM_IBITS, &policy,lockmode, + &lockh)) { + rc = ll_mdc_real_close(file->f_dentry->d_inode, + fd->fd_omode); + } + } else { + CERROR("Releasing a file %p with negative dentry %p. Name %s", + file, file->f_dentry, file->f_dentry->d_name.name); + } - rc = ll_close_inode_openhandle(inode, och); - och->och_fh.cookie = DEAD_HANDLE_MAGIC; LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); @@ -170,6 +267,18 @@ static int ll_intent_file_open(struct file *file, void *lmm, ll_prepare_mdc_op_data(&data, parent->d_inode, NULL, name, len, O_RDWR); + /* Usually we come here only for NFSD, and we want open lock. + But we can also get here with pre 2.6.15 patchless kernels, and in + that case that lock is also ok */ + /* We can also get here if there was cached open handle in revalidate_it + * but it disappeared while we were getting from there to ll_file_open. + * But this means this file was closed and immediately opened which + * makes a good candidate for using OPEN lock */ + /* If lmmsize & lmm are not 0, we are just setting stripe info + * parameters. 
No need for the open lock */ + if (!lmm && !lmmsize) + itp->it_flags |= MDS_OPEN_LOCK; + rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, itp, LCK_PW, &data, &lockh, lmm, lmmsize, ldlm_completion_ast, ll_mdc_blocking_ast, NULL, 0); @@ -178,6 +287,11 @@ static int ll_intent_file_open(struct file *file, void *lmm, GOTO(out, rc); } + if (itp->d.lustre.it_lock_mode) { /* If we got lock - release it right + * away */ + ldlm_lock_decref(&lockh, itp->d.lustre.it_lock_mode); + itp->d.lustre.it_lock_mode = 0; + } rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode, (struct ptlrpc_request *)itp->d.lustre.it_data, DLM_REPLY_REC_OFF, NULL); @@ -205,7 +319,7 @@ static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it, } int ll_local_open(struct file *file, struct lookup_intent *it, - struct ll_file_data *fd) + struct ll_file_data *fd, struct obd_client_handle *och) { ENTRY; @@ -213,9 +327,11 @@ int ll_local_open(struct file *file, struct lookup_intent *it, LASSERT(fd != NULL); - ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, &fd->fd_mds_och); + if (och) + ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och); LUSTRE_FPRIVATE(file) = fd; ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras); + fd->fd_omode = it->it_flags; RETURN(0); } @@ -241,7 +357,9 @@ int ll_file_open(struct inode *inode, struct file *file) struct lookup_intent *it, oit = { .it_op = IT_OPEN, .it_flags = file->f_flags }; struct lov_stripe_md *lsm; - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; + struct obd_client_handle **och_p; + __u64 *och_usecount; struct ll_file_data *fd; int rc = 0; ENTRY; @@ -276,25 +394,77 @@ int ll_file_open(struct inode *inode, struct file *file) oit.it_flags &= ~O_EXCL; it = &oit; - rc = ll_intent_file_open(file, NULL, 0, it); + } + + /* Let's see if we have file open on MDS already. 
*/ + if (it->it_flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (it->it_flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + down(&lli->lli_och_sem); + if (*och_p) { /* Open handle is present */ + if (it_disposition(it, DISP_LOOKUP_POS) && /* Positive lookup */ + it_disposition(it, DISP_OPEN_OPEN)) { /* & OPEN happened */ + /* Well, there's extra open request that we do not need, + let's close it somehow. This will decref request. */ + ll_release_openhandle(file->f_dentry, it); + } + (*och_usecount)++; + + rc = ll_local_open(file, it, fd, NULL); + + LASSERTF(rc == 0, "rc = %d\n", rc); + } else { + LASSERT(*och_usecount == 0); + OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); + if (!*och_p) { + ll_file_data_put(fd); + GOTO(out_och_free, rc = -ENOMEM); + } + (*och_usecount)++; + if (!it->d.lustre.it_disposition) { + rc = ll_intent_file_open(file, NULL, 0, it); + if (rc) { + ll_file_data_put(fd); + GOTO(out_och_free, rc); + } + + /* Got some error? Release the request */ + if (it->d.lustre.it_status < 0) { + req = it->d.lustre.it_data; + ptlrpc_req_finished(req); + } + mdc_set_lock_data(&it->d.lustre.it_lock_handle, + file->f_dentry->d_inode); + } + req = it->d.lustre.it_data; + + /* mdc_intent_lock() didn't get a request ref if there was an + * open error, so don't do cleanup on the request here + * (bug 3430) */ + /* XXX (green): Should not we bail out on any error here, not + * just open error? 
*/ + rc = it_open_error(DISP_OPEN_OPEN, it); if (rc) { ll_file_data_put(fd); - GOTO(out, rc); + GOTO(out_och_free, rc); } - } - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - rc = it_open_error(DISP_OPEN_OPEN, it); - /* mdc_intent_lock() didn't get a request ref if there was an open - * error, so don't do cleanup on the request here (bug 3430) */ - if (rc) { - ll_file_data_put(fd); - RETURN(rc); + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); + rc = ll_local_open(file, it, fd, *och_p); + LASSERTF(rc == 0, "rc = %d\n", rc); } + up(&lli->lli_och_sem); - rc = ll_local_open(file, it, fd); - LASSERTF(rc == 0, "rc = %d\n", rc); - + /* Must do this outside lli_och_sem lock to prevent deadlock where + different kind of OPEN lock for this same inode gets cancelled + by ldlm_cancel_lru */ if (!S_ISREG(inode->i_mode)) GOTO(out, rc); @@ -309,12 +479,21 @@ int ll_file_open(struct inode *inode, struct file *file) file->f_flags &= ~O_LOV_DELAY_CREATE; GOTO(out, rc); out: - req = it->d.lustre.it_data; ptlrpc_req_finished(req); if (req) it_clear_disposition(it, DISP_ENQ_OPEN_REF); - if (rc == 0) + if (rc == 0) { ll_open_complete(inode); + } else { +out_och_free: + if (*och_p) { + OBD_FREE(*och_p, sizeof (struct obd_client_handle)); + *och_p = NULL; /* OBD_FREE writes some magic there */ + (*och_usecount)--; + } + up(&lli->lli_och_sem); + } + return rc; } @@ -1023,10 +1202,8 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count, *ppos = inode->i_size; if (*ppos >= maxbytes) { - if (count || *ppos > maxbytes) { - send_sig(SIGXFSZ, current, 0); - GOTO(out, retval = -EFBIG); - } + send_sig(SIGXFSZ, current, 0); + GOTO(out, retval = -EFBIG); } if (*ppos + count > maxbytes) count = maxbytes - *ppos; @@ -1200,14 +1377,9 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, int lum_size) { struct ll_inode_info *lli = ll_i2info(inode); - struct file *f = NULL; - struct obd_export *exp = 
ll_i2obdexp(inode); struct lov_stripe_md *lsm; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; - struct ptlrpc_request *req = NULL; - struct ll_file_data *fd; int rc = 0; - struct lustre_md md; ENTRY; down(&lli->lli_open_sem); @@ -1219,49 +1391,24 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, RETURN(-EEXIST); } - fd = ll_file_data_get(); - if (fd == NULL) - GOTO(out, -ENOMEM); - - f = get_empty_filp(); - if (!f) - GOTO(out, -ENOMEM); - - f->f_dentry = dget(file->f_dentry); - f->f_vfsmnt = mntget(file->f_vfsmnt); - - rc = ll_intent_file_open(f, lum, lum_size, &oit); + rc = ll_intent_file_open(file, lum, lum_size, &oit); if (rc) GOTO(out, rc); if (it_disposition(&oit, DISP_LOOKUP_NEG)) - GOTO(out, -ENOENT); - req = oit.d.lustre.it_data; + GOTO(out_req_free, rc = -ENOENT); rc = oit.d.lustre.it_status; - if (rc < 0) - GOTO(out, rc); + GOTO(out_req_free, rc); - rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md); - if (rc) - GOTO(out, rc); - ll_update_inode(f->f_dentry->d_inode, &md); - - rc = ll_local_open(f, &oit, fd); - if (rc) - GOTO(out, rc); - fd = NULL; - ll_intent_release(&oit); - - rc = ll_file_release(f->f_dentry->d_inode, f); + ll_release_openhandle(file->f_dentry, &oit); out: - if (f) - fput(f); - ll_file_data_put(fd); up(&lli->lli_open_sem); - if (req != NULL) - ptlrpc_req_finished(req); + ll_intent_release(&oit); RETURN(rc); +out_req_free: + ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data); + goto out; } static int ll_lov_setea(struct inode *inode, struct file *file, @@ -1419,13 +1566,10 @@ static int join_file(struct inode *head_inode, struct file *head_filp, struct dentry *tail_dentry = tail_filp->f_dentry; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = head_filp->f_flags|O_JOIN_FILE}; - struct ptlrpc_request *req = NULL; - struct ll_file_data *fd; struct lustre_handle lockh; struct mdc_op_data *op_data; __u32 hsize = head_inode->i_size >> 32; __u32 tsize = 
head_inode->i_size; - struct file *f; int rc; ENTRY; @@ -1433,23 +1577,11 @@ static int join_file(struct inode *head_inode, struct file *head_filp, tail_inode = tail_dentry->d_inode; tail_parent = tail_dentry->d_parent->d_inode; - fd = ll_file_data_get(); - if (fd == NULL) - RETURN(-ENOMEM); - OBD_ALLOC_PTR(op_data); if (op_data == NULL) { - ll_file_data_put(fd); RETURN(-ENOMEM); } - f = get_empty_filp(); - if (f == NULL) - GOTO(out, rc = -ENOMEM); - - f->f_dentry = dget(head_filp->f_dentry); - f->f_vfsmnt = mntget(head_filp->f_vfsmnt); - ll_prepare_mdc_op_data(op_data, head_inode, tail_parent, tail_dentry->d_name.name, tail_dentry->d_name.len, 0); @@ -1460,26 +1592,24 @@ static int join_file(struct inode *head_inode, struct file *head_filp, if (rc < 0) GOTO(out, rc); - req = oit.d.lustre.it_data; rc = oit.d.lustre.it_status; - if (rc < 0) + if (rc < 0) { + ptlrpc_req_finished((struct ptlrpc_request *) + oit.d.lustre.it_data); GOTO(out, rc); + } - rc = ll_local_open(f, &oit, fd); - LASSERTF(rc == 0, "rc = %d\n", rc); - - fd = NULL; - ll_intent_release(&oit); - - rc = ll_file_release(f->f_dentry->d_inode, f); + if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right + * away */ + ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode); + oit.d.lustre.it_lock_mode = 0; + } + ll_release_openhandle(head_filp->f_dentry, &oit); out: if (op_data) OBD_FREE_PTR(op_data); - if (f) - fput(f); - ll_file_data_put(fd); - ptlrpc_req_finished(req); + ll_intent_release(&oit); RETURN(rc); } @@ -1883,23 +2013,21 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock) RETURN(rc); } -static int ll_have_md_lock(struct dentry *de) +int ll_have_md_lock(struct inode *inode, __u64 bits) { - struct ll_sb_info *sbi = ll_s2sbi(de->d_sb); struct lustre_handle lockh; struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obddev; - ldlm_policy_data_t policy = { .l_inodebits = { - MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP}}; + ldlm_policy_data_t policy = 
{ .l_inodebits = {bits}}; int flags; ENTRY; - if (!de->d_inode) + if (!inode) RETURN(0); - obddev = sbi->ll_mdc_exp->exp_obd; - res_id.name[0] = de->d_inode->i_ino; - res_id.name[1] = de->d_inode->i_generation; + obddev = ll_i2mdcexp(inode)->exp_obd; + res_id.name[0] = inode->i_ino; + res_id.name[1] = inode->i_generation; CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); @@ -1976,8 +2104,19 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) GOTO(out, rc); } + /* Unlinked? Unhash dentry, so it is not picked up later by + do_lookup() -> ll_revalidate_it(). We cannot use d_drop + here to preserve get_cwd functionality on 2.6. + Bug 10503 */ + if (!dentry->d_inode->i_nlink) { + spin_lock(&dcache_lock); + ll_drop_dentry(dentry); + spin_unlock(&dcache_lock); + } + ll_lookup_finish_locks(&oit, dentry); - } else if (!ll_have_md_lock(dentry)) { + } else if (!ll_have_md_lock(dentry->d_inode, + MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) { struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); struct ll_fid fid; obd_valid valid = OBD_MD_FLGETATTR; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index ea519ba..f34079f 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -83,6 +83,17 @@ struct ll_inode_info { struct list_head lli_dead_list; + struct semaphore lli_och_sem; /* Protects access to och pointers + and their usage counters */ + /* We need all three because every inode may be opened in different + modes */ + struct obd_client_handle *lli_mds_read_och; + __u64 lli_open_fd_read_count; + struct obd_client_handle *lli_mds_write_och; + __u64 lli_open_fd_write_count; + struct obd_client_handle *lli_mds_exec_och; + __u64 lli_open_fd_exec_count; + #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif @@ -256,11 +267,11 @@ struct ll_readahead_state { extern kmem_cache_t *ll_file_data_slab; struct lustre_handle; struct ll_file_data { - struct 
obd_client_handle fd_mds_och; struct ll_readahead_state fd_ras; - __u32 fd_flags; + int fd_omode; struct lustre_handle fd_cwlockh; unsigned long fd_gid; + __u32 fd_flags; }; struct lov_stripe_md; @@ -383,6 +394,7 @@ extern struct file_operations ll_file_operations; extern struct file_operations ll_file_operations_flock; extern struct inode_operations ll_file_inode_operations; extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *); +extern int ll_have_md_lock(struct inode *inode, __u64 bits); int ll_extent_lock(struct ll_file_data *, struct inode *, struct lov_stripe_md *, int mode, ldlm_policy_data_t *, struct lustre_handle *, int ast_flags); @@ -393,10 +405,12 @@ int ll_file_release(struct inode *inode, struct file *file); int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *); int ll_glimpse_size(struct inode *inode, int ast_flags); int ll_local_open(struct file *file, - struct lookup_intent *it, struct ll_file_data *fd); + struct lookup_intent *it, struct ll_file_data *fd, + struct obd_client_handle *och); int ll_release_openhandle(struct dentry *, struct lookup_intent *); int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode, struct file *file); +int ll_mdc_real_close(struct inode *inode, int flags); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); @@ -413,6 +427,7 @@ int ll_inode_permission(struct inode *inode, int mask); void ll_intent_drop_lock(struct lookup_intent *); void ll_intent_release(struct lookup_intent *); extern void ll_set_dd(struct dentry *de); +int ll_drop_dentry(struct dentry *dentry); void ll_unhash_aliases(struct inode *); void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft); void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 488066f..0e25235 100644 --- 
a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -484,7 +484,7 @@ static void prune_deathrow(struct ll_sb_info *sbi, int try) int empty; do { - if (need_resched()) + if (need_resched() && try) break; if (try) { @@ -641,6 +641,11 @@ void ll_lli_init(struct ll_inode_info *lli) spin_lock_init(&lli->lli_lock); INIT_LIST_HEAD(&lli->lli_pending_write_llaps); lli->lli_inode_magic = LLI_INODE_MAGIC; + sema_init(&lli->lli_och_sem, 1); + lli->lli_mds_read_och = lli->lli_mds_write_och = NULL; + lli->lli_mds_exec_och = NULL; + lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0; + lli->lli_open_fd_exec_count = 0; INIT_LIST_HEAD(&lli->lli_dead_list); } @@ -1042,9 +1047,21 @@ void ll_clear_inode(struct inode *inode) inode->i_generation, inode); ll_inode2fid(&fid, inode); - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags)); + clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags); mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode); + LASSERT(!lli->lli_open_fd_write_count); + LASSERT(!lli->lli_open_fd_read_count); + LASSERT(!lli->lli_open_fd_exec_count); + + if (lli->lli_mds_write_och) + ll_mdc_real_close(inode, FMODE_WRITE); + if (lli->lli_mds_exec_och) + ll_mdc_real_close(inode, FMODE_EXEC); + if (lli->lli_mds_read_och) + ll_mdc_real_close(inode, FMODE_READ); + + if (lli->lli_smd) { obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd, null_if_equal, inode); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 0cf1eef..33cfa18 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -180,16 +180,71 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, inode->i_ino, inode->i_generation, inode); } + if (bits & MDS_INODELOCK_OPEN) { + int flags = 0; + switch (lock->l_req_mode) { + case LCK_CW: + flags = FMODE_WRITE; + break; + case LCK_PR: + flags = FMODE_EXEC; + break; + case LCK_CR: + flags = FMODE_READ; + break; + default: + CERROR("Unexpected lock mode for OPEN lock " + "%d, inode %ld\n", 
lock->l_req_mode, + inode->i_ino); + } + ll_mdc_real_close(inode, flags); + } + if (bits & MDS_INODELOCK_UPDATE) clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags)); - if (S_ISDIR(inode->i_mode) && - (bits & MDS_INODELOCK_UPDATE)) { + (bits & MDS_INODELOCK_UPDATE)) { + struct dentry *dentry, *tmp, *dir; + struct list_head *list; + CDEBUG(D_INODE, "invalidating inode %lu\n", inode->i_ino); truncate_inode_pages(inode->i_mapping, 0); + + + /* Drop possible cached negative dentries */ + list = &inode->i_dentry; + dir = NULL; + spin_lock(&dcache_lock); + + /* It is possible to have several dentries (with + racer?) */ + while ((list = list->next) != &inode->i_dentry) { + dir = list_entry(list, struct dentry, d_alias); + if (!(dir->d_flags & DCACHE_LUSTRE_INVALID)) + break; + + dir = NULL; + } + + if (dir) { +restart: + list_for_each_entry_safe(dentry, tmp, + &dir->d_subdirs, + d_child) + { + /* XXX Print some debug here? */ + if (!dentry->d_inode) + /* Negative dentry. If we were + dropping dcache lock, go + throught the list again */ + if (ll_drop_dentry(dentry)) + goto restart; + } + } + spin_unlock(&dcache_lock); } if (inode->i_sb->s_root && @@ -407,9 +462,16 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, *de = ll_find_alias(inode, *de); } else { ENTRY; - spin_lock(&dcache_lock); - ll_d_add(*de, inode); - spin_unlock(&dcache_lock); + /* Check that parent has UPDATE lock. 
If there is none, we + cannot afford to hash this dentry (done by ll_d_add) as it + might get picked up later when UPDATE lock will appear */ + if (ll_have_md_lock(parent, MDS_INODELOCK_UPDATE)) { + spin_lock(&dcache_lock); + ll_d_add(*de, inode); + spin_unlock(&dcache_lock); + } else { + (*de)->d_inode = NULL; + } } ll_set_dd(*de); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index e340c1d..49e6407 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -359,8 +359,6 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa) if (cmd & OBD_BRW_WRITE) { oa->o_valid |= OBD_MD_FLEPOCH; oa->o_easize = ll_i2info(inode)->lli_io_epoch; - oa->o_uid = inode->i_uid; - oa->o_gid = inode->i_gid; valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLUID | OBD_MD_FLGID | diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index fecd1b5..f020980 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -50,7 +50,7 @@ struct lov_request_set { struct list_head set_list; }; -#define LAP_MAGIC 8200 +#define LOV_AP_MAGIC 8200 struct lov_async_page { int lap_magic; @@ -62,8 +62,8 @@ struct lov_async_page { void *lap_caller_data; }; -#define LAP_FROM_COOKIE(c) \ - (LASSERT(((struct lov_async_page *)(c))->lap_magic == LAP_MAGIC), \ +#define LAP_FROM_COOKIE(c) \ + (LASSERT(((struct lov_async_page *)(c))->lap_magic == LOV_AP_MAGIC), \ (struct lov_async_page *)(c)) static inline void lov_llh_addref(void *llhp) diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 2585e9f..33dc956 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -1043,7 +1043,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, NULL, oti, NULL); err = lov_update_common_set(set, req, rc); - if (rc) { + if (err) { CERROR("error: destroying objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", set->set_oa->o_id, req->rq_oa->o_id, @@ -1052,7 +1052,7 @@ static int 
lov_destroy(struct obd_export *exp, struct obdo *oa, rc = err; } } - lov_fini_destroy_set(set); + rc = lov_fini_destroy_set(set); if (rc == 0) { LASSERT(lsm_op_find(lsm->lsm_magic) != NULL); rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp); @@ -1566,7 +1566,7 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, LASSERT(loi == NULL); lap = *res; - lap->lap_magic = LAP_MAGIC; + lap->lap_magic = LOV_AP_MAGIC; lap->lap_caller_ops = ops; lap->lap_caller_data = data; diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index 65800ab..d76c4cb 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ -44,6 +44,7 @@ static void lov_init_set(struct lov_request_set *set) set->set_count = 0; set->set_completes = 0; set->set_success = 0; + set->set_cookies = 0; CFS_INIT_LIST_HEAD(&set->set_list); atomic_set(&set->set_refcount, 1); } @@ -129,11 +130,12 @@ int lov_update_enqueue_set(struct lov_request_set *set, * can be addressed then. */ if (rc == ELDLM_OK) { struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp); - __u64 tmp = req->rq_md->lsm_oinfo->loi_lvb.lvb_size; + __u64 tmp; LASSERT(lock != NULL); lov_stripe_lock(set->set_md); loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb; + tmp = loi->loi_lvb.lvb_size; /* Extend KMS up to the end of this lock and no further * A lock on [x,y] means a KMS of up to y + 1 bytes! 
*/ if (tmp > lock->l_policy_data.l_extent.end) @@ -641,7 +643,7 @@ int lov_update_create_set(struct lov_request_set *set, lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx); loi_init(loi); - if (set->set_cookies) + if (oti && set->set_cookies) ++oti->oti_logcookies; if (req->rq_oa->o_valid & OBD_MD_FLCOOKIE) set->set_cookie_sent++; @@ -1010,7 +1012,7 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa, req->rq_oa->o_id = loi->loi_id; /* Setup the first request's cookie position */ - if (!cookie_set && set->set_cookies) { + if (oti && !cookie_set && set->set_cookies) { oti->oti_logcookies = set->set_cookies + i; cookie_set = 1; } diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 563085b..9ebb767 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -109,7 +109,8 @@ static __u32 mds_pack_open_flags(__u32 flags) return (flags & (FMODE_READ | FMODE_WRITE | MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA | - MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE)) | + MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE | + MDS_OPEN_LOCK)) | ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) | ((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) | ((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) | diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index a6b190b..131263a 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -630,6 +630,12 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, if (req == NULL) GOTO(out, rc = -ENOMEM); + /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a + * portal whose threads are not taking any DLM locks and are therefore + * always progressing */ + /* XXX FIXME bug 249 */ + req->rq_request_portal = MDS_READPAGE_PORTAL; + /* Ensure that this close's handle is fixed up during replay. 
*/ LASSERT(och != NULL); LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); @@ -656,9 +662,9 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, LASSERT(req->rq_cb_data == NULL); req->rq_cb_data = mod; - mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); + mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); + mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL); if (req->rq_repmsg == NULL) { CDEBUG(D_HA, "request failed to send: %p, %d\n", req, @@ -1122,9 +1128,14 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_rpc_lock, rc = -ENOMEM); mdc_init_rpc_lock(cli->cl_setattr_lock); + OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); + if (!cli->cl_close_lock) + GOTO(err_setattr_lock, rc = -ENOMEM); + mdc_init_rpc_lock(cli->cl_close_lock); + rc = client_obd_setup(obd, len, buf); if (rc) - GOTO(err_setattr_lock, rc); + GOTO(err_close_lock, rc); lprocfs_init_vars(mdc, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); @@ -1136,6 +1147,8 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(rc); +err_close_lock: + OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); err_setattr_lock: OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); err_rpc_lock: @@ -1212,6 +1225,7 @@ static int mdc_cleanup(struct obd_device *obd) OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock)); OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); + OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); lprocfs_obd_cleanup(obd); ptlrpcd_decref(); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 86c8023..28bde34 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1431,12 +1431,16 @@ int mds_handle(struct ptlrpc_request *req) /* sanity check: if the xid matches, the request must * be marked as a resent or replayed */ - if (req->rq_xid == med->med_mcd->mcd_last_xid) - 
LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_RESENT | MSG_REPLAY), - "rq_xid "LPU64" matches last_xid, " - "expected RESENT flag\n", - req->rq_xid); + if (req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_xid) || + req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_close_xid)) + if (!(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY))) { + CERROR("rq_xid "LPU64" matches last_xid, " + "expected RESENT flag\n", + req->rq_xid); + req->rq_status = -ENOTCONN; + GOTO(out, rc = -EFAULT); + } /* else: note the opposite is not always true; a * RESENT req after a failover will usually not match * the last_xid, since it was likely never @@ -1696,6 +1700,9 @@ int mds_handle(struct ptlrpc_request *req) /* If we're DISCONNECTing, the mds_export_data is already freed */ if (!rc && lustre_msg_get_opc(req->rq_reqmsg) != MDS_DISCONNECT) { struct mds_export_data *med = &req->rq_export->exp_mds_data; + + /* I don't think last_xid is used for anyway, so I'm not sure + if we need to care about last_close_xid here.*/ lustre_msg_set_last_xid(req->rq_repmsg, le64_to_cpu(med->med_mcd->mcd_last_xid)); @@ -1985,7 +1992,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) obd->obd_replayable ? "enabled" : "disabled"); } - ldlm_timeout = 2; + ldlm_timeout = 6; RETURN(0); @@ -2272,6 +2279,10 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, int offset, le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid)) return; + if (req->rq_xid == + le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_close_xid)) + return; + /* This remote handle isn't enqueued, so we never received or * processed this request. Clear MSG_RESENT, because it can * be handled like any normal request now. 
*/ @@ -2366,7 +2377,13 @@ static int mds_intent_policy(struct ldlm_namespace *ns, if (intent_disposition(rep, DISP_LOOKUP_NEG) && !intent_disposition(rep, DISP_OPEN_OPEN)) #endif + if (rep->lock_policy_res2) { + /* mds_open returns ENOLCK where it should return zero, + but it has no lock to return */ + if (rep->lock_policy_res2 == ENOLCK) + rep->lock_policy_res2 = 0; RETURN(ELDLM_LOCK_ABORTED); + } break; case IT_LOOKUP: getattr_part = MDS_INODELOCK_LOOKUP; diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 2bae298..ca387f8 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -345,7 +345,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) continue; } - last_transno = le64_to_cpu(mcd->mcd_last_transno); + last_transno = le64_to_cpu(mcd->mcd_last_transno) > + le64_to_cpu(mcd->mcd_last_close_transno) ? + le64_to_cpu(mcd->mcd_last_transno) : + le64_to_cpu(mcd->mcd_last_close_transno); /* These exports are cleaned up by mds_disconnect(), so they * need to be set up like real exports as mds_connect() does. diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index cc7b49c..9fd562a 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -18,7 +18,12 @@ struct mds_client_data { __u64 mcd_last_xid; /* xid for the last transaction */ __u32 mcd_last_result; /* result from last RPC */ __u32 mcd_last_data; /* per-op data (disposition for open &c.) */ - __u8 mcd_padding[LR_CLIENT_SIZE - 64]; + /* for MDS_CLOSE requests */ + __u64 mcd_last_close_transno; /* last completed transaction ID */ + __u64 mcd_last_close_xid; /* xid for the last transaction */ + __u32 mcd_last_close_result; /* result from last RPC */ + __u32 mcd_last_close_data; /* per-op data (disposition for open &c.) 
*/ + __u8 mcd_padding[LR_CLIENT_SIZE - 88]; }; #define MDS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000) @@ -103,9 +108,13 @@ static inline void mds_inode_unset_orphan(struct inode *inode) if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \ struct mds_client_data *mcd = \ req->rq_export->exp_mds_data.med_mcd; \ - if (mcd->mcd_last_xid == req->rq_xid) { \ + if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) { \ reconstruct; \ - RETURN(lustre_msg_get_status(req->rq_repmsg)); \ + RETURN(le32_to_cpu(mcd->mcd_last_result)); \ + } \ + if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) { \ + reconstruct; \ + RETURN(le32_to_cpu(mcd->mcd_last_close_result)); \ } \ DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",\ mcd->mcd_last_xid); \ diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index 9790280..a566a5d 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -218,7 +218,7 @@ static void mds_finish_join(struct mds_obd *mds, struct ptlrpc_request *req, sizeof(struct llog_cookie); int max_easize = sizeof(*lmmj); - CDEBUG(D_INFO, "change the max md size from %d to %d \n", + CDEBUG(D_INFO, "change the max md size from %d to "LPSZ"\n", mds->mds_max_mdsize, sizeof(*lmmj)); if (mds->mds_max_mdsize < max_easize || diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index f35feff..a291ace 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -517,7 +517,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset, /* copy rc, transno and disp; steal locks */ mds_req_from_mcd(req, mcd); - intent_set_disposition(rep, mcd->mcd_last_data); + intent_set_disposition(rep, le32_to_cpu(mcd->mcd_last_data)); /* Only replay if create or open actually happened. 
*/ if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) { @@ -720,12 +720,12 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, } UNLOCK_INODE_MUTEX(dchild->d_inode); - if (!(rec->ur_flags & MDS_OPEN_JOIN_FILE)) + if (rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) lustre_shrink_reply(req, DLM_REPLY_REC_OFF + 1, body->eadatasize, 0); if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL && - !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) { + rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) { int acl_off = DLM_REPLY_REC_OFF + (body->eadatasize ? 2 : 1); rc = mds_pack_acl(&req->rq_export->exp_mds_data, @@ -869,8 +869,17 @@ int mds_open(struct mds_update_record *rec, int offset, struct dentry_params dp; unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid }; unsigned int qpids[MAXQUOTAS] = { 0, 0 }; + int child_mode = LCK_CR; + /* Always returning LOOKUP lock if open successful to guard + dentry on client. */ + ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}}; + struct ldlm_res_id child_res_id = { .name = {0}}; + int lock_flags = 0; ENTRY; + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE, + (obd_timeout + 1) / 4); + CLASSERT(MAXQUOTAS < 4); if (offset == DLM_INTENT_REC_OFF) { /* intent */ rep = lustre_msg_buf(req->rq_repmsg, DLM_LOCKREPLY_OFF, @@ -1107,6 +1116,36 @@ found_child: GOTO(cleanup, rc = -EAGAIN); } + /* Obtain OPEN lock as well */ + policy.l_inodebits.bits |= MDS_INODELOCK_OPEN; + + /* We cannot use acc_mode here, because it is zeroed in case of + creating a file, so we get wrong lockmode */ + if (accmode(dchild->d_inode, rec->ur_flags) & MAY_WRITE) + child_mode = LCK_CW; + else if (accmode(dchild->d_inode, rec->ur_flags) & MAY_EXEC) + child_mode = LCK_PR; + else + child_mode = LCK_CR; + + if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && + (rec->ur_flags & MDS_OPEN_LOCK)) { + /* In case of replay we do not get a lock assuming that the + caller has it already */ + 
child_res_id.name[0] = dchild->d_inode->i_ino; + child_res_id.name[1] = dchild->d_inode->i_generation; + + rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, + child_res_id, LDLM_IBITS, &policy, + child_mode, &lock_flags, + ldlm_blocking_ast, ldlm_completion_ast, + NULL, NULL, NULL, 0, NULL, child_lockh); + if (rc != ELDLM_OK) + GOTO(cleanup, rc); + + cleanup_phase = 3; + } + if (!S_ISREG(dchild->d_inode->i_mode) && !S_ISDIR(dchild->d_inode->i_mode) && (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) { @@ -1126,6 +1165,9 @@ found_child: cleanup_no_trans: switch (cleanup_phase) { + case 3: + if (rc) + ldlm_lock_decref(child_lockh, child_mode); case 2: if (rc && created) { int err = vfs_unlink(dparent->d_inode, dchild); @@ -1151,6 +1193,14 @@ found_child: else ptlrpc_save_lock(req, &parent_lockh, parent_mode); } + /* If we have not taken the "open" lock, we may not return 0 here, + because caller expects 0 to mean "lock is taken", and it needs + nonzero return here for caller to return EDLM_LOCK_ABORTED to + client. 
Later caller should rewrite the return value back to zero + if it to be used any further + */ + if ((cleanup_phase != 3) && !rc) + rc = ENOLCK; /* trigger dqacq on the owner of child and parent */ lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 79dc8a5..891cd2a 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -160,12 +160,20 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, mds->mds_last_transno = transno; spin_unlock(&mds->mds_transno_lock); } + req->rq_transno = transno; lustre_msg_set_transno(req->rq_repmsg, transno); - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) { + mcd->mcd_last_close_transno = cpu_to_le64(transno); + mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_close_result = cpu_to_le32(rc); + mcd->mcd_last_close_data = cpu_to_le32(op_data); + } else { + mcd->mcd_last_transno = cpu_to_le64(transno); + mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_result = cpu_to_le32(rc); + mcd->mcd_last_data = cpu_to_le32(op_data); + } if (off <= 0) { CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off); @@ -355,12 +363,19 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd) { + if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) { + req->rq_transno = le64_to_cpu(mcd->mcd_last_close_transno); + lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); + req->rq_status = le32_to_cpu(mcd->mcd_last_close_result); + lustre_msg_set_status(req->rq_repmsg, req->rq_status); + } else { + req->rq_transno = le64_to_cpu(mcd->mcd_last_transno); + lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); + req->rq_status = 
le32_to_cpu(mcd->mcd_last_result); + lustre_msg_set_status(req->rq_repmsg, req->rq_status); + } DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", - mcd->mcd_last_transno, mcd->mcd_last_result); - req->rq_transno = mcd->mcd_last_transno; - lustre_msg_set_transno(req->rq_repmsg, req->rq_transno); - req->rq_status = mcd->mcd_last_result; - lustre_msg_set_status(req->rq_repmsg, req->rq_status); + req->rq_transno, req->rq_status); mds_steal_ack_locks(req); } @@ -1263,6 +1278,10 @@ cleanup: return rc; } +#define INODE_CTIME_AGE (10) +#define INODE_CTIME_OLD(inode) (LTIME_S(inode->i_ctime) + \ + INODE_CTIME_AGE < CURRENT_SECONDS) + int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, struct ll_fid *fid, struct lustre_handle *parent_lockh, @@ -1320,6 +1339,16 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds, child_res_id.name[0] = inode->i_ino; child_res_id.name[1] = inode->i_generation; + + /* If we want a LCK_CR for a directory, and this directory has not been + changed for some time, we return not only a LOOKUP lock, but also an + UPDATE lock to have negative dentry starts working for this dir. + Also we apply same logic to non-directories. If the file is rarely + changed - we return both locks and this might save us RPC on + later STAT. 
*/ + if ((child_mode & (LCK_CR|LCK_PR|LCK_CW)) && INODE_CTIME_OLD(inode)) + child_policy.l_inodebits.bits |= MDS_INODELOCK_UPDATE; + iput(inode); retry_locks: diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 9bdea74..2e9c52f 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -119,7 +119,14 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, } if (handle->lgh_ctxt == NULL) RETURN(-EOPNOTSUPP); - llog_cat_id2handle(handle, &log_handle, &lir->lid_id); + rc = llog_cat_id2handle(handle, &log_handle, &lir->lid_id); + if (rc) { + CDEBUG(D_IOCTL, + "cannot find log #"LPX64"#"LPX64"#%08x\n", + lir->lid_id.lgl_oid, lir->lid_id.lgl_ogr, + lir->lid_id.lgl_ogen); + RETURN(rc); + } rc = llog_process(log_handle, llog_check_cb, NULL, NULL); llog_close(log_handle); } else { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index e3aea4b..f6cc6ff 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -1050,12 +1050,13 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry) /* don't need dir->i_zombie for 2.4, it is for rename/unlink of dir * itself we already hold dir->i_mutex for child create/unlink ops */ + LASSERT(dentry->d_inode != NULL); LASSERT(TRYLOCK_INODE_MUTEX(dir) == 0); LASSERT(TRYLOCK_INODE_MUTEX(dentry->d_inode) == 0); /* may_delete() */ - if (!dentry->d_inode || dentry->d_parent->d_inode != dir) + if (/*!dentry->d_inode ||*/dentry->d_parent->d_inode != dir) GOTO(out, rc = -ENOENT); rc = ll_permission(dir, MAY_WRITE | MAY_EXEC, NULL); @@ -2216,12 +2217,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry, EXT3_IOC_SETFLAGS, (long)&oa->o_flags); } else { rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); - if (fcc != NULL) + if (fcc != NULL) { /* set cancel cookie callback function */ fsfilt_add_journal_cb(exp->exp_obd, 0, oti ? 
oti->oti_handle : handle, filter_cancel_cookies_cb, fcc); + fcc = NULL; + } } if (locked) { @@ -2242,6 +2245,9 @@ out_unlock: if (locked) UNLOCK_INODE_MUTEX(inode); + if (fcc) + OBD_FREE(fcc, sizeof(*fcc)); + /* trigger quota release */ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) { unsigned int cur_ids[MAXQUOTAS] = {oa->o_uid, oa->o_gid}; diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 0987611..d1c7a2d 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -284,7 +284,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, ENTRY; /* We are currently not supporting multi-obj BRW_READ RPCS at all. - * When we do this function's dentry cleanup will need to be fixed */ + * When we do this function's dentry cleanup will need to be fixed. + * These values are verified in ost_brw_write() from the wire. */ LASSERTF(objcount == 1, "%d\n", objcount); LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt); @@ -310,9 +311,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, inode = dentry->d_inode; - if (oa) - obdo_to_inode(inode, oa, OBD_MD_FLATIME); - + obdo_to_inode(inode, oa, OBD_MD_FLATIME); fsfilt_check_slow(now, obd_timeout, "preprw_read setup"); for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt; diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 97728eb..bf8de9d 100644 --- a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -72,3 +72,5 @@ flock_test writemany random-reads chownmany +llverdev +llverfs diff --git a/lustre/tests/createdestroy.c b/lustre/tests/createdestroy.c index cec369b..b5b7c2b 100644 --- a/lustre/tests/createdestroy.c +++ b/lustre/tests/createdestroy.c @@ -45,8 +45,7 @@ static int be_verbose(int verbose, struct timeval *next_time, gettimeofday(&now, NULL); /* A positive verbosity means to print every X iterations */ - if (verbose > 0 && - (next_num == NULL || num >= *next_num || num >= num_total)) { + 
if (verbose > 0 && (num >= *next_num || num >= num_total)) { *next_num += verbose; if (next_time) { next_time->tv_sec = now.tv_sec - verbose; @@ -59,8 +58,7 @@ static int be_verbose(int verbose, struct timeval *next_time, if (verbose < 0 && next_time != NULL && difftime(&now, next_time) >= 0){ next_time->tv_sec = now.tv_sec - verbose; next_time->tv_usec = now.tv_usec; - if (next_num) - *next_num = num; + *next_num = num; return 1; } diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index aa1ed8c..52ed448 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -28,25 +28,37 @@ #define MAX_LOV_UUID_COUNT 1000 +/* Returns bytes read on success and a negative value on failure. + * If zero bytes are read it will be treated as failure as such + * zero cannot be returned from this function. + */ int read_proc_entry(char *proc_path, char *buf, int len) { - int rcnt = -2, fd; + int rc, fd; + + memset(buf, 0, len); - if ((fd = open(proc_path, O_RDONLY)) == -1) { + fd = open(proc_path, O_RDONLY); + if (fd == -1) { fprintf(stderr, "open('%s') failed: %s\n", proc_path, strerror(errno)); - rcnt = -3; - } else if ((rcnt = read(fd, buf, len)) <= 0) { + return -2; + } + + rc = read(fd, buf, len - 1); + if (rc < 0) { fprintf(stderr, "read('%s') failed: %s\n", proc_path, strerror(errno)); - } else { - buf[rcnt - 1] = '\0'; + rc = -3; + } else if (rc == 0) { + fprintf(stderr, "read('%s') zero bytes\n", proc_path); + rc = -4; + } else if (/* rc > 0 && */ buf[rc - 1] == '\n') { + buf[rc - 1] = '\0'; /* Remove trailing newline */ } + close(fd); - if (fd >= 0) - close(fd); - - return (rcnt); + return (rc); } int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, @@ -62,7 +74,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, int i, rc; rc = read_proc_entry("/proc/fs/lustre/llite/fs0/lov/common_name", - buf, sizeof(buf)) <= 0; + buf, sizeof(buf)); if (rc < 0) return -rc; @@ 
-71,7 +83,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, if (lum_dir == NULL) { snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripecount", lov_path); - if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) + if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0) return 5; stripe_count = atoi(buf); @@ -82,7 +94,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, stripe_count = 1; snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/numobd", lov_path); - if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) + if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0) return 6; ost_count = atoi(buf); @@ -99,7 +111,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, if (stripe_size == 0) { snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripesize", lov_path); - if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0) + if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0) return 5; stripe_size = atoi(buf); @@ -149,7 +161,7 @@ int main(int argc, char **argv) if (argc < 3) { fprintf(stderr, "Usage: %s [filename2]\n", argv[0]); - exit(1); + return 1; } dir = opendir(argv[1]); diff --git a/lustre/tests/mmap_sanity.c b/lustre/tests/mmap_sanity.c index 91b6a2f..cfa4cafb 100644 --- a/lustre/tests/mmap_sanity.c +++ b/lustre/tests/mmap_sanity.c @@ -472,7 +472,7 @@ static int mmap_tst5(char *mnt) memset(ptr, 'a', region); /* cancel unused locks */ - cancel_lru_locks("osc"); + rc = cancel_lru_locks("osc"); if (rc) goto out_unmap; @@ -538,7 +538,7 @@ static int mmap_tst6(char *mnt) goto out; } - cancel_lru_locks("osc"); + rc = cancel_lru_locks("osc"); if (rc) goto out; diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 77d6d04..c336b9c 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -330,16 +330,233 @@ test_20b() { # bug 2986 - ldlm_handle_enqueue error during open } run_test 20b "ldlm_handle_enqueue error (should return error)" -#b_cray run_test 21a "drop 
close request while close and open are both in flight" -#b_cray run_test 21b "drop open request while close and open are both in flight" -#b_cray run_test 21c "drop both request while close and open are both in flight" -#b_cray run_test 21d "drop close reply while close and open are both in flight" -#b_cray run_test 21e "drop open reply while close and open are both in flight" -#b_cray run_test 21f "drop both reply while close and open are both in flight" -#b_cray run_test 21g "drop open reply and close request while close and open are both in flight" -#b_cray run_test 21h "drop open request and close reply while close and open are both in flight" -#b_cray run_test 22 "drop close request and do mknod" -#b_cray run_test 23 "client hang when close a file after mds crash" +test_21a() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + multiop $DIR/$tdir-2/f Oc & + open_pid=$! + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 2 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 3 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 4 + + rm -rf $DIR/$tdir-* +} +run_test 21a "drop close request while close and open are both in flight" + +test_21b() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + mcreate $DIR/$tdir-2/f & + open_pid=$! 
+ sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 3 + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 4 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 5 + rm -rf $DIR/$tdir-* +} +run_test 21b "drop open request while close and open are both in flight" + +test_21c() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + mcreate $DIR/$tdir-2/f & + open_pid=$! + sleep 3 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 2 + + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21c "drop both request while close and open are both in flight" + +test_21d() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + multiop $DIR/$tdir-2/f Oc & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + + rm -rf $DIR/$tdir-* +} +run_test 21d "drop close reply while close and open are both in flight" + +test_21e() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! 
+ + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + + sleep $TIMEOUT + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21e "drop open reply while close and open are both in flight" + +test_21f() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21f "drop both reply while close and open are both in flight" + +test_21g() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21g "drop open reply and close request while close and open are both in flight" + +test_21h() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + touch $DIR/$tdir-2/f & + touch_pid=$! 
+ sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + cancel_lru_locks MDC # force the close + kill -USR1 $pid + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + wait $touch_pid || return 2 + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 3 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 4 + rm -rf $DIR/$tdir-* +} +run_test 21h "drop open request and close reply while close and open are both in flight" + +# bug 3462 - multiple MDC requests +test_22() { + f1=$DIR/${tfile}-1 + f2=$DIR/${tfile}-2 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + multiop $f2 Oc & + close_pid=$! + + sleep 1 + multiop $f1 msu || return 1 + + cancel_lru_locks MDC # force the close + do_facet mds "sysctl -w lustre.fail_loc=0" + + wait $close_pid || return 2 + rm -rf $f2 || return 4 +} +run_test 22 "drop close request and do mknod" + +test_23() { #b=4561 + multiop $DIR/$tfile O_c & + pid=$! + # give a chance for open + sleep 5 + + # try the close + drop_request "kill -USR1 $pid" + + fail mds + wait $pid || return 1 + return 0 +} +run_test 23 "client hang when close a file after mds crash" test_24() { # bug 2248 - eviction fails writeback but app doesn't see it mkdir -p $DIR/$tdir diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 1ca3bb1..1f4d128 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -21,7 +21,7 @@ EXCEPT="$EXCEPT 48a" case `uname -r` in 2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;; -2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 48b" ;; +2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;; *) error "unsupported kernel" ;; esac @@ -2628,7 +2628,35 @@ test_72() { # bug 5695 - Test that on 2.6 remove_suid works properly } run_test 72 "Test that remove suid works properly (bug5695) ====" -#b_cray run_test 73 "multiple MDC requests (should not deadlock)" +# bug 3462 - multiple simultaneous MDC requests +test_73() 
{ + mkdir $DIR/d73-1 + mkdir $DIR/d73-2 + multiop $DIR/d73-1/f73-1 O_c & + pid1=$! + #give multiop a chance to open + usleep 500 + + echo 0x80000129 > /proc/sys/lustre/fail_loc + multiop $DIR/d73-1/f73-2 Oc & + sleep 1 + echo 0 > /proc/sys/lustre/fail_loc + + multiop $DIR/d73-2/f73-3 Oc & + pid3=$! + + kill -USR1 $pid1 + wait $pid1 || return 1 + + sleep 25 + + $CHECKSTAT -t file $DIR/d73-1/f73-1 || return 4 + $CHECKSTAT -t file $DIR/d73-1/f73-2 || return 5 + $CHECKSTAT -t file $DIR/d73-2/f73-3 || return 6 + + rm -rf $DIR/d73-* +} +run_test 73 "multiple MDC requests (should not deadlock)" test_74() { # bug 6149, 6184 #define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e diff --git a/lustre/tests/small_write.c b/lustre/tests/small_write.c index ebbb2b3..442d0fd 100644 --- a/lustre/tests/small_write.c +++ b/lustre/tests/small_write.c @@ -7,42 +7,44 @@ #include #include +#define GOTO(label, rc) do { rc; goto label; } while (0) + int main (int argc, char **argv) { - int fd, i, rc; + int fd, i, rc = 0; unsigned long bytes, lbytes; struct stat st; char *str, *str2, *readbuf; if (argc != 3) { fprintf(stderr, "usage: %s \n", argv[0]); - return 1; + GOTO(out, rc = 1); } bytes = strtoul(argv[2], NULL, 10); if (!bytes) { printf("No bytes!\n"); - return 1; + GOTO(out, rc = 2); } if (bytes % 2) { printf("Need an even number of bytes!\n"); - return 1; + GOTO(out, rc = 3); } lbytes = 3*bytes/2; str = malloc(bytes+1); if (!str) { printf("No enough memory for %lu bytes.\n", bytes); - return 1; + GOTO(out, rc = 4); } str2 = malloc(lbytes+1); - if (!str) { + if (!str2) { printf("No enough memory for %lu bytes.\n", lbytes); - return 1; + GOTO(out_str, rc = 5); } readbuf = malloc(bytes*2); - if (!str) { + if (!readbuf) { printf("No enough memory for %lu bytes.\n", bytes*2); - return 1; + GOTO(out_str2, rc = 6); } for(i=0; i < bytes; i++) @@ -59,13 +61,13 @@ int main (int argc, char **argv) { fd = open(argv[1], O_CREAT|O_RDWR|O_TRUNC, 0700); if (fd == -1) { printf("Could not open file %s.\n", 
argv[1]); - return 1; + GOTO(out_readbuf, rc = 7); } rc = write(fd, str, bytes); if (rc != bytes) { printf("Write failed!\n"); - return 1; + GOTO(out_fd, rc = 8); } sleep(1); @@ -74,19 +76,19 @@ int main (int argc, char **argv) { printf("bad file %lu size first write %lu != %lu: rc %d\n", (unsigned long)st.st_ino, (unsigned long)st.st_size, bytes, rc); - return 1; + GOTO(out_fd, rc = 9); } rc = lseek(fd, bytes / 2, SEEK_SET); if (rc != bytes / 2) { printf("Seek failed!\n"); - return 1; + GOTO(out_fd, rc = 10); } rc = write(fd, str, bytes); if (rc != bytes) { printf("Write failed!\n"); - return 1; + GOTO(out_fd, rc = 11); } rc = fstat(fd, &st); @@ -94,13 +96,13 @@ int main (int argc, char **argv) { printf("bad file %lu size second write %lu != %lu: rc %d\n", (unsigned long)st.st_ino, (unsigned long)st.st_size, bytes, rc); - return 1; + GOTO(out_fd, rc = 12); } rc = lseek(fd, 0, SEEK_SET); if (rc != 0) { printf("Seek failed!\n"); - return 1; + GOTO(out_fd, rc = 13); } rc = read(fd, readbuf, bytes * 2); @@ -115,23 +117,29 @@ int main (int argc, char **argv) { printf("bad file size after read %lu != %lu: rc %d\n", (unsigned long)st.st_size, bytes + bytes / 2, rc); - return 1; + GOTO(out_fd, rc = 14); } - return 1; + GOTO(out_fd, rc = 15); } - - fd = close(fd); - if (fd == -1) - return 1; + rc = 0; if (bytes < 320) printf("%s\n%s\n", readbuf, str2); if (strcmp(readbuf, str2)) { printf("No match!\n"); - return 1; + GOTO(out_fd, rc = 16); } printf("Pass!\n"); - return 0; +out_fd: + close(fd); +out_readbuf: + free(readbuf); +out_str2: + free(str2); +out_str: + free(str); +out: + return rc; } diff --git a/lustre/tests/utime.c b/lustre/tests/utime.c index e05090e..5b9c2dd 100644 --- a/lustre/tests/utime.c +++ b/lustre/tests/utime.c @@ -90,7 +90,7 @@ int main(int argc, char *argv[]) } if (st.st_atime != utb.actime ) { - fprintf(stderr, "%s: bad utime mtime %lu should be %lu\n", + fprintf(stderr, "%s: bad utime atime %lu should be %lu\n", prog, st.st_atime, utb.actime); return 
7; } diff --git a/lustre/tests/writemany.c b/lustre/tests/writemany.c index 23be7e4..03b48fb 100644 --- a/lustre/tests/writemany.c +++ b/lustre/tests/writemany.c @@ -42,13 +42,17 @@ struct kid_list_t { struct kid_list_t *head = NULL; -void push_kid(pid_t kid) +int push_kid(pid_t kid) { struct kid_list_t *new; new = (struct kid_list_t *)malloc(sizeof(struct kid_list_t)); + if (new == NULL) + return 1; + new->kid = kid; new->next = head; head = new; + return 0; } void kill_kids(void) @@ -258,7 +262,11 @@ int main(int argc, char *argv[]) return (run_one_child(directory, i, duration)); } else { /* parent */ - push_kid(rc); + rc = push_kid(rc); + if (rc != 0) { + kill_kids(); + exit(3); + } } } /* parent process */ diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index eb43617..6fd7f84 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -16,7 +16,7 @@ if UTILS rootsbin_PROGRAMS = mount.lustre sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \ mount_lustre mkfs_lustre mkfs.lustre \ - tunefs_lustre tunefs.lustre l_getgroups + tunefs_lustre tunefs.lustre l_getgroups # llverfs llverdev bin_PROGRAMS = lfs llog_reader lib_LIBRARIES = liblustreapi.a sbin_SCRIPTS = $(sbin_scripts) @@ -34,6 +34,10 @@ lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a lload_SOURCES = lload.c lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL) lload_DEPENDENCIES := $(LIBPTLCTL) +lload_SOURCES = lload.c + +llverfs_LDADD := -lext2fs -le2p +llverdev_LDADD := -lext2fs -lblkid liblustreapi_a_SOURCES = liblustreapi.c diff --git a/lustre/utils/lconf b/lustre/utils/lconf index fa92ab1..296f600 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -72,7 +72,7 @@ PORTALS_DIR = '../lnet' # Needed to call lconf --record CONFIG_FILE = "" -# Please keep these in sync with the values in portals/kp30.h +# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h ptldebug_names = { "trace" : (1 << 0), "inode" : (1 << 1), @@ -88,7 +88,8 @@ ptldebug_names = 
{ "buffs" : (1 << 11), "other" : (1 << 12), "dentry" : (1 << 13), - "portals" : (1 << 14), + "portals" : (1 << 14), # deprecated + "lnet" : (1 << 14), "page" : (1 << 15), "dlmtrace" : (1 << 16), "error" : (1 << 17), @@ -114,22 +115,29 @@ subsystem_names = { "log" : (1 << 6), "llite" : (1 << 7), "rpc" : (1 << 8), - "portals" : (1 << 10), - "nal" : (1 << 11), + "lnet" : (1 << 10), + "portals" : (1 << 10), # deprecated + "lnd" : (1 << 11), + "nal" : (1 << 11), # deprecated "pinger" : (1 << 12), "filter" : (1 << 13), - "ptlbd" : (1 << 14), + "ptlbd" : (1 << 14), # deprecated "echo" : (1 << 15), "ldlm" : (1 << 16), "lov" : (1 << 17), - "ptlrouter" : (1 << 18), + "ptlrouter" : (1 << 18), # deprecated "cobd" : (1 << 19), "sm" : (1 << 20), "asobd" : (1 << 21), - "confobd" : (1 << 22), + "confobd" : (1 << 22), # deprecated "lmv" : (1 << 23), "cmobd" : (1 << 24), "sec" : (1 << 25), + "sec" : (1 << 26), + "gss" : (1 << 27), + "gks" : (1 << 28), + "mgc" : (1 << 29), + "mgs" : (1 << 30), } @@ -1419,7 +1427,7 @@ class MDSDEV(Module): if not fs_uuid in self.filesystem_uuids: continue; - debug("recording", client_name) + log("Recording log", client_name, "on", self.name) old_noexec = config.noexec config.noexec = 0 noexec_opt = ('', '-n') diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index c320aed..24fd739 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -69,7 +69,7 @@ static void err_msg(char *fmt, ...) 
fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno); } -int llapi_file_create(char *name, long stripe_size, int stripe_offset, +int llapi_file_create(const char *name, long stripe_size, int stripe_offset, int stripe_count, int stripe_pattern) { struct lov_user_md lum = { 0 }; @@ -145,14 +145,6 @@ out: return rc; } -/* short term backwards compat only */ -int op_create_file(char *name, long stripe_size, int stripe_offset, - int stripe_count) -{ - return llapi_file_create(name, stripe_size, stripe_offset, - stripe_count, 0); -} - struct find_param { int recursive; int verbose; @@ -493,10 +485,30 @@ int llapi_file_get_stripe(char *path, struct lov_user_md *lum) return rc; } -/* short term backwards compat only */ -int op_get_file_stripe(char *path, struct lov_user_md *lum) +int llapi_file_lookup(int dirfd, const char *name) { - return llapi_file_get_stripe(path, lum); + struct obd_ioctl_data data = { 0 }; + char rawbuf[8192]; + char *buf = rawbuf; + int rc; + + if (dirfd < 0 || name == NULL) + return -EINVAL; + + data.ioc_version = OBD_IOCTL_VERSION; + data.ioc_len = sizeof(data); + data.ioc_inlbuf1 = (char *)name; + data.ioc_inllen1 = strlen(name) + 1; + + rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf)); + if (rc) { + fprintf(stderr, + "error: IOC_MDC_LOOKUP pack failed for '%s': rc %d\n", + name, rc); + return rc; + } + + return ioctl(dirfd, IOC_MDC_LOOKUP, buf); } static int find_process_file(DIR *dir, char *dname, char *fname, diff --git a/lustre/utils/llobdstat.pl b/lustre/utils/llobdstat.pl index b42c8a3..be8fba7 100755 --- a/lustre/utils/llobdstat.pl +++ b/lustre/utils/llobdstat.pl @@ -73,7 +73,7 @@ sub get_cpumhz() get_cpumhz(); print "Processor counters run at $mhz MHz\n"; -sub readall() +sub readstat() { my $prevcount; my @iodata; @@ -101,6 +101,7 @@ sub readall() } } } + sub process_stats() { my $delta; @@ -149,7 +150,7 @@ sub process_stats() open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; do { - readall(); + readstat(); 
process_stats(); if ($interval) { sleep($interval); @@ -157,4 +158,3 @@ do { } } while ($interval); close STATS; - diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c index 03f04c7..6b5601d 100644 --- a/lustre/utils/llog_reader.c +++ b/lustre/utils/llog_reader.c @@ -30,11 +30,13 @@ #include #include -int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number); +int llog_pack_buffer(int fd, struct llog_log_hdr **llog_buf, + struct llog_rec_hdr ***recs, int *recs_number); -void print_llog_header(struct llog_log_hdr* llog_buf); -void print_records(struct llog_rec_hdr** recs_buf,int rec_number); -void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr** recs_buf); +void print_llog_header(struct llog_log_hdr *llog_buf); +void print_records(struct llog_rec_hdr **recs_buf,int rec_number); +void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf, + struct llog_rec_hdr **recs_buf); #define PTL_CMD_BASE 100 char* portals_command[17]= @@ -57,37 +59,36 @@ char* portals_command[17]= "GET_INTERFACE", "" }; - + int main(int argc, char **argv) { - int rc=0; - int fd,rec_number; - - struct llog_log_hdr* llog_buf=NULL; - struct llog_rec_hdr** recs_buf=NULL; - + int rc = 0; + int fd, rec_number; + struct llog_log_hdr *llog_buf = NULL; + struct llog_rec_hdr **recs_buf = NULL; setlinebuf(stdout); - + if(argc != 2 ){ printf("Usage: llog_reader filename \n"); return -1; } - + fd = open(argv[1],O_RDONLY); if (fd < 0){ printf("Could not open the file %s \n",argv[1]); goto out; } rc = llog_pack_buffer(fd, &llog_buf, &recs_buf, &rec_number); - - if(llog_buf == NULL ) - printf("error"); + if (rc < 0) { + printf("Could not pack buffer; rc=%d\n", rc); + goto out_fd; + } + print_llog_header(llog_buf); - print_records(recs_buf,rec_number); - llog_unpack_buffer(fd,llog_buf,recs_buf); +out_fd: close(fd); out: return rc; @@ -95,31 +96,31 @@ out: -int llog_pack_buffer(int fd, struct llog_log_hdr** llog, - 
struct llog_rec_hdr*** recs, - int* recs_number) +int llog_pack_buffer(int fd, struct llog_log_hdr **llog, + struct llog_rec_hdr ***recs, + int *recs_number) { - int rc=0,recs_num,rd; + int rc = 0, recs_num,rd; off_t file_size; struct stat st; - char *file_buf=NULL, *recs_buf=NULL; - struct llog_rec_hdr** recs_pr=NULL; - char* ptr=NULL; + char *file_buf=NULL, *recs_buf=NULL; + struct llog_rec_hdr **recs_pr=NULL; + char *ptr=NULL; int cur_idx,i; - + rc = fstat(fd,&st); if (rc < 0){ printf("Get file stat error.\n"); goto out; - } + } file_size = st.st_size; - + file_buf = malloc(file_size); if (file_buf == NULL){ printf("Memory Alloc for file_buf error.\n"); rc = -ENOMEM; goto out; - } + } *llog = (struct llog_log_hdr*)file_buf; rd = read(fd,file_buf,file_size); @@ -127,40 +128,37 @@ int llog_pack_buffer(int fd, struct llog_log_hdr** llog, printf("Read file error.\n"); rc = -EIO; /*FIXME*/ goto clear_file_buf; - } + } /* the llog header not countable here.*/ recs_num = le32_to_cpu((*llog)->llh_count)-1; - - recs_buf = malloc(recs_num*sizeof(struct llog_rec_hdr*)); + + recs_buf = malloc(recs_num * sizeof(struct llog_rec_hdr *)); if (recs_buf == NULL){ printf("Memory Alloc for recs_buf error.\n"); rc = -ENOMEM; goto clear_file_buf; - } + } recs_pr = (struct llog_rec_hdr **)recs_buf; - + ptr = file_buf + le32_to_cpu((*llog)->llh_hdr.lrh_len); cur_idx = 1; i = 0; - while (i < recs_num){ - struct llog_rec_hdr* cur_rec=(struct llog_rec_hdr*)ptr; - while(!ext2_test_bit(cur_idx,(*llog)->llh_bitmap)){ - cur_idx++; + while (i < recs_num){ + struct llog_rec_hdr *cur_rec = (struct llog_rec_hdr*)ptr; + + if (ext2_test_bit(cur_idx++, (*llog)->llh_bitmap)) { + recs_pr[i++] = cur_rec; ptr += cur_rec->lrh_len; - if ((ptr-file_buf) > file_size){ - printf("The log is corrupted. 
\n"); + if ((ptr - file_buf) > file_size) { + printf("The log is corrupted.\n"); rc = -EINVAL; goto clear_recs_buf; - } + } } - recs_pr[i] = cur_rec; - ptr+=cur_rec->lrh_len; - i++; - cur_idx++; } - + *recs = recs_pr; *recs_number=recs_num; @@ -175,24 +173,21 @@ clear_file_buf: *llog=NULL; goto out; - } - -void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr **recs_buf) +void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf, + struct llog_rec_hdr **recs_buf) { free(llog_buf); free(recs_buf); return; } - -void print_llog_header(struct llog_log_hdr* llog_buf) +void print_llog_header(struct llog_log_hdr *llog_buf) { time_t t; printf("Header size : %d \n", - // le32_to_cpu(llog_buf->llh_hdr.lrh_len)); llog_buf->llh_hdr.lrh_len); t = le64_to_cpu(llog_buf->llh_timestamp); @@ -204,7 +199,7 @@ void print_llog_header(struct llog_log_hdr* llog_buf) printf("Target uuid : %s \n", (char *)(&llog_buf->llh_tgtuuid)); - /* Add the other infor you want to view here*/ + /* Add the other info you want to view here */ printf("-----------------------\n"); return; @@ -213,13 +208,14 @@ void print_llog_header(struct llog_log_hdr* llog_buf) static void print_1_cfg(struct lustre_cfg *lcfg) { int i; + if (lcfg->lcfg_nid) printf("nid=%s("LPX64") ", libcfs_nid2str(lcfg->lcfg_nid), lcfg->lcfg_nid); if (lcfg->lcfg_nal) printf("nal=%d ", lcfg->lcfg_nal); for (i = 0; i < lcfg->lcfg_bufcount; i++) - printf("%d:%.*s ", i, lcfg->lcfg_buflens[i], + printf("%d:%.*s ", i, lcfg->lcfg_buflens[i], (char*)lustre_cfg_buf(lcfg, i)); return; } @@ -229,7 +225,7 @@ static void print_setup_cfg(struct lustre_cfg *lcfg) { struct lov_desc *desc; - if ((lcfg->lcfg_bufcount == 2) && + if ((lcfg->lcfg_bufcount == 2) && (lcfg->lcfg_buflens[1] == sizeof(*desc))) { printf("lov_setup "); printf("0:%s ", lustre_cfg_string(lcfg, 0)); @@ -250,7 +246,7 @@ static void print_setup_cfg(struct lustre_cfg *lcfg) void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip) { enum lcfg_command_type 
cmd = le32_to_cpu(lcfg->lcfg_command); - + if (*skip > 0) printf("SKIP "); @@ -352,27 +348,25 @@ void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip) return; } -void print_records(struct llog_rec_hdr** recs,int rec_number) +void print_records(struct llog_rec_hdr **recs, int rec_number) { __u32 lopt; int i, skip = 0; - for(i = 0; i < rec_number; i++){ - + for(i = 0; i < rec_number; i++) { printf("#%.2d ", le32_to_cpu(recs[i]->lrh_index)); lopt = le32_to_cpu(recs[i]->lrh_type); if (lopt == OBD_CFG_REC){ struct lustre_cfg *lcfg; - printf("L "); - lcfg = (struct lustre_cfg *) - ((char*)(recs[i]) + sizeof(struct llog_rec_hdr)); + printf("L "); + lcfg = (struct lustre_cfg *)((char*)(recs[i]) + + sizeof(struct llog_rec_hdr)); print_lustre_cfg(lcfg, &skip); } - if (lopt == PTL_CFG_REC){ - printf("Portals - unknown type\n"); - } + if (lopt == PTL_CFG_REC) + printf("Portals - unknown type\n"); } } diff --git a/lustre/utils/llstat.pl b/lustre/utils/llstat.pl index 7c3855a..0305f3d 100755 --- a/lustre/utils/llstat.pl +++ b/lustre/utils/llstat.pl @@ -2,6 +2,9 @@ my $pname = $0; +my $defaultpath = "/proc/fs/lustre"; +my $obdstats = "stats"; + sub usage() { print STDERR "Usage: $pname []\n"; @@ -9,19 +12,41 @@ sub usage() } -my $statspath; +my $statspath = "None"; my $interval = 0; if (($#ARGV < 0) || ($#ARGV > 1)) { usage(); } else { - $statspath = $ARGV[0]; + if ( $ARGV[0] =~ /help$/ ) { + usage(); + } + if ( -f $ARGV[0] ) { + $statspath = $ARGV[0]; + } elsif ( -f "$ARGV[0]/$obdstats" ) { + $statspath = "$ARGV[0]/$obdstats"; + } else { + my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`; + chop $st; + if ( -f "$st" ) { + $statspath = $st; + } else { + $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`; + chop $st; + if ( -f "$st" ) { + $statspath = $st; + } + } + } + if ( $statspath =~ /^None$/ ) { + die "Cannot locate stat file for: $ARGV[0]\n"; + } if ($#ARGV == 1) { $interval = $ARGV[1]; } } - +print "$pname on $statspath\n"; my %cumulhash; my 
%sumhash; @@ -43,6 +68,7 @@ sub get_cpumhz() if (defined($itc_freq)) { $mhz = $itc_freq; } elsif (defined($cpu_freq)) { $mhz = $cpu_freq; } else { $mhz = 1; } + close CPUINFO; } get_cpumhz(); @@ -50,7 +76,7 @@ print "Processor counters run at $mhz MHz\n"; sub readstat() { - open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; + seek STATS, 0, 0; while () { chop; ($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare) @@ -125,9 +151,11 @@ sub readstat() } } +open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; do { readstat(); if ($interval) { sleep($interval); } } while ($interval); +close STATS; diff --git a/lustre/utils/llverdev.c b/lustre/utils/llverdev.c new file mode 100644 index 0000000..a00db8e --- /dev/null +++ b/lustre/utils/llverdev.c @@ -0,0 +1,502 @@ +/* + * Large Block Device Verification Tool. + * This program is used to test whether the block device is correctly + * handling IO beyond 2TB boundary. + * This tool have two working modes + * 1. full mode + * 2. fast mode + * The full mode is basic mode in which program writes the test pattern + * on entire disk. The test pattern (device offset and timestamp) is written + * at the beginning of each 4kB block. When the whole device is full then + * read operation is performed to verify that the test pattern is correct. + * In the fast mode the program writes data at the critical locations + * of the device such as start of the device, before and after multiple of 1GB + * offset and at the end. + * A chunk buffer with default size of 1MB is used to write and read test + * pattern in bulk. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE_MB (1024 * 1024) +#define ONE_GB (1024 * 1024 * 1024) +#define HALF_MB (ONE_MB / 2) +#define ONE_KB 1024 +#define HALF_KB (ONE_KB / 2) +#define BLOCKSIZE 4096 + +/* Structure for writting test pattern */ +struct block_data { + loff_t bd_offset; + time_t bd_time; +}; +static char *progname; /* name by which this program was run. */ +static unsigned verbose = 1; /* prints offset in kB, operation rate */ +static int readoption; /* run test in read-only (verify) mode */ +static int writeoption; /* run test in write_only mode */ +const char *devname; /* name of device to be tested. */ +static unsigned full = 1; /* flag to full check */ +static int fd; +static int isatty_flag; + +static struct option const longopts[] = +{ + { "chunksize", required_argument, 0, 'c' }, + { "force", no_argument, 0, 'f' }, + { "help", no_argument, 0, 'h' }, + { "offset", required_argument, 0, 'o' }, + { "partial", required_argument, 0, 'p' }, + { "quiet", required_argument, 0, 'q' }, + { "read", no_argument, 0, 'r' }, + { "timestamp", required_argument, 0, 't' }, + { "verbose", no_argument, 0, 'v' }, + { "write", no_argument, 0, 'w' }, + { "long", no_argument, 0, 'l' }, + { 0, 0, 0, 0} +}; + +/* + * Usage: displays help information, whenever user supply --help option in + * command or enters incorrect command line. + */ +void usage(int status) +{ + if (status != 0) { + printf("\nUsage: %s [OPTION]... 
...\n", + progname); + printf("Block device verification tool.\n" + "\t-t {seconds}, --timestamp, " + "set test time (default=current time())\n" + "\t-o {offset}, --offset, " + "offset in kB of start of test, default=0\n" + "\t-r, --read run test in verify mode\n" + "\t-w, --write run test in test-pattern mode, default=rw\n" + "\t-v, --verbose\n" + "\t-q, --quiet\n" + "\t-l, --long, full check of device\n" + "\t-p, --partial, for partial check (1GB steps)\n" + "\t-c, --chunksize, IO chunk size, default=1048576\n" + "\t-f, --force, force test to run without confirmation\n" + "\t-h, --help display this help and exit\n"); + } + exit(status); +} + +/* + * Open_dev: Opens device in specified mode and returns fd. + */ +static int open_dev(const char *devname, int mode) +{ + int mount_flags; + char mountpt[80] = ""; + + if (ext2fs_check_mount_point(devname, &mount_flags, mountpt, + sizeof(mountpt))) { + fprintf(stderr, "%s: ext2fs_check_mount_point failed:%s", + progname, strerror(errno)); + exit(1); + } + if (mount_flags & EXT2_MF_MOUNTED){ + fprintf(stderr, "%s: %s is already mounted\n", progname, + devname); + exit(1); + } + fd = open(devname, mode | O_EXCL | O_LARGEFILE); + if (fd < 0) { + fprintf(stderr, "%s: Open failed: %s",progname,strerror(errno)); + exit(3); + } + return (fd); +} + +/* + * sizeof_dev: Returns size of device in bytes + */ +static unsigned long long sizeof_dev(int fd) +{ + blkid_loff_t numbytes = 0; + + numbytes = blkid_get_dev_size(fd); + if (numbytes <= 0) { + fprintf(stderr, "%s: blkid_get_dev_size(%s) failed", + progname, devname); + return 1; + } + + if (verbose) + printf("%s: %s is %llu bytes (%g GB) in size\n", + progname, devname, + (unsigned long long)numbytes, (double)numbytes / ONE_GB); + + return numbytes; +} + +/* + * Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct. + * Returns 0 if test offset and timestamp is correct otherwise 1. 
+ */ +int verify_chunk(char *chunk_buf, size_t chunksize, + loff_t chunk_off, time_t time_st) +{ + struct block_data *bd; + char *chunk_end; + + for (chunk_end = chunk_buf + chunksize - sizeof(*bd); + (char *)chunk_buf < chunk_end; + chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) { + bd = (struct block_data *)chunk_buf; + if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st)) + continue; + + fprintf(stderr, "\n%s: verify failed at offset/timestamp " + "%llu/%lu: found %llu/%lu instead\n", progname, + chunk_off, time_st, bd->bd_offset, bd->bd_time); + return 1; + } + return 0; +} + +/* + * fill_chunk: Fills the chunk with current or user specified timestamp + * and offset. The test patters is filled at the beginning of + * each 4kB(BLOCKSIZE) blocks in chunk_buf. + */ +void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off, + time_t time_st) +{ + struct block_data *bd; + char *chunk_end; + + for (chunk_end = chunk_buf + chunksize - sizeof(*bd); + (char *)chunk_buf < chunk_end; + chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) { + bd = (struct block_data *)chunk_buf; + bd->bd_offset = chunk_off; + bd->bd_time = time_st; + } +} + +void show_rate(char *op, unsigned long long offset, unsigned long long *count) +{ + static time_t last; + time_t now; + double diff; + + now = time(NULL); + diff = now - last; + + if (diff > 4) { + if (last != 0) { + if (isatty_flag) + printf("\r"); + printf("%s offset: %14llukB %5g MB/s ", op, + offset / ONE_KB, (double)(*count) /ONE_MB /diff); + if (isatty_flag) + fflush(stdout); + else + printf("\n"); + + *count = 0; + } + last = now; + } +} + +/* + * write_chunk: write the chunk_buf on the device. The number of write + * operations are based on the parameters write_end, offset, and chunksize. + */ +int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf, + size_t chunksize, time_t time_st) +{ + unsigned long long stride, count = 0; + + stride = full ? 
chunksize : (ONE_GB - chunksize); + + for (offset = offset & ~(chunksize - 1); offset < write_end; + offset += stride) { + if (lseek64(fd, offset, SEEK_SET) == -1) { + fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n", + progname, offset, strerror(errno)); + return 1; + } + if (offset + chunksize > write_end) + chunksize = write_end - offset; + + if (!full && offset > chunksize) { + fill_chunk(chunk_buf, chunksize, offset, time_st); + if (write(fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, "\n%s: write %llu failed: %s\n", + progname, offset, strerror(errno)); + return 1; + } + offset += chunksize; + if (offset + chunksize > write_end) + chunksize = write_end - offset; + } + + fill_chunk(chunk_buf, chunksize, offset, time_st); + if (write(fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, "\n%s: write %llu failed: %s\n", + progname, offset, strerror(errno)); + return 1; + } + + count += chunksize; + if (verbose > 1) + show_rate("write", offset, &count); + } + if (verbose > 1) { + show_rate("write", offset, &count); + printf("\nwrite complete\n"); + } + if (fsync(fd) == -1) { + fprintf(stderr, "%s: fsync faild: %s\n", progname, + strerror(errno)); + return 1; + } + return 0; +} + +/* + * read_chunk: reads the chunk_buf from the device. The number of read + * operations are based on the parameters read_end, offset, and chunksize. + */ +int read_chunks(loff_t offset, loff_t read_end, char *chunk_buf, + size_t chunksize, time_t time_st) +{ + unsigned long long stride, count = 0; + + stride = full ? 
chunksize : (ONE_GB - chunksize); + + if (ioctl(fd, BLKFLSBUF, 0) < 0 && verbose) + fprintf(stderr, "%s: ioctl BLKFLSBUF failed: %s (ignoring)\n", + progname, strerror(errno)); + + for (offset = offset & ~(chunksize - 1); offset < read_end; + offset += stride) { + if (lseek64(fd, offset, SEEK_SET) == -1) { + fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n", + progname, offset, strerror(errno)); + return 1; + } + if (offset + chunksize > read_end) + chunksize = read_end - offset; + + if (!full && offset > chunksize) { + if (read (fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, "\n%s: read %llu failed: %s\n", + progname, offset, strerror(errno)); + return 1; + } + if (verify_chunk(chunk_buf, chunksize, offset, + time_st) != 0) + return 1; + offset += chunksize; + if (offset + chunksize >= read_end) + chunksize = read_end - offset; + } + + if (read(fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, "\n%s: read failed: %s\n", progname, + strerror(errno)); + return 1; + } + + if (verify_chunk(chunk_buf, chunksize, offset, time_st) != 0) + return 1; + + count += chunksize; + if (verbose > 1) + show_rate("read", offset, &count); + } + if (verbose > 1) { + show_rate("read", offset, &count); + printf("\nread complete\n"); + } + return 0; +} + +int main(int argc, char **argv) +{ + time_t time_st = 0; /* Default timestamp */ + loff_t offset = 0, offset_orig; /* offset in kB */ + size_t chunksize = ONE_MB; /* IO chunk size */ + char *chunk_buf = NULL; + unsigned int force = 0; /* run test run without confirmation*/ + unsigned long long dev_size = 0; + char yesno[4]; + int mode = O_RDWR; /* mode which device should be opened */ + int error = 0, c; + + progname = strrchr(argv[0], '/') == NULL ? 
+ argv[0] : strrchr(argv[0], '/') + 1; + while ((c = getopt_long(argc, argv, "c:fhlo:pqrt:vw", longopts, + NULL)) != -1) { + switch (c) { + case 'c': + chunksize = (strtoul(optarg, NULL, 0) * ONE_MB); + if (!chunksize) { + fprintf(stderr, "%s: chunk size value should be" + "nonzero and multiple of 1MB\n", + progname); + return -1; + } + break; + case 'f': + force = 1; + break; + case 'l': + full = 1; + break; + case 'o': + offset = strtoull(optarg, NULL, 0) * ONE_KB; + break; + case 'p': + full = 0; + break; + case 'q': + verbose = 0; + break; + case 'r': + readoption = 1; + mode = O_RDONLY; + break; + case 't': + time_st = (time_t)strtoul(optarg, NULL, 0); + break; + case 'v': + verbose++; + break; + case 'w': + writeoption = 1; + mode = O_WRONLY; + break; + case 'h': + default: + usage (1); + return 0; + } + } + offset_orig = offset; + devname = argv[optind]; + if (!devname) { + fprintf(stderr, "%s: device name not given\n", progname); + usage (1); + return -1; + } + + if (readoption && writeoption) + mode = O_RDWR; + if (!readoption && !writeoption) { + readoption = 1; + writeoption = 1; + } + + if (!force && writeoption) { + printf("%s: permanently overwrite all data on %s (yes/no)? 
", + progname, devname); + scanf("%3s", yesno); + if (!(strcasecmp("yes", yesno) || strcasecmp("y", yesno))) { + printf("Not continuing due to '%s' response", yesno); + return 0; + } + } + + if (!writeoption && time_st == 0) { + fprintf(stderr, "%s: must give timestamp for read-only test\n", + progname); + usage(1); + } + + fd = open_dev(devname, mode); + dev_size = sizeof_dev(fd); + if (!dev_size) { + fprintf(stderr, "%s: cannot test on device size < 1MB\n", + progname); + error = 7; + goto close_dev; + } + + if (dev_size < (offset * 2)) { + fprintf(stderr, "%s: device size %llu < offset %llu\n", + progname, dev_size, offset); + error = 6; + goto close_dev; + } + if (!time_st) + (void)time(&time_st); + + isatty_flag = isatty(STDOUT_FILENO); + + if (verbose) + printf("Timestamp: %lu\n", time_st); + + chunk_buf = (char *)calloc(chunksize, 1); + if (chunk_buf == NULL) { + fprintf(stderr, "%s: memory allocation failed for chunk_buf\n", + progname); + error = 4; + goto close_dev; + } + if (writeoption) { + if (write_chunks(offset, dev_size, chunk_buf, chunksize, + time_st)) { + error = 3; + goto chunk_buf; + } + if (!full) { /* end of device aligned to a block */ + offset = ((dev_size - chunksize + BLOCKSIZE - 1) & + ~(BLOCKSIZE - 1)); + if (write_chunks(offset, dev_size, chunk_buf, chunksize, + time_st)) { + error = 3; + goto chunk_buf; + } + } + offset = offset_orig; + } + if (readoption) { + if (read_chunks(offset, dev_size, chunk_buf, chunksize, + time_st)) { + error = 2; + goto chunk_buf; + } + if (!full) { /* end of device aligned to a block */ + offset = ((dev_size - chunksize + BLOCKSIZE - 1) & + ~(BLOCKSIZE - 1)); + if (read_chunks(offset, dev_size, chunk_buf, chunksize, + time_st)) { + error = 2; + goto chunk_buf; + } + } + if (verbose) + printf("\n%s: data verified successfully\n", progname); + } + error = 0; +chunk_buf: + free(chunk_buf); +close_dev: + close(fd); + return error; +} diff --git a/lustre/utils/llverfs.c b/lustre/utils/llverfs.c new file mode 
100644 index 0000000..77e54dd --- /dev/null +++ b/lustre/utils/llverfs.c @@ -0,0 +1,630 @@ +/* + * ext3 Filesystem Verification Tool. + * This program tests the correct operation of ext3 filesystem. + * This tool have two working modes + * 1. full mode + * 2. fast mode + * The full mode is basic mode in which program creates a subdirectory + * in the test fileysytem, writes n(files_in_dir, default=16) large(4GB) files + * to the directory with the test pattern at the start of each 4kb block. + * The test pattern contains timestamp, relative file offset and per file + * unique idenfifier(inode number). this continues until whole filesystem is + * full and then this tooll verifies that the data in all of the test files + * is correct. + * In the fast mode the tool creates a test directories with + * EXT3_TOPDIR_FL flag set. the number of directories equals to the number + * of block groups in the filesystem(e.g. 65536 directories for 8TB filesystem) + * and then writes a single 1MB file in each directory. The tool then verifies + * that the data in each file is correct. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ONE_MB (1024 * 1024) +#define ONE_GB ((unsigned long long)(1024 * 1024 * 1024)) +#define BLOCKSIZE 4096 + +/* Structure for writing test pattern */ +struct block_data { + loff_t bd_offset; + time_t bd_time; + ino_t bd_inode; +}; +static char *progname; /* name by which this program was run. */ +static unsigned verbose = 1; /* prints offset in kB, operation rate */ +static int readoption; /* run test in read-only (verify) mode */ +static int writeoption; /* run test in write_only mode */ +char *testdir; /* name of device to be tested. 
*/ +static unsigned full = 1; /* flag to full check */ +static int errno_local; /* local copy of errno */ +static unsigned long num_files; /* Total number of files for read/write */ +static loff_t file_size; /* Size of each file */ +static unsigned files_in_dir = 32; /* number of files in each directioy */ +static unsigned num_dirs = 30000; /* total number of directories */ +const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; +static int fd = -1; +static int isatty_flag; +static int perms = S_IRWXU | S_IRGRP | S_IROTH; + +static struct option const longopts[] = +{ + { "chunksize", required_argument, 0, 'c' }, + { "help", no_argument, 0, 'h' }, + { "offset", required_argument, 0, 'o' }, + { "long", no_argument, 0, 'l' }, + { "partial", required_argument, 0, 'p' }, + { "quiet", required_argument, 0, 'q' }, + { "read", no_argument, 0, 'r' }, + { "timestamp", required_argument, 0, 't' }, + { "verbose", no_argument, 0, 'v' }, + { "write", no_argument, 0, 'w' }, + { 0, 0, 0, 0} +}; + +/* + * Usages: displays help information, whenever user supply --help option in + * command or enters incorrect command line. + */ +void usage(int status) +{ + if (status != 0) + { + printf("\nUsage: %s [OPTION]... ...\n", + progname); + printf("ext3 filesystem verification tool.\n" + "\t-t {seconds} for --timestamp, set test time" + "(default=current time())\n" + "\t-o {offset} for --offset, directory starting offset" + " from which tests should start\n" + "\t-r run test in read (verify) mode\n" + "\t-w run test in write (test-pattern) mode (default=r&w)\n" + "\t-v for verbose\n" + "\t-p for --partial, for partial check (1MB files)\n" + "\t-l for --long, full check (4GB file with 4k blocks)\n" + "\t-c for --chunksize, IO chunk size (default=1048576)\n" + "\t-h display this help and exit\n" + "\t--help display this help and exit\n"); + } + exit(status); +} + +/* + * open_file: Opens file in specified mode and returns fd. 
+ */ +static int open_file(const char *file, int flag) +{ + fd = open(file, flag, perms); + if (fd < 0) { + fprintf(stderr, "\n%s: Open '%s' failed:%s\n", + progname, file, strerror(errno)); + exit(3); + } + return (fd); +} + +/* + * Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct. + * Returns 0 if test offset and timestamp is correct otherwise 1. + */ +int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off, + time_t time_st, ino_t inode_st, char *file) +{ + struct block_data *bd; + char *chunk_end; + + for (chunk_end = chunk_buf + chunksize - sizeof(*bd); + (char *)chunk_buf < chunk_end; + chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) { + bd = (struct block_data *)chunk_buf; + if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st) && + (bd->bd_inode == inode_st)) + continue; + fprintf(stderr,"\n%s: verify %s failed offset/timestamp/inode " + "%llu/%lu/%lu: found %llu/%lu/%lu instead\n", progname, + file, chunk_off, time_st, inode_st, bd->bd_offset, + bd->bd_time, bd->bd_inode); + return 1; + } + return 0; +} + +/* + * fill_chunk: Fills the chunk with current or user specified timestamp + * and offset. The test patters is filled at the beginning of + * each 4kB(BLOCKSIZE) blocks in chunk_buf. + */ +void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off, + time_t time_st, ino_t inode_st) +{ + struct block_data *bd; + char *chunk_end; + + for (chunk_end = chunk_buf + chunksize - sizeof(*bd); + (char *)chunk_buf < chunk_end; + chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) { + bd = (struct block_data *)chunk_buf; + bd->bd_offset = chunk_off; + bd->bd_time = time_st; + bd->bd_inode = inode_st; + } +} + +/* + * write_chunk: write the chunk_buf on the device. The number of write + * operations are based on the parameters write_end, offset, and chunksize. 
+ */ +int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf, + size_t chunksize, time_t time_st, + ino_t inode_st, const char *file) +{ + unsigned long long stride; + + stride = full ? chunksize : (ONE_GB - chunksize); + for (offset = offset & ~(chunksize - 1); offset < write_end; + offset += stride) { + if (lseek64(fd, offset, SEEK_SET) == -1) { + fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n", + progname, file, offset, strerror(errno)); + return 1; + } + if (offset + chunksize > write_end) + chunksize = write_end - offset; + if (!full && offset > chunksize) { + fill_chunk(chunk_buf, chunksize, offset, time_st, + inode_st); + if (write(fd, chunk_buf, chunksize) < 0) { + if (errno == ENOSPC) { + errno_local = errno; + return 0; + } + fprintf(stderr, + "\n%s: write %s+%llu failed: %s\n", + progname, file, offset,strerror(errno)); + return errno; + } + offset += chunksize; + if (offset + chunksize > write_end) + chunksize = write_end - offset; + } + fill_chunk(chunk_buf, chunksize, offset, time_st, inode_st); + if (write(fd, (char *) chunk_buf, chunksize) < 0) { + if (errno == ENOSPC) { + errno_local = errno; + return 0; + } + fprintf(stderr, "\n%s: write %s+%llu failed: %s\n", + progname, file, offset, strerror(errno)); + return 1; + } + } + return 0; +} + +/* + * read_chunk: reads the chunk_buf from the device. The number of read + * operations are based on the parameters read_end, offset, and chunksize. + */ +int read_chunks(int fd, loff_t offset, loff_t read_end, char *chunk_buf, + size_t chunksize, time_t time_st, ino_t inode_st, char *file) +{ + unsigned long long stride; + + stride = full ? 
chunksize : (ONE_GB - chunksize); + for (offset = offset & ~(chunksize - 1); offset < read_end; + offset += stride) { + if (lseek64(fd, offset, SEEK_SET) == -1) { + fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n", + progname, file, offset, strerror(errno)); + return 1; + } + if (offset + chunksize > read_end) + chunksize = read_end - offset; + if (!full && offset > chunksize) { + if (read(fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, + "\n%s: read %s+%llu failed: %s\n", + progname, file, offset,strerror(errno)); + return 1; + } + if (verify_chunk(chunk_buf, chunksize, offset, + time_st, inode_st, file) != 0) + return 1; + offset += chunksize; + if (offset + chunksize >= read_end) + chunksize = read_end - offset; + } + if (read(fd, chunk_buf, chunksize) < 0) { + fprintf(stderr, "\n%s: read %s+%llu failed: %s\n", + progname, file, offset, strerror(errno)); + return 1; + } + if (verify_chunk(chunk_buf, chunksize, offset, time_st, + inode_st, file) != 0) + return 1; + } + return 0; +} + +/* + * new_file: prepares new filename using file counter and current dir. + */ +char *new_file(char *tempfile, char *cur_dir, int file_num) +{ + sprintf(tempfile, "%s/file%03d", cur_dir, file_num); + return tempfile; +} + +/* + * new_dir: prepares new dir name using dir counters. + */ +char *new_dir(char *tempdir, int dir_num) +{ + sprintf(tempdir, "%s/dir%05d", testdir, dir_num); + return tempdir; +} + +/* + * show_filename: Displays name of current file read/write + */ +void show_filename(char *op, char *filename) +{ + static time_t last; + time_t now; + double diff; + + now = time(NULL); + diff = now - last; + if (diff > 4 || verbose > 2) { + if (isatty_flag) + printf("\r"); + printf("%s File name: %s ", op, filename); + if (isatty_flag) + fflush(stdout); + else + printf("\n"); + last = now; + } +} + +/* + * dir_write: This function writes directories and files on device. + * it works for both full and fast modes. 
+ */ +static int dir_write(char *chunk_buf, size_t chunksize, + time_t time_st, unsigned long dir_num) +{ + char tempfile[PATH_MAX]; + char tempdir[PATH_MAX]; + struct stat64 file; + int file_num = 999999999; + ino_t inode_st = 0; + + if (!full && fsetflags(testdir, EXT2_TOPDIR_FL)) + fprintf(stderr, + "\n%s: can't set TOPDIR_FL on %s: %s (ignoring)", + progname, testdir, strerror(errno)); + + for (; dir_num < num_dirs; num_files++, file_num++) { + if (file_num >= files_in_dir) { + if (dir_num == num_dirs - 1) + break; + + file_num = 0; + if (mkdir(new_dir(tempdir, dir_num), dirmode) < 0) { + if (errno == ENOSPC) + break; + if (errno != EEXIST) { + fprintf(stderr, "\n%s: mkdir %s : %s\n", + progname, tempdir, + strerror(errno)); + return 1; + } + } + dir_num++; + } + fd = open_file(new_file(tempfile, tempdir, file_num), + O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE); + + if (fd >= 0 && fstat64(fd, &file) == 0) { + inode_st = file.st_ino; + } else { + fprintf(stderr, "\n%s: write stat64 to file %s: %s", + progname, tempfile, strerror(errno)); + exit(1); + } + + if (verbose > 1) + show_filename("write", tempfile); + + if (write_chunks(fd, 0, file_size, chunk_buf, chunksize, + time_st, inode_st, tempfile)) { + close(fd); + return 1; + } + close(fd); + + if (errno_local == ENOSPC) + break; + } + + if (verbose) { + verbose++; + show_filename("write", tempfile); + printf("\nwrite complete\n"); + verbose--; + } + + return 0; +} + +/* + * dir_read: This function reads directories and files on device. + * it works for both full and fast modes. 
+ */ +static int dir_read(char *chunk_buf, size_t chunksize, + time_t time_st, unsigned long dir_num) +{ + char tempfile[PATH_MAX]; + char tempdir[PATH_MAX]; + unsigned long count = 0; + struct stat64 file; + int file_num = 0; + ino_t inode_st = 0; + + for (count = 0; count < num_files && dir_num < num_dirs; count++) { + if (file_num == 0) { + if (dir_num == num_dirs - 1) + break; + + new_dir(tempdir, dir_num); + dir_num++; + } + + fd = open_file(new_file(tempfile, tempdir, file_num), + O_RDONLY | O_LARGEFILE); + if (fd >= 0 && fstat64(fd, &file) == 0) { + inode_st = file.st_ino; + } else { + fprintf(stderr, "\n%s: read stat64 file '%s': %s\n", + progname, tempfile, strerror(errno)); + return 1; + } + + if (verbose > 1) + show_filename("read", tempfile); + + if (count == num_files) + file_size = file.st_size; + if (read_chunks(fd, 0, file_size, chunk_buf, chunksize, + time_st, inode_st, tempfile)) { + close(fd); + return 1; + } + close(fd); + + if (++file_num >= files_in_dir) + file_num = 0; + } + if (verbose > 1){ + verbose++; + show_filename("read", tempfile); + printf("\nread complete\n"); + verbose--; + } + return 0; +} + +int main(int argc, char **argv) +{ + time_t time_st = 0; /* Default timestamp */ + size_t chunksize = ONE_MB; /* IO chunk size(defailt=1MB) */ + char *chunk_buf; /* chunk buffer */ + int error = 0; + FILE *countfile = NULL; + char filecount[PATH_MAX]; + unsigned long dir_num = 0, dir_num_orig = 0;/* starting directory */ + char c; + + progname = strrchr(argv[0], '/') ? 
strrchr(argv[0], '/') + 1 : argv[0]; + while ((c = (char)getopt_long(argc, argv, "t:rwvplo:h", + longopts, NULL)) != -1) { + switch (c) { + case 'c': + chunksize = (strtoul(optarg, NULL, 0) * ONE_MB); + if (!chunksize) { + fprintf(stderr, "%s: Chunk size value should be" + "a multiple of 1MB\n", progname); + return -1; + } + break; + case 'l': + full = 1; + break; + case 'o': /* offset */ + dir_num = strtoul(optarg, NULL, 0); + break; + case 'p': + full = 0; + break; + case 'q': + verbose = 0; + break; + case 'r': + readoption = 1; + break; + case 't': + time_st = (time_t)strtoul(optarg, NULL, 0); + break; + case 'w': + writeoption = 1; + break; + case 'v': + verbose++; + break; + + case 'h': + default: + usage(1); + return 0; + } + } + testdir = argv[optind]; + + if (!testdir) { + fprintf(stderr, "%s: pathname not given\n", progname); + usage(1); + return -1; + } + file_size = 4 * ONE_GB; + if (!readoption && !writeoption) { + readoption = 1; + writeoption = 1; + } + if (!time_st) + (void) time(&time_st); + printf("Timestamp: %lu\n", (unsigned long )time_st); + isatty_flag = isatty(STDOUT_FILENO); + + if (!full) { + struct mntent *tempmnt; + FILE *fp = NULL; + ext2_filsys fs; + + if ((fp = setmntent("/etc/mtab", "r")) == NULL){ + fprintf(stderr, "%s: fail to open /etc/mtab in read" + "mode :%s\n", progname, strerror(errno)); + goto guess; + } + + /* find device name using filesystem */ + while ((tempmnt = getmntent(fp)) != NULL) { + if (strcmp(tempmnt->mnt_dir, testdir) == 0) + break; + } + + if (tempmnt == NULL) { + fprintf(stderr, "%s: no device found for '%s'\n", + progname, testdir); + endmntent(fp); + goto guess; + } + + if (ext2fs_open(tempmnt->mnt_fsname, 0, 0, 0, + unix_io_manager, &fs)) { + fprintf(stderr, "%s: unable to open ext3 fs on '%s'\n", + progname, testdir); + endmntent(fp); + goto guess; + } + endmntent(fp); + + num_dirs = (fs->super->s_blocks_count + + fs->super->s_blocks_per_group - 1) / + fs->super->s_blocks_per_group; + if (verbose) + 
printf("ext3 block groups: %u, fs blocks: %u " + "blocks per group: %u\n", + num_dirs, fs->super->s_blocks_count, + fs->super->s_blocks_per_group); + ext2fs_close(fs); + if (0) { /* ugh */ + struct statfs64 statbuf; + guess: + if (statfs64(testdir, &statbuf) == 0) { + num_dirs = (long long)statbuf.f_blocks * + statbuf.f_bsize / (128ULL << 20); + if (verbose) + printf("dirs: %u, fs blocks: %llu\n", + num_dirs, + (long long)statbuf.f_blocks); + } else { + fprintf(stderr, "%s: unable to stat '%s': %s\n", + progname, testdir, strerror(errno)); + if (verbose) + printf("dirs: %u\n", num_dirs); + } + } + + file_size = ONE_MB; + chunksize = ONE_MB; + files_in_dir = 1; + } + chunk_buf = (char *)calloc(chunksize, 1); + if (chunk_buf == NULL) { + fprintf(stderr, "Memory allocation failed for chunk_buf\n"); + return 4; + } + sprintf(filecount, "%s/%s.filecount", testdir, progname); + if (writeoption) { + (void)mkdir(testdir, dirmode); + + unlink(filecount); + if (dir_num != 0) { + num_files = dir_num * files_in_dir; + if (verbose) + printf("\n%s: %lu files already written\n", + progname, num_files); + } + if (dir_write(chunk_buf, chunksize, time_st, dir_num)) { + error = 3; + goto out; + } + countfile = fopen(filecount, "w"); + if (countfile != NULL) { + if (fprintf(countfile, "%lu", num_files) < 1 || + fflush(countfile) != 0) { + fprintf(stderr, "\n%s: writing %s failed :%s\n", + progname, filecount, strerror(errno)); + } + fclose(countfile); + } + dir_num = dir_num_orig; + } + if (readoption) { + if (!writeoption) { + countfile = fopen(filecount, "r"); + if (countfile == NULL || + fscanf(countfile, "%lu", &num_files) != 1) { + fprintf(stderr, "\n%s: reading %s failed :%s\n", + progname, filecount, strerror(errno)); + num_files = num_dirs * files_in_dir; + } else { + num_files -= (dir_num * files_in_dir); + } + if (countfile) + fclose(countfile); + } + if (dir_read(chunk_buf, chunksize, time_st, dir_num)) { + fprintf(stderr, "\n%s: Data verification failed\n", + progname) ; + 
error = 2; + goto out; + } + } + error = 0; +out: + free(chunk_buf); + return error; +} diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index a856c63..96cb3a0 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -397,8 +397,7 @@ static int be_verbose(int verbose, struct timeval *next_time, gettimeofday(&now, NULL); /* A positive verbosity means to print every X iterations */ - if (verbose > 0 && - (next_num == NULL || num >= *next_num || num >= num_total)) { + if (verbose > 0 && (num >= *next_num || num >= num_total)) { *next_num += verbose; if (next_time) { next_time->tv_sec = now.tv_sec - verbose; @@ -412,8 +411,7 @@ static int be_verbose(int verbose, struct timeval *next_time, difftime(&now, next_time) >= 0.0){ next_time->tv_sec = now.tv_sec - verbose; next_time->tv_usec = now.tv_usec; - if (next_num) - *next_num = num; + *next_num = num; return 1; } diff --git a/lustre/utils/obdio.c b/lustre/utils/obdio.c index f10dff3..0152064a 100644 --- a/lustre/utils/obdio.c +++ b/lustre/utils/obdio.c @@ -43,8 +43,8 @@ obdio_test_fixed_extent (struct obdio_conn *conn, int j; int rc = 0; - buffer = obdio_alloc_aligned_buffer (&space, size); - if (buffer == NULL) { + space = obdio_alloc_aligned_buffer (&buffer, size); + if (space == NULL) { fprintf (stderr, "Can't allocate buffer size %d\n", size); return (-1); } diff --git a/lustre/utils/obdiolib.c b/lustre/utils/obdiolib.c index b948c45..c402104 100644 --- a/lustre/utils/obdiolib.c +++ b/lustre/utils/obdiolib.c @@ -51,9 +51,9 @@ obdio_ioctl (struct obdio_conn *conn, int cmd) rc = obd_ioctl_pack (&conn->oc_data, &buf, sizeof (conn->oc_buffer)); if (rc != 0) { - fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n", - rc, strerror (errno)); - abort (); + fprintf(stderr, "%s: obd_ioctl_pack: %d (%s)\n", + __FUNCTION__, rc, strerror(errno)); + abort(); } rc = ioctl (conn->oc_fd, cmd, buf); @@ -62,8 +62,8 @@ obdio_ioctl (struct obdio_conn *conn, int cmd) rc2 = obd_ioctl_unpack (&conn->oc_data, buf, sizeof 
(conn->oc_buffer)); if (rc2 != 0) { - fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n", - rc2, strerror (errno)); + fprintf(stderr, "%s: obd_ioctl_unpack: %d (%s)\n", + __FUNCTION__, rc2, strerror(errno)); abort (); } @@ -77,15 +77,15 @@ obdio_connect (int device) conn = malloc (sizeof (*conn)); if (conn == NULL) { - fprintf (stderr, "obdio_connect: no memory\n"); + fprintf (stderr, "%s: no memory\n", __FUNCTION__); return (NULL); } memset (conn, 0, sizeof (*conn)); conn->oc_fd = open ("/dev/obd", O_RDWR); if (conn->oc_fd < 0) { - fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n", - strerror (errno)); + fprintf(stderr, "%s: Can't open /dev/obd: %s\n", + __FUNCTION__, strerror(errno)); goto failed; } @@ -107,13 +107,14 @@ obdio_disconnect (struct obdio_conn *conn, int flags) int obdio_pread (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset) + void *buffer, uint32_t count, uint64_t offset) { obdio_iocinit (conn); conn->oc_data.ioc_obdo1.o_id = oid; conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; + conn->oc_data.ioc_obdo1.o_valid = + OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; conn->oc_data.ioc_pbuf2 = buffer; conn->oc_data.ioc_plen2 = count; @@ -125,13 +126,14 @@ obdio_pread (struct obdio_conn *conn, uint64_t oid, int obdio_pwrite (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset) + void *buffer, uint32_t count, uint64_t offset) { obdio_iocinit (conn); conn->oc_data.ioc_obdo1.o_id = oid; conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; + conn->oc_data.ioc_obdo1.o_valid = + OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; conn->oc_data.ioc_pbuf2 = buffer; conn->oc_data.ioc_plen2 = count; @@ -180,14 +182,14 @@ obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh) void * obdio_alloc_aligned_buffer (void **spacep, int 
size) { - int pagesize = getpagesize(); - void *space = malloc (size + pagesize - 1); + int pagemask = getpagesize() - 1; + void *space = malloc(size + pagemask); - *spacep = space; if (space == NULL) return (NULL); - return ((void *)(((unsigned long)space + pagesize - 1) & ~(pagesize - 1))); + *spacep = (void *)(((unsigned long)space + pagemask) & ~pagemask); + return space; } struct obdio_barrier * @@ -195,10 +197,11 @@ obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) { struct obdio_barrier *b; - b = (struct obdio_barrier *)malloc (sizeof (*b)); + b = malloc(sizeof(*b)); if (b == NULL) { - fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid); - return (NULL); + fprintf(stderr, "%s "LPX64": Can't allocate\n", + __FUNCTION__, oid); + return(NULL); } b->ob_id = id; @@ -215,41 +218,42 @@ obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b) struct lustre_handle lh; int rc; int rc2; - void *space; + void *space, *fileptr; struct obdio_barrier *fileb; if (b->ob_ordinal != 0 || b->ob_count != 0) { - fprintf (stderr, "obdio_setup_barrier: invalid parameter\n"); + fprintf(stderr, "%s: invalid parameter\n", __FUNCTION__); abort (); } - fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ()); - if (fileb == NULL) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n", - b->ob_oid); + space = obdio_alloc_aligned_buffer(&fileptr, getpagesize()); + if (space == NULL) { + fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n", + __FUNCTION__, b->ob_oid); return (-1); } - memset (fileb, 0, getpagesize ()); + fileb = fileptr; + memset(fileb, 0, getpagesize()); *fileb = *b; - rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh); + rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh); if (rc != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on enqueue: 
%s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out; } - rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); + rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0); if (rc != 0) - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on write: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); rc2 = obdio_cancel (conn, &lh); if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on cancel: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); rc = rc2; } out: @@ -263,29 +267,30 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) struct lustre_handle lh; int rc; int rc2; - void *space; + void *space, *fileptr; struct obdio_barrier *fileb; char *mode; - fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ()); - if (fileb == NULL) { - fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n", - b->ob_oid); + space = obdio_alloc_aligned_buffer(&fileptr, getpagesize()); + if (space == NULL) { + fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n", + __FUNCTION__, b->ob_oid); return (-1); } - rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh); + rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on PW enqueue: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out_1; } - memset (fileb, 0xeb, getpagesize ()); - rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); + fileb = fileptr; + memset(fileb, 0xeb, getpagesize()); + rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n", - 
b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on initial read: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out_2; } @@ -294,13 +299,16 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) fileb->ob_npeers != b->ob_npeers || fileb->ob_count >= b->ob_npeers || fileb->ob_ordinal != b->ob_ordinal) { - fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id); - fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, - fileb->ob_ordinal, fileb->ob_count); - fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - b->ob_id, b->ob_oid, b->ob_npeers, - b->ob_ordinal, b->ob_count); + fprintf(stderr, "%s "LPX64": corrupt on initial read\n", + __FUNCTION__, b->ob_id); + fprintf(stderr, + " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", + fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, + fileb->ob_ordinal, fileb->ob_count); + fprintf(stderr, + " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", + b->ob_id, b->ob_oid, b->ob_npeers, + b->ob_ordinal, b->ob_count); rc = -1; goto out_2; } @@ -311,37 +319,36 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) fileb->ob_ordinal++; /* signal all joined */ } - rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); + rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n", - b->ob_oid, strerror (errno)); + fprintf (stderr, "%s "LPX64": Error on initial write: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out_2; } mode = "PW"; b->ob_ordinal++; /* now I wait... 
*/ while (fileb->ob_ordinal != b->ob_ordinal) { - rc = obdio_cancel (conn, &lh); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n", - b->ob_oid, mode, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on %s cancel: %s\n", + __FUNCTION__, b->ob_oid, mode, strerror(errno)); goto out_1; } mode = "PR"; - rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh); + rc = obdio_enqueue(conn, b->ob_oid, LCK_PR,0,getpagesize(),&lh); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on PR enqueue: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out_1; } - memset (fileb, 0xeb, getpagesize ()); - rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); + memset (fileb, 0xeb, getpagesize()); + rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0); if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on read: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); goto out_2; } @@ -351,13 +358,16 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) fileb->ob_count >= b->ob_npeers || (fileb->ob_ordinal != b->ob_ordinal - 1 && fileb->ob_ordinal != b->ob_ordinal)) { - fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id); - fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, - fileb->ob_ordinal, fileb->ob_count); - fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - b->ob_id, b->ob_oid, b->ob_npeers, - b->ob_ordinal, b->ob_count); + fprintf(stderr, "%s "LPX64": corrupt\n", + __FUNCTION__, b->ob_id); + fprintf(stderr, " got ["LPX64","LPX64","LPX64"," + LPX64","LPX64"]\n", + fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, + fileb->ob_ordinal, fileb->ob_count); + fprintf(stderr, " expected 
["LPX64","LPX64","LPX64 + ","LPX64","LPX64"]\n", + b->ob_id, b->ob_oid, b->ob_npeers, + b->ob_ordinal, b->ob_count); rc = -1; goto out_2; } @@ -366,13 +376,11 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) out_2: rc2 = obdio_cancel (conn, &lh); if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n", - b->ob_oid, strerror (errno)); + fprintf(stderr, "%s "LPX64": Error on cancel: %s\n", + __FUNCTION__, b->ob_oid, strerror(errno)); rc = rc2; } out_1: free (space); return (rc); } - - diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h index 8813de4..97be436 100644 --- a/lustre/utils/obdiolib.h +++ b/lustre/utils/obdiolib.h @@ -30,16 +30,16 @@ struct obdio_barrier { uint64_t ob_count; }; -extern struct obdio_conn * obdio_connect (int device); +extern struct obdio_conn *obdio_connect(int device); extern void obdio_disconnect(struct obdio_conn *conn, int flags); extern int obdio_open(struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh); extern int obdio_close(struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh); extern int obdio_pread(struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset); + void *buffer, uint32_t count, uint64_t offset); extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset); + void *buffer, uint32_t count, uint64_t offset); extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid, int mode, uint64_t offset, uint32_t count, struct lustre_handle *lh); @@ -47,8 +47,7 @@ extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh); extern void *obdio_alloc_aligned_buffer(void **spacep, int size); extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id, int npeers); -extern int obdio_setup_barrier(struct obdio_conn *conn, - struct obdio_barrier *b); +extern int obdio_setup_barrier(struct obdio_conn *conn,struct obdio_barrier *b); extern int 
obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b); #endif diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 9aa1d66..4b3adc1 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1120,6 +1120,7 @@ main(int argc, char **argv) COMMENT("Sizes and Offsets"); BLANK_LINE(); + CHECK_STRUCT(obd_uuid); check_lustre_handle(); check_lustre_msg_v1(); check_lustre_msg_v2(); -- 1.8.3.1