Whamcloud - gitweb
Merge b1_5 from b1_4 (20060515_1255)
author scjody <scjody>
Tue, 16 May 2006 10:32:50 +0000 (10:32 +0000)
committer scjody <scjody>
Tue, 16 May 2006 10:32:50 +0000 (10:32 +0000)
65 files changed:
ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series
ldiskfs/kernel_patches/series/ldiskfs-2.6.12-vanilla.series
lustre/ChangeLog
lustre/autoMakefile.am
lustre/include/linux/lustre_compat25.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/obd.h
lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch [new file with mode: 0644]
lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series
lustre/kernel_patches/series/ldiskfs-2.6-suse.series
lustre/kernel_patches/series/ldiskfs-2.6.12-vanilla.series
lustre/kernel_patches/series/rhel-2.4.21
lustre/kernel_patches/targets/2.6-suse.target.in
lustre/ldiskfs/lustre_quota_fmt.c
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/liblustre/namei.c
lustre/liblustre/rw.c
lustre/llite/dcache.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/lov/lov_internal.h
lustre/lov/lov_obd.c
lustre/lov/lov_request.c
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_join.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/obdclass/llog_ioctl.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io.c
lustre/tests/.cvsignore
lustre/tests/createdestroy.c
lustre/tests/ll_dirstripe_verify.c
lustre/tests/mmap_sanity.c
lustre/tests/recovery-small.sh
lustre/tests/sanity.sh
lustre/tests/small_write.c
lustre/tests/utime.c
lustre/tests/writemany.c
lustre/utils/Makefile.am
lustre/utils/lconf
lustre/utils/liblustreapi.c
lustre/utils/llobdstat.pl
lustre/utils/llog_reader.c
lustre/utils/llstat.pl
lustre/utils/llverdev.c [new file with mode: 0644]
lustre/utils/llverfs.c [new file with mode: 0644]
lustre/utils/obd.c
lustre/utils/obdio.c
lustre/utils/obdiolib.c
lustre/utils/obdiolib.h
lustre/utils/wirecheck.c

diff --git a/ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch b/ldiskfs/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch
new file mode 100644 (file)
index 0000000..f9d4857
--- /dev/null
@@ -0,0 +1,63 @@
+Index: linux-2.6.9-full/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/iopen.c      2006-04-25 08:51:11.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/iopen.c   2006-05-06 01:21:11.000000000 +0400
+@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc
+               assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
+       }
+-      if (!list_empty(&inode->i_dentry)) {
+-              alternate = list_entry(inode->i_dentry.next,
+-                                     struct dentry, d_alias);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              /* ignore dentries created for ".." to preserve
++               * proper dcache hierarchy -- bug 10458 */
++              if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++                      continue;
+               dget_locked(alternate);
+               spin_lock(&alternate->d_lock);
+               alternate->d_flags |= DCACHE_REFERENCED;
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-05-06 01:21:10.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-05-06 01:29:30.000000000 +0400
+@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct
+                       return ERR_PTR(-EACCES);
+       }
++      /* ".." shouldn't go into dcache to preserve dcache hierarchy
++       * otherwise we'll get parent being a child of actual child.
++       * see bug 10458 for details -bzzz */
++      if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++              (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++              struct dentry *tmp, *goal = NULL;
++              struct list_head *lp;
++
++              /* first, look for an existing dentry - any one is good */
++              spin_lock(&dcache_lock);
++              list_for_each(lp, &inode->i_dentry) {
++                      tmp = list_entry(lp, struct dentry, d_alias);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++              if (goal == NULL) {
++                      /* there is no alias, we need to make current dentry:
++                       *  a) inaccessible for __d_lookup()
++                       *  b) inaccessible for iopen */
++                      J_ASSERT(list_empty(&dentry->d_alias));
++                      dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++                      /* this is d_instantiate() ... */
++                      list_add(&dentry->d_alias, &inode->i_dentry);
++                      dentry->d_inode = inode;
++              }
++              spin_unlock(&dcache_lock);
++              if (goal)
++                      iput(inode);
++              return goal;
++      }
++
+       return iopen_connect_dentry(dentry, inode, 1);
+ }
index bab81b9..3661023 100644 (file)
@@ -10,3 +10,4 @@ ext3-extents-2.6.9-rhel4.patch
 ext3-mballoc2-2.6.9-rhel4.patch 
 ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
index 2584c1d..efa7700 100644 (file)
@@ -12,3 +12,4 @@ ext3-nlinks-2.6.7.patch
 ext3-rename-reserve-2.6-suse.patch
 ext3-htree-dot-2.6.5-suse.patch 
 ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
index 7d0a383..b44e35e 100644 (file)
@@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-htree-dot-2.6.patch
 ext3-external-journal-2.6.12.patch
+ext3-lookup-dotdot-2.6.9.patch
index 7fdebda..0bae13c 100644 (file)
@@ -208,11 +208,25 @@ Details    : Check that we actually have objects in a file before doing any
 
 Severity   : minor
 Frequency  : Rare
-Bugzilla   : 10484 
+Bugzilla   : 10484
 Description: Request leak when working with deleted CWD
 Details    : Introduce advanced request refcount tracking for requests
             referenced from lustre intent.
 
+Severity   : Enhancement
+Bugzilla   : 10482
+Description: Cache open file handles on client.
+Details    : MDS now will return special lock along with openhandle, if
+            requested and client is allowed to hold openhandle, even if unused,
+            until such a lock is revoked. Helps NFS a lot, since NFS is opening
+            and closing files for every read/write operation.
+
+Severity   : Enhancement
+Bugzilla   : 9291
+Description: Cache open negative dentries on client when possible.
+Details    : Guard negative dentries with UPDATE lock on parent dir, drop
+            negative dentries on lock revocation.
+
 
 ------------------------------------------------------------------------------
 
index 368c081..be4dae8 100644 (file)
@@ -58,13 +58,14 @@ sources: $(LDISKFS) lvfs-sources obdclass-sources lustre_build_version
 
 all-recursive: lustre_build_version
 
+BUILD_VER_H=$(top_builddir)/lustre/include/linux/lustre_build_version.h
+
 lustre_build_version:
        perl $(top_builddir)/lustre/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
        echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver
-       cmp -s $(top_builddir)/lustre/include/linux/lustre_build_version.h tmpver \
-                2> /dev/null &&                                            \
-                $(RM) tmpver ||                                            \
-                mv tmpver $(top_builddir)/lustre/include/linux/lustre_build_version.h
+       cmp -s $(BUILD_VER_H) tmpver > tmpdiff 2> /dev/null &&          \
+                $(RM) tmpver tmpdiff ||                                        \
+               mv tmpver $(BUILD_VER_H)
 
 CSTK=/tmp/checkstack
 CSTKO=/tmp/checkstack.orig
index 066cc20..ec22d4a 100644 (file)
@@ -326,12 +326,10 @@ static inline int page_mapped(struct page *page)
 }
 #endif /* !HAVE_PAGE_MAPPED */
 
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16))
 static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
 {
         update_atime(dentry->d_inode);
 }
-#endif
 
 static inline void file_accessed(struct file *file)
 {
index 08f8786..2473a9a 100644 (file)
 typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
 
 /* liblustreapi.c */
-extern int llapi_file_create(char *name, long stripe_size, int stripe_offset,
-                             int stripe_count, int stripe_pattern);
+extern int llapi_file_create(const char *name, long stripe_size,
+                             int stripe_offset, int stripe_count,
+                             int stripe_pattern);
 extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum);
+#define HAVE_LLAPI_FILE_LOOKUP
+extern int llapi_file_lookup(int dirfd, const char *name);
 extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
                       int verbose, int quiet);
 extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
index bd60687..e339db0 100644 (file)
@@ -388,7 +388,8 @@ struct obdo {
         __u32                   o_mds;
         __u32                   o_stripe_idx;   /* holds stripe idx */
         __u32                   o_padding_1;
-        char                    o_inline[OBD_INLINESZ]; /* fid in ost writes */
+        char                    o_inline[OBD_INLINESZ];
+                                /* lustre_handle + llog_cookie */
 };
 
 #define o_dirty   o_blocks
@@ -760,6 +761,7 @@ extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa);
 #define MDS_OPEN_DELAY_CREATE  0100000000 /* delay initial object create */
 #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
 #define MDS_OPEN_JOIN_FILE     0400000000 /* open for join file*/
+#define MDS_OPEN_LOCK         04000000000 /* This open requires open lock */
 #define MDS_OPEN_HAS_EA      010000000000 /* specify object create pattern */
 #define MDS_OPEN_HAS_OBJS    020000000000 /* Just set the EA the obj exist */
 
index feb2b49..772cbec 100644 (file)
@@ -364,6 +364,7 @@ struct client_obd {
 
         struct mdc_rpc_lock     *cl_rpc_lock;
         struct mdc_rpc_lock     *cl_setattr_lock;
+        struct mdc_rpc_lock     *cl_close_lock;
         struct osc_creator       cl_oscc;
 
         /* mgc datastruct */
diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.4.20.patch
new file mode 100644 (file)
index 0000000..0329dfe
--- /dev/null
@@ -0,0 +1,63 @@
+Index: linux-2.4.21/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/namei.c  2006-04-29 20:48:26.000000000 +0400
++++ linux-2.4.21/fs/ext3/namei.c       2006-05-06 01:31:51.000000000 +0400
+@@ -955,6 +955,38 @@ static struct dentry *ext3_lookup(struct
+               }
+       }
++      /* ".." shouldn't go into dcache to preserve dcache hierarchy
++       * otherwise we'll get parent being a child of actual child.
++       * see bug 10458 for details -bzzz */
++      if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++              (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++              struct dentry *tmp, *goal = NULL;
++              struct list_head *lp;
++
++              /* first, look for an existing dentry - any one is good */
++              spin_lock(&dcache_lock);
++              list_for_each(lp, &inode->i_dentry) {
++                      tmp = list_entry(lp, struct dentry, d_alias);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++              if (goal == NULL) {
++                      /* there is no alias, we need to make current dentry:
++                       *  a) inaccessible for __d_lookup()
++                       *  b) inaccessible for iopen */
++                      J_ASSERT(list_empty(&dentry->d_alias));
++                      dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++                      /* this is d_instantiate() ... */
++                      list_add(&dentry->d_alias, &inode->i_dentry);
++                      dentry->d_inode = inode;
++              }
++              spin_unlock(&dcache_lock);
++              if (goal)
++                      iput(inode);
++              return goal;
++      }
++
+       return iopen_connect_dentry(dentry, inode, 1);
+ }
+Index: linux-2.4.21/fs/ext3/iopen.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.c  2006-04-29 20:48:23.000000000 +0400
++++ linux-2.4.21/fs/ext3/iopen.c       2006-04-29 20:59:50.000000000 +0400
+@@ -92,9 +92,12 @@ static struct dentry *iopen_lookup(struc
+               assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
+       }
+-      if (!list_empty(&inode->i_dentry)) {
+-              alternate = list_entry(inode->i_dentry.next,
+-                                     struct dentry, d_alias);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              /* ignore dentries created for ".." to preserve
++               * proper dcache hierarchy -- bug 10458 */
++              if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++                      continue;
+               dget_locked(alternate);
+               alternate->d_vfs_flags |= DCACHE_REFERENCED;
+               iput(inode);
diff --git a/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch b/lustre/kernel_patches/patches/ext3-lookup-dotdot-2.6.9.patch
new file mode 100644 (file)
index 0000000..f9d4857
--- /dev/null
@@ -0,0 +1,63 @@
+Index: linux-2.6.9-full/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/iopen.c      2006-04-25 08:51:11.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/iopen.c   2006-05-06 01:21:11.000000000 +0400
+@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc
+               assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
+       }
+-      if (!list_empty(&inode->i_dentry)) {
+-              alternate = list_entry(inode->i_dentry.next,
+-                                     struct dentry, d_alias);
++      list_for_each(lp, &inode->i_dentry) {
++              alternate = list_entry(lp, struct dentry, d_alias);
++              /* ignore dentries created for ".." to preserve
++               * proper dcache hierarchy -- bug 10458 */
++              if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++                      continue;
+               dget_locked(alternate);
+               spin_lock(&alternate->d_lock);
+               alternate->d_flags |= DCACHE_REFERENCED;
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c      2006-05-06 01:21:10.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c   2006-05-06 01:29:30.000000000 +0400
+@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct
+                       return ERR_PTR(-EACCES);
+       }
++      /* ".." shouldn't go into dcache to preserve dcache hierarchy
++       * otherwise we'll get parent being a child of actual child.
++       * see bug 10458 for details -bzzz */
++      if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++              (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++              struct dentry *tmp, *goal = NULL;
++              struct list_head *lp;
++
++              /* first, look for an existing dentry - any one is good */
++              spin_lock(&dcache_lock);
++              list_for_each(lp, &inode->i_dentry) {
++                      tmp = list_entry(lp, struct dentry, d_alias);
++                      goal = tmp;
++                      dget_locked(goal);
++                      break;
++              }
++              if (goal == NULL) {
++                      /* there is no alias, we need to make current dentry:
++                       *  a) inaccessible for __d_lookup()
++                       *  b) inaccessible for iopen */
++                      J_ASSERT(list_empty(&dentry->d_alias));
++                      dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++                      /* this is d_instantiate() ... */
++                      list_add(&dentry->d_alias, &inode->i_dentry);
++                      dentry->d_inode = inode;
++              }
++              spin_unlock(&dcache_lock);
++              if (goal)
++                      iput(inode);
++              return goal;
++      }
++
+       return iopen_connect_dentry(dentry, inode, 1);
+ }
index bab81b9..3661023 100644 (file)
@@ -10,3 +10,4 @@ ext3-extents-2.6.9-rhel4.patch
 ext3-mballoc2-2.6.9-rhel4.patch 
 ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
index 2584c1d..efa7700 100644 (file)
@@ -12,3 +12,4 @@ ext3-nlinks-2.6.7.patch
 ext3-rename-reserve-2.6-suse.patch
 ext3-htree-dot-2.6.5-suse.patch 
 ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
index 7d0a383..b44e35e 100644 (file)
@@ -11,3 +11,4 @@ ext3-ialloc-2.6.patch
 ext3-remove-cond_resched-calls-2.6.12.patch
 ext3-htree-dot-2.6.patch
 ext3-external-journal-2.6.12.patch
+ext3-lookup-dotdot-2.6.9.patch
index bc6e9f6..dcaff40 100644 (file)
@@ -51,3 +51,4 @@ statfs64-cast-unsigned-2.4-rhel.patch
 fsprivate-2.4.patch
 nfsd_iallocsem.patch
 linux-2.4.24-jbd-handle-EIO-rhel3.patch
+ext3-lookup-dotdot-2.4.20.patch 
index a0a2633..5e34152 100644 (file)
@@ -1,5 +1,5 @@
 lnxmaj="2.6.5"
-lnxrel="7.244"
+lnxrel="7.252"
 
 KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
 # they include our patches
index 9db3f3f..d639b74 100644 (file)
@@ -701,9 +701,8 @@ int lustre_read_dquot(struct lustre_dquot *dquot)
         struct lustre_disk_dqblk ddquot, empty;
         int ret = 0;
 
-        filp = dquot->dq_info->qi_files[type];
-
-        if (!filp || !dquot->dq_info) { /* Invalidated quota? */
+        /* Invalidated quota? */
+        if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
                 printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
                 return -EIO;
         }
index a5e1e86..614a91c 100644 (file)
@@ -1436,6 +1436,10 @@ int target_handle_qc_callback(struct ptlrpc_request *req)
 
         oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl),
                                    lustre_swab_obd_quotactl);
+        if (oqctl == NULL) {
+                CERROR("Can't unpack obd_quotactl\n");
+                RETURN(-EPROTO);
+        }
 
         cli->cl_qchk_stat = oqctl->qc_stat;
 
@@ -1465,7 +1469,7 @@ int target_handle_dqacq_callback(struct ptlrpc_request *req)
         qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata),
                                    lustre_swab_qdata);
         if (qdata == NULL) {
-                CERROR("unpacking request buffer failed!");
+                CERROR("Can't unpack qunit_data\n");
                 RETURN(-EPROTO);
         }
 
index 321e578..f2392e5 100644 (file)
@@ -617,11 +617,11 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
                 body->lock_flags |= LDLM_FL_AST_SENT;
 
                 /* We might get here prior to ldlm_handle_enqueue setting
-                   LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock into
-                   waiting list, but this is safe and similar code in
-                   ldlm_handle_enqueue will call ldlm_lock_cancel() still, that
-                   would not only cancel the loc, but will also remove it from
-                   waiting list */
+                 * LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock
+                 * into waiting list, but this is safe and similar code in
+                 * ldlm_handle_enqueue will call ldlm_lock_cancel() still,
+                 * that would not only cancel the lock, but will also remove
+                 * it from waiting list */
                 if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
                         ldlm_lock_cancel(lock);
                         instant_cancel = 1;
index 21eee08..d8945a9 100644 (file)
@@ -54,9 +54,10 @@ int ldlm_expired_completion_wait(void *data)
         if (lock->l_conn_export == NULL) {
                 static cfs_time_t next_dump = 0, last_dump = 0;
 
-                LDLM_ERROR(lock, "lock timed out (enq %lus ago); not entering "
-                           "recovery in server code, just going back to sleep",
-                           lock->l_enqueued_time.tv_sec);
+                LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago); "
+                           "not entering recovery in server code, just going "
+                           "back to sleep", lock->l_enqueued_time.tv_sec,
+                           CURRENT_SECONDS - lock->l_enqueued_time.tv_sec);
                 if (cfs_time_after(cfs_time_current(), next_dump)) {
                         last_dump = next_dump;
                         next_dump = cfs_time_shift(300);
@@ -71,8 +72,9 @@ int ldlm_expired_completion_wait(void *data)
         obd = lock->l_conn_export->exp_obd;
         imp = obd->u.cli.cl_import;
         ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
-        LDLM_ERROR(lock, "lock timed out (enqueued %lus ago), entering "
+        LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago), entering "
                    "recovery for %s@%s", lock->l_enqueued_time.tv_sec,
+                   CURRENT_SECONDS - lock->l_enqueued_time.tv_sec,
                    obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
 
         RETURN(0);
index 503480c..f4b19ee 100644 (file)
@@ -340,8 +340,7 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
         /* NB 1 request reference will be taken away by ll_intent_lock()
          * when I return
          */
-        if (!it_disposition(it, DISP_LOOKUP_NEG) ||
-            (it->it_op & IT_CREAT)) {
+        if (!it_disposition(it, DISP_LOOKUP_NEG) || (it->it_op & IT_CREAT)) {
                 struct lustre_md md;
                 struct llu_inode_info *lli;
                 struct intnl_stat *st;
index 8917eff..2c5f924 100644 (file)
@@ -349,14 +349,16 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         oa->o_valid = OBD_MD_FLID;
         valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
         if (cmd & OBD_BRW_WRITE)
-                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+                valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+                        OBD_MD_FLUID | OBD_MD_FLGID |
+                        OBD_MD_FLFID | OBD_MD_FLGENER;
 
         obdo_from_inode(oa, inode, valid_flags);
         EXIT;
 }
 
 /* called for each page in a completed rpc.*/
-static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+static int llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
 {
         struct ll_async_page *llap;
         struct page *page;
@@ -371,7 +373,7 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
                         CERROR("writeback error on page %p index %ld: %d\n",
                                page, page->index, rc);
         }
-        EXIT;
+        RETURN(0);
 }
 
 static struct obd_async_page_ops llu_async_page_ops = {
index 4f0ec58..529f5a7 100644 (file)
@@ -148,6 +148,51 @@ void ll_intent_release(struct lookup_intent *it)
         EXIT;
 }
 
+/* Drop dentry if it is not used already, unhash otherwise.
+   Should be called with dcache lock held!
+   Returns: 1 if dentry was dropped, 0 if unhashed. */
+int ll_drop_dentry(struct dentry *dentry)
+{
+        lock_dentry(dentry);
+        if (atomic_read(&dentry->d_count) == 0) {
+                CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
+                       "inode %p\n", dentry->d_name.len,
+                       dentry->d_name.name, dentry, dentry->d_parent,
+                       dentry->d_inode);
+                dget_locked(dentry);
+                __d_drop(dentry);
+                unlock_dentry(dentry);
+                spin_unlock(&dcache_lock);
+                dput(dentry);
+                spin_lock(&dcache_lock);
+                return 1;
+        } 
+        
+        if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                struct inode *inode = dentry->d_inode;
+#endif
+               CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
+                       "inode %p refc %d\n", dentry->d_name.len,
+                       dentry->d_name.name, dentry, dentry->d_parent,
+                       dentry->d_inode, atomic_read(&dentry->d_count));
+                /* actually we don't unhash the dentry, rather just
+                 * mark it inaccessible to __d_lookup(). otherwise
+                 * sys_getcwd() could return -ENOENT -bzzz */
+                dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+                __d_drop(dentry);
+                if (inode) {
+                        /* Put positive dentries to orphan list */
+                        hlist_add_head(&dentry->d_hash,
+                                       &ll_i2sbi(inode)->ll_orphan_dentry_list);
+                }
+#endif
+        }
+        unlock_dentry(dentry);
+        return 0;
+}
+
 void ll_unhash_aliases(struct inode *inode)
 {
         struct list_head *tmp, *head;
@@ -162,8 +207,8 @@ void ll_unhash_aliases(struct inode *inode)
                inode->i_ino, inode->i_generation, inode);
 
         head = &inode->i_dentry;
-restart:
         spin_lock(&dcache_lock);
+restart:
         tmp = head;
         while ((tmp = tmp->next) != head) {
                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
@@ -185,35 +230,9 @@ restart:
 
                         continue;
                 }
-
-                lock_dentry(dentry);
-                if (atomic_read(&dentry->d_count) == 0) {
-                        CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
-                               "inode %p\n", dentry->d_name.len,
-                               dentry->d_name.name, dentry, dentry->d_parent,
-                               dentry->d_inode);
-                        dget_locked(dentry);
-                        __d_drop(dentry);
-                        unlock_dentry(dentry);
-                        spin_unlock(&dcache_lock);
-                        dput(dentry);
-                        goto restart;
-                } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
-                        CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
-                               "inode %p refc %d\n", dentry->d_name.len,
-                               dentry->d_name.name, dentry, dentry->d_parent,
-                               dentry->d_inode, atomic_read(&dentry->d_count));
-                        /* actually we don't unhash the dentry, rather just
-                         * mark it inaccessible for to __d_lookup(). otherwise
-                         * sys_getcwd() could return -ENOENT -bzzz */
-                        dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-                        __d_drop(dentry);
-                        hlist_add_head(&dentry->d_hash,
-                                       &ll_i2sbi(inode)->ll_orphan_dentry_list);
-#endif
-                }
-                unlock_dentry(dentry);
+                
+                if (ll_drop_dentry(dentry))
+                          goto restart;
         }
         spin_unlock(&dcache_lock);
         EXIT;
@@ -282,7 +301,6 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
                      struct lookup_intent *it)
 {
         int rc;
-        struct it_cb_data icbd;
         struct mdc_op_data op_data;
         struct ptlrpc_request *req = NULL;
         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
@@ -292,13 +310,25 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
                LL_IT2STR(it));
 
-        /* Cached negative dentries are unsafe for now - look them up again */
-        if (de->d_inode == NULL)
-                RETURN(0);
+        if (de->d_inode == NULL) {
+                /* We can only use negative dentries if this is stat or lookup,
+                   for opens and stuff we do need to query server. */
+                /* If there is IT_CREAT in intent op set, then we must throw
+                   away this negative dentry and actually do the request to
+                   kernel to create whatever needs to be created (if possible)*/
+                if (it && (it->it_op & IT_CREAT))
+                        RETURN(0);
+
+                if (de->d_flags & DCACHE_LUSTRE_INVALID)
+                        RETURN(0);
+
+                rc = ll_have_md_lock(de->d_parent->d_inode, 
+                                     MDS_INODELOCK_UPDATE);
+        
+                RETURN(rc);
+        }
 
         exp = ll_i2mdcexp(de->d_inode);
-        icbd.icbd_parent = de->d_parent->d_inode;
-        icbd.icbd_childp = &de;
 
         /* Never execute intents for mount points.
          * Attributes will be fixed up in ll_inode_revalidate_it */
@@ -312,6 +342,53 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
         ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode,
                                de->d_name.name, de->d_name.len, 0);
 
+        if ((it->it_op == IT_OPEN) && de->d_inode) {
+                struct inode *inode = de->d_inode;
+                struct ll_inode_info *lli = ll_i2info(inode);
+                struct obd_client_handle **och_p;
+                __u64 *och_usecount;
+                /* We used to check for MDS_INODELOCK_OPEN here, but in fact
+                 * just having LOOKUP lock is enough to justify inode is the
+                 * same. And if inode is the same and we have suitable
+                 * openhandle, then there is no point in doing another OPEN RPC
+                 * just to throw away newly received openhandle.
+                 * There are no security implications too, if file owner or
+                 * access mode is changed, LOOKUP lock is revoked */
+
+                it->it_create_mode &= ~current->fs->umask;
+
+                if (it->it_flags & FMODE_WRITE) {
+                        och_p = &lli->lli_mds_write_och;
+                        och_usecount = &lli->lli_open_fd_write_count;
+                } else if (it->it_flags & FMODE_EXEC) {
+                        och_p = &lli->lli_mds_exec_och;
+                        och_usecount = &lli->lli_open_fd_exec_count;
+                 } else {
+                        och_p = &lli->lli_mds_read_och;
+                        och_usecount = &lli->lli_open_fd_read_count;
+                }
+                /* Check for the proper lock. */
+                if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
+                        goto do_lock;
+                down(&lli->lli_och_sem);
+                if (*och_p) { /* Everything is open already, do nothing */
+                        /*(*och_usecount)++;  Do not let them steal our open
+                                              handle from under us */
+                        /* XXX The code above was my original idea, but in case
+                           we have the handle, but we cannot use it due to later
+                           checks (e.g. O_CREAT|O_EXCL flags set), nobody
+                           would decrement counter increased here. So we just
+                           hope the lock won't be invalidated in between. But
+                           if it would be, we'll reopen the open request to
+                           MDS later during file open path */
+                        up(&lli->lli_och_sem);
+                        RETURN(1);
+                } else {
+                        up(&lli->lli_och_sem);
+                }
+        }
+
+do_lock:
         rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
                              &req, ll_mdc_blocking_ast, 0);
         /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
index 31b2fde..9a5a294 100644 (file)
@@ -51,8 +51,25 @@ static int ll_close_inode_openhandle(struct inode *inode,
                                      struct obd_client_handle *och)
 {
         struct ptlrpc_request *req = NULL;
+        struct obd_device *obd;
         struct obdo *oa;
         int rc;
+        ENTRY;
+
+        obd = class_exp2obd(ll_i2mdcexp(inode));
+        if (obd == NULL) {
+                CERROR("Invalid MDC connection handle "LPX64"\n",
+                       ll_i2mdcexp(inode)->exp_handle.h_cookie);
+                GOTO(out, rc = 0);
+        }
+
+        /*
+         * here we check if this is forced umount. If so this is called on
+         * canceling "open lock" and we do not call mdc_close() in this case, as
+         * it will not be successful, as import is already deactivated.
+         */
+        if (obd->obd_no_recov)
+                GOTO(out, rc = 0);
 
         oa = obdo_alloc();
         if (!oa)
@@ -89,8 +106,52 @@ static int ll_close_inode_openhandle(struct inode *inode,
                                inode->i_ino, rc);
         }
 
-        mdc_clear_open_replay_data(och);
         ptlrpc_req_finished(req); /* This is close request */
+        EXIT;
+out:
+        mdc_clear_open_replay_data(och);
+
+        return rc;
+}
+
+int ll_mdc_real_close(struct inode *inode, int flags)
+{
+        struct ll_inode_info *lli = ll_i2info(inode);
+        int rc = 0;
+        struct obd_client_handle **och_p;
+        struct obd_client_handle *och;
+        __u64 *och_usecount;
+
+        ENTRY;
+
+        if (flags & FMODE_WRITE) {
+                och_p = &lli->lli_mds_write_och;
+                och_usecount = &lli->lli_open_fd_write_count;
+        } else if (flags & FMODE_EXEC) {
+                och_p = &lli->lli_mds_exec_och;
+                och_usecount = &lli->lli_open_fd_exec_count;
+         } else {
+                LASSERT(flags & FMODE_READ);
+                och_p = &lli->lli_mds_read_och;
+                och_usecount = &lli->lli_open_fd_read_count;
+        }
+
+        down(&lli->lli_och_sem);
+        if (*och_usecount) { /* There are still users of this handle, so
+                                skip freeing it. */
+                up(&lli->lli_och_sem);
+                RETURN(0);
+        }
+        och=*och_p;
+        *och_p = NULL;
+        up(&lli->lli_och_sem);
+
+        if (och) { /* There might be a race and somebody has freed this och
+                      already */
+                rc = ll_close_inode_openhandle(inode, och);
+                och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+                OBD_FREE(och, sizeof *och);
+        }
 
         RETURN(rc);
 }
@@ -99,8 +160,8 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
                         struct file *file)
 {
         struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
-        struct obd_client_handle *och = &fd->fd_mds_och;
-        int rc;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        int rc = 0;
         ENTRY;
 
         /* clear group lock, if present */
@@ -110,9 +171,45 @@ int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
                 rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
                                       &fd->fd_cwlockh);
         }
+
+        /* Let's see if we have good enough OPEN lock on the file and if
+           we can skip talking to MDS */
+        if (file->f_dentry->d_inode) { /* Can this ever be false? */
+                int lockmode;
+                int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+                struct lustre_handle lockh;
+                struct inode *inode = file->f_dentry->d_inode;
+                struct ldlm_res_id file_res_id = {.name={inode->i_ino,
+                                                         inode->i_generation}};
+                ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
+
+                down(&lli->lli_och_sem);
+                if (fd->fd_omode & FMODE_WRITE) {
+                        lockmode = LCK_CW;
+                        LASSERT(lli->lli_open_fd_write_count);
+                        lli->lli_open_fd_write_count--;
+                } else if (fd->fd_omode & FMODE_EXEC) {
+                        lockmode = LCK_PR;
+                        LASSERT(lli->lli_open_fd_exec_count);
+                        lli->lli_open_fd_exec_count--;
+                } else {
+                        lockmode = LCK_CR;
+                        LASSERT(lli->lli_open_fd_read_count);
+                        lli->lli_open_fd_read_count--;
+                }
+                up(&lli->lli_och_sem);
+
+                if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
+                                     &file_res_id, LDLM_IBITS, &policy,lockmode,
+                                     &lockh)) {
+                        rc = ll_mdc_real_close(file->f_dentry->d_inode,
+                                                fd->fd_omode);
+                }
+        } else {
+                CERROR("Releasing a file %p with negative dentry %p. Name %s",
+                       file, file->f_dentry, file->f_dentry->d_name.name);
+        }
         
-        rc = ll_close_inode_openhandle(inode, och);
-        och->och_fh.cookie = DEAD_HANDLE_MAGIC;
         LUSTRE_FPRIVATE(file) = NULL;
         ll_file_data_put(fd);
 
@@ -170,6 +267,18 @@ static int ll_intent_file_open(struct file *file, void *lmm,
 
         ll_prepare_mdc_op_data(&data, parent->d_inode, NULL, name, len, O_RDWR);
 
+        /* Usually we come here only for NFSD, and we want open lock.
+           But we can also get here with pre 2.6.15 patchless kernels, and in
+           that case that lock is also ok */
+        /* We can also get here if there was cached open handle in revalidate_it
+         * but it disappeared while we were getting from there to ll_file_open.
+         * But this means this file was closed and immediately opened which
+         * makes a good candidate for using OPEN lock */
+        /* If lmmsize & lmm are not 0, we are just setting stripe info
+         * parameters. No need for the open lock */
+        if (!lmm && !lmmsize)
+                itp->it_flags |= MDS_OPEN_LOCK;
+
         rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, itp, LCK_PW, &data,
                          &lockh, lmm, lmmsize, ldlm_completion_ast,
                          ll_mdc_blocking_ast, NULL, 0);
@@ -178,6 +287,11 @@ static int ll_intent_file_open(struct file *file, void *lmm,
                 GOTO(out, rc);
         }
 
+        if (itp->d.lustre.it_lock_mode) { /* If we got lock - release it right
+                                           * away */
+                ldlm_lock_decref(&lockh, itp->d.lustre.it_lock_mode);
+                itp->d.lustre.it_lock_mode = 0;
+        }
         rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
                            (struct ptlrpc_request *)itp->d.lustre.it_data,
                            DLM_REPLY_REC_OFF, NULL);
@@ -205,7 +319,7 @@ static void ll_och_fill(struct ll_inode_info *lli, struct lookup_intent *it,
 }
 
 int ll_local_open(struct file *file, struct lookup_intent *it,
-                  struct ll_file_data *fd)
+                  struct ll_file_data *fd, struct obd_client_handle *och)
 {
         ENTRY;
 
@@ -213,9 +327,11 @@ int ll_local_open(struct file *file, struct lookup_intent *it,
 
         LASSERT(fd != NULL);
 
-        ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, &fd->fd_mds_och);
+        if (och)
+                ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
         LUSTRE_FPRIVATE(file) = fd;
         ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
+        fd->fd_omode = it->it_flags;
 
         RETURN(0);
 }
@@ -241,7 +357,9 @@ int ll_file_open(struct inode *inode, struct file *file)
         struct lookup_intent *it, oit = { .it_op = IT_OPEN,
                                           .it_flags = file->f_flags };
         struct lov_stripe_md *lsm;
-        struct ptlrpc_request *req;
+        struct ptlrpc_request *req = NULL;
+        struct obd_client_handle **och_p;
+        __u64 *och_usecount;
         struct ll_file_data *fd;
         int rc = 0;
         ENTRY;
@@ -276,25 +394,77 @@ int ll_file_open(struct inode *inode, struct file *file)
                 oit.it_flags &= ~O_EXCL;
 
                 it = &oit;
-                rc = ll_intent_file_open(file, NULL, 0, it);
+        }
+
+        /* Let's see if we have file open on MDS already. */
+        if (it->it_flags & FMODE_WRITE) {
+                och_p = &lli->lli_mds_write_och;
+                och_usecount = &lli->lli_open_fd_write_count;
+        } else if (it->it_flags & FMODE_EXEC) {
+                och_p = &lli->lli_mds_exec_och;
+                och_usecount = &lli->lli_open_fd_exec_count;
+         } else {
+                och_p = &lli->lli_mds_read_och;
+                och_usecount = &lli->lli_open_fd_read_count;
+        }
+        down(&lli->lli_och_sem);
+        if (*och_p) { /* Open handle is present */
+                if (it_disposition(it, DISP_LOOKUP_POS) && /* Positive lookup */
+                    it_disposition(it, DISP_OPEN_OPEN)) { /* & OPEN happened */
+                        /* Well, there's extra open request that we do not need,
+                           let's close it somehow. This will decref request. */
+                        ll_release_openhandle(file->f_dentry, it);
+                }
+                (*och_usecount)++;
+
+                rc = ll_local_open(file, it, fd, NULL);
+
+                LASSERTF(rc == 0, "rc = %d\n", rc);
+        } else {
+                LASSERT(*och_usecount == 0);
+                OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
+                if (!*och_p) {
+                        ll_file_data_put(fd);
+                        GOTO(out_och_free, rc = -ENOMEM);
+                }
+                (*och_usecount)++;
+                if (!it->d.lustre.it_disposition) {
+                        rc = ll_intent_file_open(file, NULL, 0, it);
+                        if (rc) {
+                                ll_file_data_put(fd);
+                                GOTO(out_och_free, rc);
+                        }
+
+                        /* Got some error? Release the request */
+                        if (it->d.lustre.it_status < 0) {
+                                req = it->d.lustre.it_data;
+                                ptlrpc_req_finished(req);
+                        }
+                        mdc_set_lock_data(&it->d.lustre.it_lock_handle,
+                                          file->f_dentry->d_inode);
+                }
+                req = it->d.lustre.it_data;
+
+                /* mdc_intent_lock() didn't get a request ref if there was an
+                 * open error, so don't do cleanup on the request here 
+                 * (bug 3430) */
+                /* XXX (green): Should we not bail out on any error here, not
+                 * just open error? */
+                rc = it_open_error(DISP_OPEN_OPEN, it);
                 if (rc) {
                         ll_file_data_put(fd);
-                        GOTO(out, rc);
+                        GOTO(out_och_free, rc);
                 }
-        }
 
-        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
-        rc = it_open_error(DISP_OPEN_OPEN, it);
-        /* mdc_intent_lock() didn't get a request ref if there was an open
-         * error, so don't do cleanup on the request here (bug 3430) */
-        if (rc) {
-                ll_file_data_put(fd);
-                RETURN(rc);
+                lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
+                rc = ll_local_open(file, it, fd, *och_p);
+                LASSERTF(rc == 0, "rc = %d\n", rc);
         }
+        up(&lli->lli_och_sem);
 
-        rc = ll_local_open(file, it, fd);
-        LASSERTF(rc == 0, "rc = %d\n", rc);
-
+        /* Must do this outside lli_och_sem lock to prevent deadlock where
+           a different kind of OPEN lock for this same inode gets cancelled
+           by ldlm_cancel_lru */
         if (!S_ISREG(inode->i_mode))
                 GOTO(out, rc);
 
@@ -309,12 +479,21 @@ int ll_file_open(struct inode *inode, struct file *file)
         file->f_flags &= ~O_LOV_DELAY_CREATE;
         GOTO(out, rc);
  out:
-        req = it->d.lustre.it_data;
         ptlrpc_req_finished(req);
         if (req)
                 it_clear_disposition(it, DISP_ENQ_OPEN_REF);
-        if (rc == 0)
+        if (rc == 0) {
                 ll_open_complete(inode);
+        } else {
+out_och_free:
+                if (*och_p) {
+                        OBD_FREE(*och_p, sizeof (struct obd_client_handle));
+                        *och_p = NULL; /* OBD_FREE writes some magic there */
+                        (*och_usecount)--;
+                }
+                up(&lli->lli_och_sem);
+        }
+
         return rc;
 }
 
@@ -1023,10 +1202,8 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
                 *ppos = inode->i_size;
 
         if (*ppos >= maxbytes) {
-                if (count || *ppos > maxbytes) {
-                        send_sig(SIGXFSZ, current, 0);
-                        GOTO(out, retval = -EFBIG);
-                }
+                send_sig(SIGXFSZ, current, 0);
+                GOTO(out, retval = -EFBIG);
         }
         if (*ppos + count > maxbytes)
                 count = maxbytes - *ppos;
@@ -1200,14 +1377,9 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
                                     int lum_size)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
-        struct file *f = NULL;
-        struct obd_export *exp = ll_i2obdexp(inode);
         struct lov_stripe_md *lsm;
         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
-        struct ptlrpc_request *req = NULL;
-        struct ll_file_data *fd;
         int rc = 0;
-        struct lustre_md md;
         ENTRY;
 
         down(&lli->lli_open_sem);
@@ -1219,49 +1391,24 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
                 RETURN(-EEXIST);
         }
 
-        fd = ll_file_data_get();
-        if (fd == NULL)
-                GOTO(out, -ENOMEM);
-
-        f = get_empty_filp();
-        if (!f)
-                GOTO(out, -ENOMEM);
-
-        f->f_dentry = dget(file->f_dentry);
-        f->f_vfsmnt = mntget(file->f_vfsmnt);
-
-        rc = ll_intent_file_open(f, lum, lum_size, &oit);
+        rc = ll_intent_file_open(file, lum, lum_size, &oit);
         if (rc)
                 GOTO(out, rc);
         if (it_disposition(&oit, DISP_LOOKUP_NEG))
-                GOTO(out, -ENOENT);
-        req = oit.d.lustre.it_data;
+                GOTO(out_req_free, rc = -ENOENT);
         rc = oit.d.lustre.it_status;
-
         if (rc < 0)
-                GOTO(out, rc);
+                GOTO(out_req_free, rc);
 
-        rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
-        if (rc)
-                GOTO(out, rc);
-        ll_update_inode(f->f_dentry->d_inode, &md);
-
-        rc = ll_local_open(f, &oit, fd);
-        if (rc)
-                GOTO(out, rc);
-        fd = NULL;
-        ll_intent_release(&oit);
-
-        rc = ll_file_release(f->f_dentry->d_inode, f);
+        ll_release_openhandle(file->f_dentry, &oit);
 
  out:
-        if (f)
-                fput(f);
-        ll_file_data_put(fd);
         up(&lli->lli_open_sem);
-        if (req != NULL)
-                ptlrpc_req_finished(req);
+        ll_intent_release(&oit);
         RETURN(rc);
+out_req_free:
+        ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
+        goto out;
 }
 
 static int ll_lov_setea(struct inode *inode, struct file *file,
@@ -1419,13 +1566,10 @@ static int join_file(struct inode *head_inode, struct file *head_filp,
         struct dentry *tail_dentry = tail_filp->f_dentry;
         struct lookup_intent oit = {.it_op = IT_OPEN,
                                    .it_flags = head_filp->f_flags|O_JOIN_FILE};
-        struct ptlrpc_request *req = NULL;
-        struct ll_file_data *fd;
         struct lustre_handle lockh;
         struct mdc_op_data *op_data;
         __u32  hsize = head_inode->i_size >> 32;
         __u32  tsize = head_inode->i_size;
-        struct file *f;
         int    rc;
         ENTRY;
 
@@ -1433,23 +1577,11 @@ static int join_file(struct inode *head_inode, struct file *head_filp,
         tail_inode = tail_dentry->d_inode;
         tail_parent = tail_dentry->d_parent->d_inode;
 
-        fd = ll_file_data_get();
-        if (fd == NULL)
-                RETURN(-ENOMEM);
-
         OBD_ALLOC_PTR(op_data);
         if (op_data == NULL) {
-                ll_file_data_put(fd);
                 RETURN(-ENOMEM);
         }
 
-        f = get_empty_filp();
-        if (f == NULL)
-                GOTO(out, rc = -ENOMEM);
-
-        f->f_dentry = dget(head_filp->f_dentry);
-        f->f_vfsmnt = mntget(head_filp->f_vfsmnt);
-
         ll_prepare_mdc_op_data(op_data, head_inode, tail_parent,
                                tail_dentry->d_name.name,
                                tail_dentry->d_name.len, 0);
@@ -1460,26 +1592,24 @@ static int join_file(struct inode *head_inode, struct file *head_filp,
         if (rc < 0)
                 GOTO(out, rc);
 
-        req = oit.d.lustre.it_data;
         rc = oit.d.lustre.it_status;
 
-        if (rc < 0)
+        if (rc < 0) {
+                ptlrpc_req_finished((struct ptlrpc_request *)
+                                                          oit.d.lustre.it_data);
                 GOTO(out, rc);
+        }
 
-        rc = ll_local_open(f, &oit, fd);
-        LASSERTF(rc == 0, "rc = %d\n", rc);
-
-        fd = NULL;
-        ll_intent_release(&oit);
-
-        rc = ll_file_release(f->f_dentry->d_inode, f);
+        if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
+                                           * away */
+                ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
+                oit.d.lustre.it_lock_mode = 0;
+        }
+        ll_release_openhandle(head_filp->f_dentry, &oit);
 out:
         if (op_data)
                 OBD_FREE_PTR(op_data);
-        if (f)
-                fput(f);
-        ll_file_data_put(fd);
-        ptlrpc_req_finished(req);
+        ll_intent_release(&oit);
         RETURN(rc);
 }
 
@@ -1883,23 +2013,21 @@ int ll_file_flock(struct file *file, int cmd, struct file_lock *file_lock)
         RETURN(rc);
 }
 
-static int ll_have_md_lock(struct dentry *de)
+int ll_have_md_lock(struct inode *inode, __u64 bits)
 {
-        struct ll_sb_info *sbi = ll_s2sbi(de->d_sb);
         struct lustre_handle lockh;
         struct ldlm_res_id res_id = { .name = {0} };
         struct obd_device *obddev;
-        ldlm_policy_data_t policy = { .l_inodebits = {
-                MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP}};
+        ldlm_policy_data_t policy = { .l_inodebits = {bits}};
         int flags;
         ENTRY;
 
-        if (!de->d_inode)
+        if (!inode)
                RETURN(0);
 
-        obddev = sbi->ll_mdc_exp->exp_obd;
-        res_id.name[0] = de->d_inode->i_ino;
-        res_id.name[1] = de->d_inode->i_generation;
+        obddev = ll_i2mdcexp(inode)->exp_obd;
+        res_id.name[0] = inode->i_ino;
+        res_id.name[1] = inode->i_generation;
 
         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
 
@@ -1976,8 +2104,19 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                         GOTO(out, rc);
                 }
 
+                /* Unlinked? Unhash dentry, so it is not picked up later by
+                   do_lookup() -> ll_revalidate_it(). We cannot use d_drop
+                   here, because get_cwd functionality on 2.6 must be
+                   preserved. Bug 10503 */
+                if (!dentry->d_inode->i_nlink) {
+                        spin_lock(&dcache_lock);
+                        ll_drop_dentry(dentry);
+                        spin_unlock(&dcache_lock);
+                }
+
                 ll_lookup_finish_locks(&oit, dentry);
-        } else if (!ll_have_md_lock(dentry)) {
+        } else if (!ll_have_md_lock(dentry->d_inode,
+                                  MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 struct ll_fid fid;
                 obd_valid valid = OBD_MD_FLGETATTR;
index ea519ba..f34079f 100644 (file)
@@ -83,6 +83,17 @@ struct ll_inode_info {
 
         struct list_head        lli_dead_list;
 
+        struct semaphore        lli_och_sem; /* Protects access to och pointers
+                                                and their usage counters */
+        /* We need all three because every inode may be opened in different
+           modes */
+        struct obd_client_handle *lli_mds_read_och;
+        __u64                   lli_open_fd_read_count;
+        struct obd_client_handle *lli_mds_write_och;
+        __u64                   lli_open_fd_write_count;
+        struct obd_client_handle *lli_mds_exec_och;
+        __u64                   lli_open_fd_exec_count;
+
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
         struct inode            lli_vfs_inode;
 #endif
@@ -256,11 +267,11 @@ struct ll_readahead_state {
 extern kmem_cache_t *ll_file_data_slab;
 struct lustre_handle;
 struct ll_file_data {
-        struct obd_client_handle fd_mds_och;
         struct ll_readahead_state fd_ras;
-        __u32 fd_flags;
+        int fd_omode;
         struct lustre_handle fd_cwlockh;
         unsigned long fd_gid;
+        __u32 fd_flags;
 };
 
 struct lov_stripe_md;
@@ -383,6 +394,7 @@ extern struct file_operations ll_file_operations;
 extern struct file_operations ll_file_operations_flock;
 extern struct inode_operations ll_file_inode_operations;
 extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
+extern int ll_have_md_lock(struct inode *inode, __u64 bits);
 int ll_extent_lock(struct ll_file_data *, struct inode *,
                    struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
                    struct lustre_handle *, int ast_flags);
@@ -393,10 +405,12 @@ int ll_file_release(struct inode *inode, struct file *file);
 int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
 int ll_glimpse_size(struct inode *inode, int ast_flags);
 int ll_local_open(struct file *file,
-                  struct lookup_intent *it, struct ll_file_data *fd);
+                  struct lookup_intent *it, struct ll_file_data *fd,
+                  struct obd_client_handle *och);
 int ll_release_openhandle(struct dentry *, struct lookup_intent *);
 int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
                  struct file *file);
+int ll_mdc_real_close(struct inode *inode, int flags);
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                struct lookup_intent *it, struct kstat *stat);
@@ -413,6 +427,7 @@ int ll_inode_permission(struct inode *inode, int mask);
 void ll_intent_drop_lock(struct lookup_intent *);
 void ll_intent_release(struct lookup_intent *);
 extern void ll_set_dd(struct dentry *de);
+int ll_drop_dentry(struct dentry *dentry);
 void ll_unhash_aliases(struct inode *);
 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
index 488066f..0e25235 100644 (file)
@@ -484,7 +484,7 @@ static void prune_deathrow(struct ll_sb_info *sbi, int try)
         int empty;
 
         do {
-                if (need_resched())
+                if (need_resched() && try)
                         break;
 
                 if (try) {
@@ -641,6 +641,11 @@ void ll_lli_init(struct ll_inode_info *lli)
         spin_lock_init(&lli->lli_lock);
         INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
         lli->lli_inode_magic = LLI_INODE_MAGIC;
+        sema_init(&lli->lli_och_sem, 1);
+        lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
+        lli->lli_mds_exec_och = NULL;
+        lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
+        lli->lli_open_fd_exec_count = 0;
         INIT_LIST_HEAD(&lli->lli_dead_list);
 }
 
@@ -1042,9 +1047,21 @@ void ll_clear_inode(struct inode *inode)
                inode->i_generation, inode);
 
         ll_inode2fid(&fid, inode);
-        clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
+        clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
         mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
 
+        LASSERT(!lli->lli_open_fd_write_count);
+        LASSERT(!lli->lli_open_fd_read_count);
+        LASSERT(!lli->lli_open_fd_exec_count);
+        
+        if (lli->lli_mds_write_och)
+                ll_mdc_real_close(inode, FMODE_WRITE);
+        if (lli->lli_mds_exec_och)
+                ll_mdc_real_close(inode, FMODE_EXEC);
+        if (lli->lli_mds_read_och)
+                ll_mdc_real_close(inode, FMODE_READ);
+
+
         if (lli->lli_smd) {
                 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
                                   null_if_equal, inode);
index 0cf1eef..33cfa18 100644 (file)
@@ -180,16 +180,71 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                                    inode->i_ino, inode->i_generation, inode);
                 }
 
+                if (bits & MDS_INODELOCK_OPEN) {
+                        int flags = 0;
+                        switch (lock->l_req_mode) {
+                        case LCK_CW:
+                                flags = FMODE_WRITE;
+                                break;
+                        case LCK_PR:
+                                flags = FMODE_EXEC;
+                                break;
+                        case LCK_CR:
+                                flags = FMODE_READ;
+                                break;
+                        default:
+                                CERROR("Unexpected lock mode for OPEN lock "
+                                       "%d, inode %ld\n", lock->l_req_mode,
+                                       inode->i_ino);
+                        }
+                        ll_mdc_real_close(inode, flags);
+                }
+
                 if (bits & MDS_INODELOCK_UPDATE)
                         clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
                                   &(ll_i2info(inode)->lli_flags));
 
-                
                 if (S_ISDIR(inode->i_mode) &&
-                     (bits & MDS_INODELOCK_UPDATE))  {
+                     (bits & MDS_INODELOCK_UPDATE)) {
+                        struct dentry *dentry, *tmp, *dir;
+                        struct list_head *list;
+                        
                         CDEBUG(D_INODE, "invalidating inode %lu\n",
                                inode->i_ino);
                         truncate_inode_pages(inode->i_mapping, 0);
+
+                        
+                        /* Drop possible cached negative dentries */
+                        list = &inode->i_dentry;
+                        dir = NULL;
+                        spin_lock(&dcache_lock);
+                        
+                        /* It is possible to have several dentries (with 
+                           racer?) */
+                        while ((list = list->next) != &inode->i_dentry) {
+                                dir = list_entry(list, struct dentry, d_alias);
+                                if (!(dir->d_flags & DCACHE_LUSTRE_INVALID))
+                                        break;
+
+                                dir = NULL;
+                        }
+                        
+                        if (dir) {
+restart:
+                                list_for_each_entry_safe(dentry, tmp, 
+                                                         &dir->d_subdirs, 
+                                                         d_child)
+                                {
+                                        /* XXX Print some debug here? */
+                                        if (!dentry->d_inode) 
+                                                /* Negative dentry. If we were 
+                                                   dropping dcache lock, go 
+                                                   through the list again */
+                                                if (ll_drop_dentry(dentry))
+                                                        goto restart;
+                                }
+                        }
+                        spin_unlock(&dcache_lock);
                 }
 
                 if (inode->i_sb->s_root &&
@@ -407,9 +462,16 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset,
                 *de = ll_find_alias(inode, *de);
         } else {
                 ENTRY;
-                spin_lock(&dcache_lock);
-                ll_d_add(*de, inode);
-                spin_unlock(&dcache_lock);
+                /* Check that parent has UPDATE lock. If there is none, we
+                   cannot afford to hash this dentry (done by ll_d_add) as it
+                   might get picked up later when the UPDATE lock appears */
+                if (ll_have_md_lock(parent, MDS_INODELOCK_UPDATE)) {
+                        spin_lock(&dcache_lock);
+                        ll_d_add(*de, inode);
+                        spin_unlock(&dcache_lock);
+                } else {
+                        (*de)->d_inode = NULL; 
+                }
         }
 
         ll_set_dd(*de);
index e340c1d..49e6407 100644 (file)
@@ -359,8 +359,6 @@ void ll_inode_fill_obdo(struct inode *inode, int cmd, struct obdo *oa)
         if (cmd & OBD_BRW_WRITE) {
                 oa->o_valid |= OBD_MD_FLEPOCH;
                 oa->o_easize = ll_i2info(inode)->lli_io_epoch;
-                oa->o_uid = inode->i_uid;
-                oa->o_gid = inode->i_gid;
 
                 valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
                         OBD_MD_FLUID | OBD_MD_FLGID |
index fecd1b5..f020980 100644 (file)
@@ -50,7 +50,7 @@ struct lov_request_set {
         struct list_head         set_list;
 };
 
-#define LAP_MAGIC 8200
+#define LOV_AP_MAGIC 8200
 
 struct lov_async_page {
         int                             lap_magic;
@@ -62,8 +62,8 @@ struct lov_async_page {
         void                            *lap_caller_data;
 };
 
-#define LAP_FROM_COOKIE(c)                                                      \
-        (LASSERT(((struct lov_async_page *)(c))->lap_magic == LAP_MAGIC),       \
+#define LAP_FROM_COOKIE(c)                                                     \
+        (LASSERT(((struct lov_async_page *)(c))->lap_magic == LOV_AP_MAGIC),   \
          (struct lov_async_page *)(c))
 
 static inline void lov_llh_addref(void *llhp)
index 2585e9f..33dc956 100644 (file)
@@ -1043,7 +1043,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                 rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
                                  NULL, oti, NULL);
                 err = lov_update_common_set(set, req, rc);
-                if (rc) {
+                if (err) {
                         CERROR("error: destroying objid "LPX64" subobj "
                                LPX64" on OST idx %d: rc = %d\n",
                                set->set_oa->o_id, req->rq_oa->o_id,
@@ -1052,7 +1052,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                                 rc = err;
                 }
         }
-        lov_fini_destroy_set(set);
+        rc = lov_fini_destroy_set(set);
         if (rc == 0) {
                 LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
                 rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp);
@@ -1566,7 +1566,7 @@ int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
         LASSERT(loi == NULL);
 
         lap = *res;
-        lap->lap_magic = LAP_MAGIC;
+        lap->lap_magic = LOV_AP_MAGIC;
         lap->lap_caller_ops = ops;
         lap->lap_caller_data = data;
 
index 65800ab..d76c4cb 100644 (file)
@@ -44,6 +44,7 @@ static void lov_init_set(struct lov_request_set *set)
         set->set_count = 0;
         set->set_completes = 0;
         set->set_success = 0;
+        set->set_cookies = 0;
         CFS_INIT_LIST_HEAD(&set->set_list);
         atomic_set(&set->set_refcount, 1);
 }
@@ -129,11 +130,12 @@ int lov_update_enqueue_set(struct lov_request_set *set,
          * can be addressed then. */
         if (rc == ELDLM_OK) {
                 struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
-                __u64 tmp = req->rq_md->lsm_oinfo->loi_lvb.lvb_size;
+                __u64 tmp;
 
                 LASSERT(lock != NULL);
                 lov_stripe_lock(set->set_md);
                 loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb;
+                tmp = loi->loi_lvb.lvb_size;
                 /* Extend KMS up to the end of this lock and no further
                  * A lock on [x,y] means a KMS of up to y + 1 bytes! */
                 if (tmp > lock->l_policy_data.l_extent.end)
@@ -641,7 +643,7 @@ int lov_update_create_set(struct lov_request_set *set,
                lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
         loi_init(loi);
 
-        if (set->set_cookies)
+        if (oti && set->set_cookies)
                 ++oti->oti_logcookies;
         if (req->rq_oa->o_valid & OBD_MD_FLCOOKIE)
                 set->set_cookie_sent++;
@@ -1010,7 +1012,7 @@ int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa,
                 req->rq_oa->o_id = loi->loi_id;
 
                 /* Setup the first request's cookie position */
-                if (!cookie_set && set->set_cookies) {
+                if (oti && !cookie_set && set->set_cookies) {
                         oti->oti_logcookies = set->set_cookies + i;
                         cookie_set = 1;
                 }
index 563085b..9ebb767 100644 (file)
@@ -109,7 +109,8 @@ static __u32 mds_pack_open_flags(__u32 flags)
         return
                 (flags & (FMODE_READ | FMODE_WRITE |
                           MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
-                          MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE)) |
+                          MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE |
+                          MDS_OPEN_LOCK)) |
                 ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) |
                 ((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) |
                 ((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) |
index a6b190b..131263a 100644 (file)
@@ -630,6 +630,12 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
         if (req == NULL)
                 GOTO(out, rc = -ENOMEM);
 
+        /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
+         * portal whose threads are not taking any DLM locks and are therefore
+         * always progressing */
+        /* XXX FIXME bug 249 */
+        req->rq_request_portal = MDS_READPAGE_PORTAL;
+
         /* Ensure that this close's handle is fixed up during replay. */
         LASSERT(och != NULL);
         LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
@@ -656,9 +662,9 @@ int mdc_close(struct obd_export *exp, struct obdo *oa,
         LASSERT(req->rq_cb_data == NULL);
         req->rq_cb_data = mod;
 
-        mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+        mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
         rc = ptlrpc_queue_wait(req);
-        mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+        mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
 
         if (req->rq_repmsg == NULL) {
                 CDEBUG(D_HA, "request failed to send: %p, %d\n", req,
@@ -1122,9 +1128,14 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf)
                 GOTO(err_rpc_lock, rc = -ENOMEM);
         mdc_init_rpc_lock(cli->cl_setattr_lock);
 
+        OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+        if (!cli->cl_close_lock)
+                GOTO(err_setattr_lock, rc = -ENOMEM);
+        mdc_init_rpc_lock(cli->cl_close_lock);
+
         rc = client_obd_setup(obd, len, buf);
         if (rc)
-                GOTO(err_setattr_lock, rc);
+                GOTO(err_close_lock, rc);
         lprocfs_init_vars(mdc, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 
@@ -1136,6 +1147,8 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf)
 
         RETURN(rc);
 
+err_close_lock:
+        OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
 err_setattr_lock:
         OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
 err_rpc_lock:
@@ -1212,6 +1225,7 @@ static int mdc_cleanup(struct obd_device *obd)
 
         OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
         OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
+        OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
 
         lprocfs_obd_cleanup(obd);
         ptlrpcd_decref();
index 86c8023..28bde34 100644 (file)
@@ -1431,12 +1431,16 @@ int mds_handle(struct ptlrpc_request *req)
 
                 /* sanity check: if the xid matches, the request must
                  * be marked as a resent or replayed */
-                if (req->rq_xid == med->med_mcd->mcd_last_xid)
-                        LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) &
-                                 (MSG_RESENT | MSG_REPLAY),
-                                 "rq_xid "LPU64" matches last_xid, "
-                                 "expected RESENT flag\n",
-                                 req->rq_xid);
+                if (req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_xid) ||
+                   req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_close_xid))
+                        if (!(lustre_msg_get_flags(req->rq_reqmsg) &
+                                 (MSG_RESENT | MSG_REPLAY))) {
+                                CERROR("rq_xid "LPU64" matches last_xid, "
+                                       "expected RESENT flag\n",
+                                        req->rq_xid);
+                                req->rq_status = -ENOTCONN;
+                                GOTO(out, rc = -EFAULT);
+                        }
                 /* else: note the opposite is not always true; a
                  * RESENT req after a failover will usually not match
                  * the last_xid, since it was likely never
@@ -1696,6 +1700,9 @@ int mds_handle(struct ptlrpc_request *req)
         /* If we're DISCONNECTing, the mds_export_data is already freed */
         if (!rc && lustre_msg_get_opc(req->rq_reqmsg) != MDS_DISCONNECT) {
                 struct mds_export_data *med = &req->rq_export->exp_mds_data;
+                
+                /* I don't think last_xid is used for anything, so I'm not sure
+                   if we need to care about last_close_xid here. */
                 lustre_msg_set_last_xid(req->rq_repmsg,
                                        le64_to_cpu(med->med_mcd->mcd_last_xid));
 
@@ -1985,7 +1992,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
                               obd->obd_replayable ? "enabled" : "disabled");
         }
 
-        ldlm_timeout = 2;
+        ldlm_timeout = 6;
 
         RETURN(0);
 
@@ -2272,6 +2279,10 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, int offset,
             le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid))
                 return;
 
+        if (req->rq_xid ==
+            le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_close_xid))
+                return;
+
         /* This remote handle isn't enqueued, so we never received or
          * processed this request.  Clear MSG_RESENT, because it can
          * be handled like any normal request now. */
@@ -2366,7 +2377,13 @@ static int mds_intent_policy(struct ldlm_namespace *ns,
                 if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
                     !intent_disposition(rep, DISP_OPEN_OPEN))
 #endif
+                if (rep->lock_policy_res2) {
+                        /* mds_open returns ENOLCK where it should return zero,
+                           but it has no lock to return */
+                        if (rep->lock_policy_res2 == ENOLCK)
+                                rep->lock_policy_res2 = 0;
                         RETURN(ELDLM_LOCK_ABORTED);
+                }
                 break;
         case IT_LOOKUP:
                         getattr_part = MDS_INODELOCK_LOOKUP;
index 2bae298..ca387f8 100644 (file)
@@ -345,7 +345,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                         continue;
                 }
 
-                last_transno = le64_to_cpu(mcd->mcd_last_transno);
+                last_transno = le64_to_cpu(mcd->mcd_last_transno) >
+                               le64_to_cpu(mcd->mcd_last_close_transno) ?
+                               le64_to_cpu(mcd->mcd_last_transno) :
+                               le64_to_cpu(mcd->mcd_last_close_transno);
 
                 /* These exports are cleaned up by mds_disconnect(), so they
                  * need to be set up like real exports as mds_connect() does.
index cc7b49c..9fd562a 100644 (file)
@@ -18,7 +18,12 @@ struct mds_client_data {
         __u64 mcd_last_xid;     /* xid for the last transaction */
         __u32 mcd_last_result;  /* result from last RPC */
         __u32 mcd_last_data;    /* per-op data (disposition for open &c.) */
-        __u8 mcd_padding[LR_CLIENT_SIZE - 64];
+        /* for MDS_CLOSE requests */
+        __u64 mcd_last_close_transno; /* last completed transaction ID */
+        __u64 mcd_last_close_xid;     /* xid for the last transaction */
+        __u32 mcd_last_close_result;  /* result from last RPC */
+        __u32 mcd_last_close_data;  /* per-op data (disposition for open &c.) */
+        __u8 mcd_padding[LR_CLIENT_SIZE - 88];
 };
 
 #define MDS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000)
@@ -103,9 +108,13 @@ static inline void mds_inode_unset_orphan(struct inode *inode)
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {              \
                 struct mds_client_data *mcd =                                 \
                         req->rq_export->exp_mds_data.med_mcd;                 \
-                if (mcd->mcd_last_xid == req->rq_xid) {                       \
+                if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) {          \
                         reconstruct;                                          \
-                        RETURN(lustre_msg_get_status(req->rq_repmsg));        \
+                        RETURN(le32_to_cpu(mcd->mcd_last_result));            \
+                }                                                             \
+                if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) {    \
+                        reconstruct;                                          \
+                        RETURN(le32_to_cpu(mcd->mcd_last_close_result));      \
                 }                                                             \
                 DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",\
                           mcd->mcd_last_xid);                                 \
index 9790280..a566a5d 100644 (file)
@@ -218,7 +218,7 @@ static void mds_finish_join(struct mds_obd *mds, struct ptlrpc_request *req,
                                 sizeof(struct llog_cookie);
         int max_easize = sizeof(*lmmj);
 
-        CDEBUG(D_INFO, "change the max md size from %d to %d \n",
+        CDEBUG(D_INFO, "change the max md size from %d to "LPSZ"\n",
                mds->mds_max_mdsize, sizeof(*lmmj));
 
         if (mds->mds_max_mdsize < max_easize || 
index f35feff..a291ace 100644 (file)
@@ -517,7 +517,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset,
 
         /* copy rc, transno and disp; steal locks */
         mds_req_from_mcd(req, mcd);
-        intent_set_disposition(rep, mcd->mcd_last_data);
+        intent_set_disposition(rep, le32_to_cpu(mcd->mcd_last_data));
 
         /* Only replay if create or open actually happened. */
         if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
@@ -720,12 +720,12 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
         }
         UNLOCK_INODE_MUTEX(dchild->d_inode);
 
-        if (!(rec->ur_flags & MDS_OPEN_JOIN_FILE))
+        if (rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE))
                 lustre_shrink_reply(req, DLM_REPLY_REC_OFF + 1,
                                     body->eadatasize, 0);
 
         if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL &&
-            !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) {
+            rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) {
                 int acl_off = DLM_REPLY_REC_OFF + (body->eadatasize ? 2 : 1);
 
                 rc = mds_pack_acl(&req->rq_export->exp_mds_data,
@@ -869,8 +869,17 @@ int mds_open(struct mds_update_record *rec, int offset,
         struct dentry_params dp;
         unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid };
         unsigned int qpids[MAXQUOTAS] = { 0, 0 };
+        int child_mode = LCK_CR;
+        /* Always return a LOOKUP lock if the open is successful, to guard
+           the dentry on the client. */
+        ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}};
+        struct ldlm_res_id child_res_id = { .name = {0}};
+        int lock_flags = 0;
         ENTRY;
 
+        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE,
+                         (obd_timeout + 1) / 4);
+
         CLASSERT(MAXQUOTAS < 4);
         if (offset == DLM_INTENT_REC_OFF) { /* intent */
                 rep = lustre_msg_buf(req->rq_repmsg, DLM_LOCKREPLY_OFF,
@@ -1107,6 +1116,36 @@ found_child:
                 GOTO(cleanup, rc = -EAGAIN);
         }
 
+        /* Obtain OPEN lock as well */
+        policy.l_inodebits.bits |= MDS_INODELOCK_OPEN;
+
+        /* We cannot use acc_mode here, because it is zeroed in case of
+           creating a file, so we get wrong lockmode */
+        if (accmode(dchild->d_inode, rec->ur_flags) & MAY_WRITE)
+                child_mode = LCK_CW;
+        else if (accmode(dchild->d_inode, rec->ur_flags) & MAY_EXEC)
+                child_mode = LCK_PR;
+        else
+                child_mode = LCK_CR;
+
+        if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) && 
+             (rec->ur_flags & MDS_OPEN_LOCK)) {
+                /* In case of replay we do not get a lock assuming that the
+                   caller has it already */
+                child_res_id.name[0] = dchild->d_inode->i_ino;
+                child_res_id.name[1] = dchild->d_inode->i_generation;
+
+                rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace,
+                                      child_res_id, LDLM_IBITS, &policy,
+                                      child_mode, &lock_flags,
+                                      ldlm_blocking_ast, ldlm_completion_ast,
+                                      NULL, NULL, NULL, 0, NULL, child_lockh);
+                if (rc != ELDLM_OK)
+                        GOTO(cleanup, rc);
+
+                cleanup_phase = 3;
+        }
+
         if (!S_ISREG(dchild->d_inode->i_mode) &&
             !S_ISDIR(dchild->d_inode->i_mode) &&
             (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) {
@@ -1126,6 +1165,9 @@ found_child:
 
  cleanup_no_trans:
         switch (cleanup_phase) {
+        case 3:
+                if (rc)
+                        ldlm_lock_decref(child_lockh, child_mode);
         case 2:
                 if (rc && created) {
                         int err = vfs_unlink(dparent->d_inode, dchild);
@@ -1151,6 +1193,14 @@ found_child:
                 else
                         ptlrpc_save_lock(req, &parent_lockh, parent_mode);
         }
+        /* If we have not taken the "open" lock, we may not return 0 here,
+           because the caller expects 0 to mean "lock is taken", and it needs
+           a nonzero return here in order to return ELDLM_LOCK_ABORTED to the
+           client. The caller should later rewrite the return value back to
+           zero if it is to be used any further.
+         */
+        if ((cleanup_phase != 3) && !rc)
+                rc = ENOLCK;
 
         /* trigger dqacq on the owner of child and parent */
         lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
index 79dc8a5..891cd2a 100644 (file)
@@ -160,12 +160,20 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle,
                         mds->mds_last_transno = transno;
                 spin_unlock(&mds->mds_transno_lock);
         }
+
         req->rq_transno = transno;
         lustre_msg_set_transno(req->rq_repmsg, transno);
-        mcd->mcd_last_transno = cpu_to_le64(transno);
-        mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
-        mcd->mcd_last_result = cpu_to_le32(rc);
-        mcd->mcd_last_data = cpu_to_le32(op_data);
+        if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) {
+                mcd->mcd_last_close_transno = cpu_to_le64(transno);
+                mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid);
+                mcd->mcd_last_close_result = cpu_to_le32(rc);
+                mcd->mcd_last_close_data = cpu_to_le32(op_data);
+        } else {
+                mcd->mcd_last_transno = cpu_to_le64(transno);
+                mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
+                mcd->mcd_last_result = cpu_to_le32(rc);
+                mcd->mcd_last_data = cpu_to_le32(op_data);
+        }
 
         if (off <= 0) {
                 CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off);
@@ -355,12 +363,19 @@ void mds_steal_ack_locks(struct ptlrpc_request *req)
 
 void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd)
 {
+        if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) {
+                req->rq_transno = le64_to_cpu(mcd->mcd_last_close_transno);
+                lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
+                req->rq_status = le32_to_cpu(mcd->mcd_last_close_result);
+                lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+        } else {
+                req->rq_transno = le64_to_cpu(mcd->mcd_last_transno);
+                lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
+                req->rq_status = le32_to_cpu(mcd->mcd_last_result);
+                lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+        }
         DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d",
-                  mcd->mcd_last_transno, mcd->mcd_last_result);
-        req->rq_transno = mcd->mcd_last_transno;
-        lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
-        req->rq_status = mcd->mcd_last_result;
-        lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+                  req->rq_transno, req->rq_status);
 
         mds_steal_ack_locks(req);
 }
@@ -1263,6 +1278,10 @@ cleanup:
         return rc;
 }
 
+#define INODE_CTIME_AGE (10)
+#define INODE_CTIME_OLD(inode) (LTIME_S(inode->i_ctime) +               \
+                                INODE_CTIME_AGE < CURRENT_SECONDS)
+
 int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds,
                                 struct ll_fid *fid,
                                 struct lustre_handle *parent_lockh,
@@ -1320,6 +1339,16 @@ int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds,
 
         child_res_id.name[0] = inode->i_ino;
         child_res_id.name[1] = inode->i_generation;
+
+        /* If we want a LCK_CR for a directory, and this directory has not been
+           changed for some time, we return not only a LOOKUP lock, but also an
+           UPDATE lock so that negative dentries start working for this dir.
+           We also apply the same logic to non-directories: if the file is
+           rarely changed, we return both locks, which might save us an RPC on
+           a later STAT. */
+        if ((child_mode & (LCK_CR|LCK_PR|LCK_CW)) && INODE_CTIME_OLD(inode))
+                child_policy.l_inodebits.bits |= MDS_INODELOCK_UPDATE;
+
         iput(inode);
 
 retry_locks:
index 9bdea74..2e9c52f 100644 (file)
@@ -119,7 +119,14 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
                 }
                 if (handle->lgh_ctxt == NULL)
                         RETURN(-EOPNOTSUPP);
-                llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
+                rc = llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
+                if (rc) {
+                        CDEBUG(D_IOCTL, 
+                               "cannot find log #"LPX64"#"LPX64"#%08x\n",
+                               lir->lid_id.lgl_oid, lir->lid_id.lgl_ogr,
+                               lir->lid_id.lgl_ogen);
+                        RETURN(rc);
+                }
                 rc = llog_process(log_handle, llog_check_cb, NULL, NULL);
                 llog_close(log_handle);
         } else {
index e3aea4b..f6cc6ff 100644 (file)
@@ -1050,12 +1050,13 @@ int filter_vfs_unlink(struct inode *dir, struct dentry *dentry)
 
         /* don't need dir->i_zombie for 2.4, it is for rename/unlink of dir
          * itself we already hold dir->i_mutex for child create/unlink ops */
+        LASSERT(dentry->d_inode != NULL);
         LASSERT(TRYLOCK_INODE_MUTEX(dir) == 0);
         LASSERT(TRYLOCK_INODE_MUTEX(dentry->d_inode) == 0);
 
 
         /* may_delete() */
-        if (!dentry->d_inode || dentry->d_parent->d_inode != dir)
+        if (/*!dentry->d_inode ||*/dentry->d_parent->d_inode != dir)
                 GOTO(out, rc = -ENOENT);
 
         rc = ll_permission(dir, MAY_WRITE | MAY_EXEC, NULL);
@@ -2216,12 +2217,14 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                                       EXT3_IOC_SETFLAGS, (long)&oa->o_flags);
         } else {
                 rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
-                if (fcc != NULL)
+                if (fcc != NULL) {
                         /* set cancel cookie callback function */
                         fsfilt_add_journal_cb(exp->exp_obd, 0, oti ?
                                               oti->oti_handle : handle,
                                               filter_cancel_cookies_cb,
                                               fcc);
+                        fcc = NULL;
+                }
         }
 
         if (locked) {
@@ -2242,6 +2245,9 @@ out_unlock:
         if (locked)
                 UNLOCK_INODE_MUTEX(inode);
 
+        if (fcc)
+                OBD_FREE(fcc, sizeof(*fcc));
+
         /* trigger quota release */
         if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
                 unsigned int cur_ids[MAXQUOTAS] = {oa->o_uid, oa->o_gid};
index 0987611..d1c7a2d 100644 (file)
@@ -284,7 +284,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
         ENTRY;
 
         /* We are currently not supporting multi-obj BRW_READ RPCS at all.
-         * When we do this function's dentry cleanup will need to be fixed */
+         * When we do this function's dentry cleanup will need to be fixed.
+         * These values are verified in ost_brw_write() from the wire. */
         LASSERTF(objcount == 1, "%d\n", objcount);
         LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt);
 
@@ -310,9 +311,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
 
         inode = dentry->d_inode;
 
-        if (oa)
-                obdo_to_inode(inode, oa, OBD_MD_FLATIME);
-
+        obdo_to_inode(inode, oa, OBD_MD_FLATIME);
         fsfilt_check_slow(now, obd_timeout, "preprw_read setup");
 
         for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt;
index 97728eb..bf8de9d 100644 (file)
@@ -72,3 +72,5 @@ flock_test
 writemany
 random-reads
 chownmany
+llverdev
+llverfs
index cec369b..b5b7c2b 100644 (file)
@@ -45,8 +45,7 @@ static int be_verbose(int verbose, struct timeval *next_time,
                 gettimeofday(&now, NULL);
 
         /* A positive verbosity means to print every X iterations */
-        if (verbose > 0 &&
-            (next_num == NULL || num >= *next_num || num >= num_total)) {
+        if (verbose > 0 && (num >= *next_num || num >= num_total)) {
                 *next_num += verbose;
                 if (next_time) {
                         next_time->tv_sec = now.tv_sec - verbose;
@@ -59,8 +58,7 @@ static int be_verbose(int verbose, struct timeval *next_time,
         if (verbose < 0 && next_time != NULL && difftime(&now, next_time) >= 0){
                 next_time->tv_sec = now.tv_sec - verbose;
                 next_time->tv_usec = now.tv_usec;
-                if (next_num)
-                        *next_num = num;
+                *next_num = num;
                 return 1;
         }
 
index aa1ed8c..52ed448 100644 (file)
 
 #define MAX_LOV_UUID_COUNT      1000
 
+/* Returns bytes read on success and a negative value on failure.
+ * If zero bytes are read it is treated as a failure; as such,
+ * zero is never returned from this function.
+ */
 int read_proc_entry(char *proc_path, char *buf, int len)
 {
-        int rcnt = -2, fd;
+        int rc, fd;
+
+        memset(buf, 0, len);
 
-        if ((fd = open(proc_path, O_RDONLY)) == -1) {
+        fd = open(proc_path, O_RDONLY);
+        if (fd == -1) {
                 fprintf(stderr, "open('%s') failed: %s\n",
                         proc_path, strerror(errno));
-                rcnt = -3;
-        } else if ((rcnt = read(fd, buf, len)) <= 0) {
+                return -2;
+        }
+
+        rc = read(fd, buf, len - 1);
+        if (rc < 0) {
                 fprintf(stderr, "read('%s') failed: %s\n",
                         proc_path, strerror(errno));
-        } else {
-                buf[rcnt - 1] = '\0';
+                rc = -3;
+        } else if (rc == 0) {
+                fprintf(stderr, "read('%s') zero bytes\n", proc_path);
+                rc = -4;
+        } else if (/* rc > 0 && */ buf[rc - 1] == '\n') {
+                buf[rc - 1] = '\0'; /* Remove trailing newline */
         }
+        close(fd);
 
-        if (fd >= 0)
-                close(fd);
-
-        return (rcnt);
+        return (rc);
 }
 
 int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
@@ -62,7 +74,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
         int i, rc;
 
         rc = read_proc_entry("/proc/fs/lustre/llite/fs0/lov/common_name",
-                             buf, sizeof(buf)) <= 0;
+                             buf, sizeof(buf));
         if (rc < 0)
                 return -rc;
 
@@ -71,7 +83,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
         if (lum_dir == NULL) {
                 snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripecount",
                          lov_path);
-                if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+                if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
                         return 5;
 
                 stripe_count = atoi(buf);
@@ -82,7 +94,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
                 stripe_count = 1;
 
         snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/numobd", lov_path);
-        if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+        if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
                 return 6;
 
         ost_count = atoi(buf);
@@ -99,7 +111,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
         if (stripe_size == 0) {
                 snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripesize",
                          lov_path);
-                if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+                if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
                         return 5;
 
                 stripe_size = atoi(buf);
@@ -149,7 +161,7 @@ int main(int argc, char **argv)
         if (argc < 3) {
                 fprintf(stderr, "Usage: %s <dirname> <filename1> [filename2]\n",
                         argv[0]);
-                exit(1);
+                return 1;
         }
 
         dir = opendir(argv[1]);
index 91b6a2f..cfa4caf 100644 (file)
@@ -472,7 +472,7 @@ static int mmap_tst5(char *mnt)
         memset(ptr, 'a', region);
 
         /* cancel unused locks */
-        cancel_lru_locks("osc");
+        rc = cancel_lru_locks("osc");
         if (rc)
                 goto out_unmap;
 
@@ -538,7 +538,7 @@ static int mmap_tst6(char *mnt)
                 goto out;
         }
 
-        cancel_lru_locks("osc");
+        rc = cancel_lru_locks("osc");
         if (rc)
                 goto out;
 
index 77d6d04..c336b9c 100755 (executable)
@@ -330,16 +330,233 @@ test_20b() {     # bug 2986 - ldlm_handle_enqueue error during open
 }
 run_test 20b "ldlm_handle_enqueue error (should return error)"
 
-#b_cray run_test 21a "drop close request while close and open are both in flight"
-#b_cray run_test 21b "drop open request while close and open are both in flight"
-#b_cray run_test 21c "drop both request while close and open are both in flight"
-#b_cray run_test 21d "drop close reply while close and open are both in flight"
-#b_cray run_test 21e "drop open reply while close and open are both in flight"
-#b_cray run_test 21f "drop both reply while close and open are both in flight"
-#b_cray run_test 21g "drop open reply and close request while close and open are both in flight"
-#b_cray run_test 21h "drop open request and close reply while close and open are both in flight"
-#b_cray run_test 22 "drop close request and do mknod"
-#b_cray run_test 23 "client hang when close a file after mds crash"
+test_21a() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       close_pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+       multiop $DIR/$tdir-2/f Oc &
+       open_pid=$!
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+       kill -USR1 $close_pid
+       cancel_lru_locks MDC  # force the close
+       wait $close_pid || return 1
+       wait $open_pid || return 2
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+
+       rm -rf $DIR/$tdir-*
+}
+run_test 21a "drop close request while close and open are both in flight"
+
+test_21b() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       close_pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+       mcreate $DIR/$tdir-2/f &
+       open_pid=$!
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       kill -USR1 $close_pid
+       cancel_lru_locks MDC  # force the close
+       wait $close_pid || return 1
+       wait $open_pid || return 3
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
+       rm -rf $DIR/$tdir-*
+}
+run_test 21b "drop open request while close and open are both in flight"
+
+test_21c() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       close_pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+       mcreate $DIR/$tdir-2/f &
+       open_pid=$!
+       sleep 3
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+       kill -USR1 $close_pid
+       cancel_lru_locks MDC  # force the close
+       wait $close_pid || return 1
+       wait $open_pid || return 2
+
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+       rm -rf $DIR/$tdir-*
+}
+run_test 21c "drop both request while close and open are both in flight"
+
+test_21d() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+       multiop $DIR/$tdir-2/f Oc &
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+       kill -USR1 $pid
+       cancel_lru_locks MDC  # force the close
+       wait $pid || return 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+
+       rm -rf $DIR/$tdir-*
+}
+run_test 21d "drop close reply while close and open are both in flight"
+
+test_21e() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+       touch $DIR/$tdir-2/f &
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       kill -USR1 $pid
+       cancel_lru_locks MDC  # force the close
+       wait $pid || return 1
+
+       sleep $TIMEOUT
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+       rm -rf $DIR/$tdir-*
+}
+run_test 21e "drop open reply while close and open are both in flight"
+
+test_21f() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+       touch $DIR/$tdir-2/f &
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+       kill -USR1 $pid
+       cancel_lru_locks MDC  # force the close
+       wait $pid || return 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+       rm -rf $DIR/$tdir-*
+}
+run_test 21f "drop both reply while close and open are both in flight"
+
+test_21g() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+       touch $DIR/$tdir-2/f &
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+       kill -USR1 $pid
+       cancel_lru_locks MDC  # force the close
+       wait $pid || return 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+       rm -rf $DIR/$tdir-*
+}
+run_test 21g "drop open reply and close request while close and open are both in flight"
+
+test_21h() {
+       mkdir -p $DIR/$tdir-1
+       mkdir -p $DIR/$tdir-2
+       multiop $DIR/$tdir-1/f O_c &
+       pid=$!
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+       touch $DIR/$tdir-2/f &
+       touch_pid=$!
+       sleep 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+       cancel_lru_locks MDC  # force the close
+       kill -USR1 $pid
+       wait $pid || return 1
+       do_facet mds "sysctl -w lustre.fail_loc=0"
+
+       wait $touch_pid || return 2
+
+       $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+       $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+       rm -rf $DIR/$tdir-*
+}
+run_test 21h "drop open request and close reply while close and open are both in flight"
+
+# bug 3462 - multiple MDC requests
+test_22() {
+    f1=$DIR/${tfile}-1
+    f2=$DIR/${tfile}-2
+    
+    do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+    multiop $f2 Oc &
+    close_pid=$!
+
+    sleep 1
+    multiop $f1 msu || return 1
+
+     cancel_lru_locks MDC  # force the close
+    do_facet mds "sysctl -w lustre.fail_loc=0"
+
+    wait $close_pid || return 2
+    rm -rf $f2 || return 4
+}
+run_test 22 "drop close request and do mknod"
+
+test_23() { #b=4561
+    multiop $DIR/$tfile O_c &
+    pid=$!
+    # give a chance for open
+    sleep 5
+
+    # try the close
+    drop_request "kill -USR1 $pid"
+
+    fail mds
+    wait $pid || return 1
+    return 0
+}
+run_test 23 "client hang when close a file after mds crash"
 
 test_24() {    # bug 2248 - eviction fails writeback but app doesn't see it
        mkdir -p $DIR/$tdir
index 1ca3bb1..1f4d128 100644 (file)
@@ -21,7 +21,7 @@ EXCEPT="$EXCEPT 48a"
 
 case `uname -r` in
 2.4*) FSTYPE=${FSTYPE:-ext3};    ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
-2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 48b" ;;
+2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;;
 *) error "unsupported kernel" ;;
 esac
 
@@ -2628,7 +2628,35 @@ test_72() { # bug 5695 - Test that on 2.6 remove_suid works properly
 }
 run_test 72 "Test that remove suid works properly (bug5695) ===="
 
-#b_cray run_test 73 "multiple MDC requests (should not deadlock)"
+# bug 3462 - multiple simultaneous MDC requests
+test_73() {
+       mkdir $DIR/d73-1 
+       mkdir $DIR/d73-2
+       multiop $DIR/d73-1/f73-1 O_c &
+       pid1=$!
+       #give multiop a chance to open
+       usleep 500
+
+       echo 0x80000129 > /proc/sys/lustre/fail_loc
+       multiop $DIR/d73-1/f73-2 Oc &
+       sleep 1
+       echo 0 > /proc/sys/lustre/fail_loc
+
+       multiop $DIR/d73-2/f73-3 Oc &
+       pid3=$!
+
+       kill -USR1 $pid1
+       wait $pid1 || return 1
+
+       sleep 25
+
+       $CHECKSTAT -t file $DIR/d73-1/f73-1 || return 4
+       $CHECKSTAT -t file $DIR/d73-1/f73-2 || return 5 
+       $CHECKSTAT -t file $DIR/d73-2/f73-3 || return 6 
+
+       rm -rf $DIR/d73-*
+}
+run_test 73 "multiple MDC requests (should not deadlock)"
 
 test_74() { # bug 6149, 6184
        #define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
index ebbb2b3..442d0fd 100644 (file)
@@ -7,42 +7,44 @@
 #include <stdlib.h>
 #include <string.h>
 
+#define GOTO(label, rc)   do { rc; goto label; } while (0)
+
 int main (int argc, char **argv) {
-       int fd, i, rc;
+       int fd, i, rc = 0;
        unsigned long bytes, lbytes;
        struct stat st;
        char *str, *str2, *readbuf;
 
        if (argc != 3) {
                fprintf(stderr, "usage: %s <filename> <bytes>\n", argv[0]);
-               return 1;
+               GOTO(out, rc = 1);
        }
 
        bytes = strtoul(argv[2], NULL, 10);
        if (!bytes) {
                printf("No bytes!\n");
-               return 1;
+               GOTO(out, rc = 2);
        }
        if (bytes % 2) {
                printf("Need an even number of bytes!\n");
-               return 1;
+               GOTO(out, rc = 3);
        }
        lbytes = 3*bytes/2;
 
        str = malloc(bytes+1);
        if (!str) {
                printf("No enough memory for %lu bytes.\n", bytes);
-               return 1;
+               GOTO(out, rc = 4);
        }
        str2 = malloc(lbytes+1);
-       if (!str) {
+       if (!str2) {
                printf("No enough memory for %lu bytes.\n", lbytes);
-               return 1;
+               GOTO(out_str, rc = 5);
        }
        readbuf = malloc(bytes*2);
-       if (!str) {
+       if (!readbuf) {
                printf("No enough memory for %lu bytes.\n", bytes*2);
-               return 1;
+               GOTO(out_str2, rc = 6);
        }
 
        for(i=0; i < bytes; i++)
@@ -59,13 +61,13 @@ int main (int argc, char **argv) {
        fd = open(argv[1], O_CREAT|O_RDWR|O_TRUNC, 0700);
        if (fd == -1) {
                printf("Could not open file %s.\n", argv[1]);
-               return 1;
+               GOTO(out_readbuf, rc = 7);
        }
 
        rc = write(fd, str, bytes);
        if (rc != bytes) {
                printf("Write failed!\n");
-               return 1;
+               GOTO(out_fd, rc = 8);
        }
 
        sleep(1);
@@ -74,19 +76,19 @@ int main (int argc, char **argv) {
                printf("bad file %lu size first write %lu != %lu: rc %d\n",
                       (unsigned long)st.st_ino, (unsigned long)st.st_size,
                        bytes, rc);
-               return 1;
+               GOTO(out_fd, rc = 9);
        }
 
        rc = lseek(fd, bytes / 2, SEEK_SET);
        if (rc != bytes / 2) {
                printf("Seek failed!\n");
-               return 1;
+               GOTO(out_fd, rc = 10);
        }
 
        rc = write(fd, str, bytes);
        if (rc != bytes) {
                printf("Write failed!\n");
-               return 1;
+               GOTO(out_fd, rc = 11);
        }
 
        rc = fstat(fd, &st);
@@ -94,13 +96,13 @@ int main (int argc, char **argv) {
                printf("bad file %lu size second write %lu != %lu: rc %d\n",
                       (unsigned long)st.st_ino, (unsigned long)st.st_size,
                        bytes, rc);
-               return 1;
+               GOTO(out_fd, rc = 12);
        }
 
        rc = lseek(fd, 0, SEEK_SET);
        if (rc != 0) {
                printf("Seek failed!\n");
-               return 1;
+               GOTO(out_fd, rc = 13);
        }
 
        rc = read(fd, readbuf, bytes * 2);
@@ -115,23 +117,29 @@ int main (int argc, char **argv) {
                        printf("bad file size after read %lu != %lu: rc %d\n",
                               (unsigned long)st.st_size, bytes + bytes / 2,
                                rc);
-                       return 1;
+                       GOTO(out_fd, rc = 14);
                }
 
-               return 1;
+               GOTO(out_fd, rc = 15);
        }
-
-       fd = close(fd);
-       if (fd == -1)
-               return 1;
+       rc = 0;
 
        if (bytes < 320)
                printf("%s\n%s\n", readbuf, str2);
        if (strcmp(readbuf, str2)) {
                printf("No match!\n");
-               return 1;
+               GOTO(out_fd, rc = 16);
        }
 
        printf("Pass!\n");
-       return 0;
+out_fd:
+       close(fd);
+out_readbuf:
+        free(readbuf);
+out_str2:
+        free(str2);
+out_str:
+        free(str);
+out:
+        return rc;
 }
index e05090e..5b9c2dd 100644 (file)
@@ -90,7 +90,7 @@ int main(int argc, char *argv[])
        }
 
        if (st.st_atime != utb.actime ) {
-               fprintf(stderr, "%s: bad utime mtime %lu should be  %lu\n",
+               fprintf(stderr, "%s: bad utime atime %lu should be  %lu\n",
                        prog, st.st_atime, utb.actime);
                return 7;
        }
index 23be7e4..03b48fb 100644 (file)
@@ -42,13 +42,17 @@ struct kid_list_t {
 
 struct kid_list_t *head = NULL;
 
-void push_kid(pid_t kid)
+int push_kid(pid_t kid)
 {
         struct kid_list_t *new;
         new = (struct kid_list_t *)malloc(sizeof(struct kid_list_t));
+        if (new == NULL)
+                return 1;
+
         new->kid = kid;
         new->next = head;
         head = new;
+        return 0;
 }
 
 void kill_kids(void)
@@ -258,7 +262,11 @@ int main(int argc, char *argv[])
                         return (run_one_child(directory, i, duration));
                 } else {
                         /* parent */
-                        push_kid(rc);
+                        rc = push_kid(rc);
+                        if (rc != 0) {
+                                kill_kids();
+                                exit(3);
+                        }
                 }
         }
         /* parent process */
index eb43617..6fd7f84 100644 (file)
@@ -16,7 +16,7 @@ if UTILS
 rootsbin_PROGRAMS = mount.lustre
 sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
        mount_lustre mkfs_lustre mkfs.lustre \
-       tunefs_lustre tunefs.lustre l_getgroups
+       tunefs_lustre tunefs.lustre l_getgroups # llverfs llverdev
 bin_PROGRAMS = lfs llog_reader
 lib_LIBRARIES = liblustreapi.a
 sbin_SCRIPTS = $(sbin_scripts)
@@ -34,6 +34,10 @@ lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
 lload_SOURCES = lload.c 
 lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
 lload_DEPENDENCIES := $(LIBPTLCTL)
+lload_SOURCES = lload.c 
+
+llverfs_LDADD := -lext2fs -le2p
+llverdev_LDADD := -lext2fs -lblkid
 
 liblustreapi_a_SOURCES = liblustreapi.c
 
index fa92ab1..296f600 100755 (executable)
@@ -72,7 +72,7 @@ PORTALS_DIR = '../lnet'
 # Needed to call lconf --record
 CONFIG_FILE = ""
 
-# Please keep these in sync with the values in portals/kp30.h
+# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h
 ptldebug_names = {
     "trace" :     (1 << 0),
     "inode" :     (1 << 1),
@@ -88,7 +88,8 @@ ptldebug_names = {
     "buffs" :     (1 << 11),
     "other" :     (1 << 12),
     "dentry" :    (1 << 13),
-    "portals" :   (1 << 14),
+    "portals" :   (1 << 14), # deprecated
+    "lnet" :      (1 << 14),
     "page" :      (1 << 15),
     "dlmtrace" :  (1 << 16),
     "error" :     (1 << 17),
@@ -114,22 +115,29 @@ subsystem_names = {
     "log" :          (1 << 6),
     "llite" :        (1 << 7),
     "rpc" :          (1 << 8),
-    "portals" :      (1 << 10),
-    "nal" :          (1 << 11),
+    "lnet" :         (1 << 10),
+    "portals" :      (1 << 10), # deprecated
+    "lnd" :          (1 << 11),
+    "nal" :          (1 << 11), # deprecated
     "pinger" :       (1 << 12),
     "filter" :       (1 << 13),
-    "ptlbd" :        (1 << 14),
+    "ptlbd" :        (1 << 14), # deprecated
     "echo" :         (1 << 15),
     "ldlm" :         (1 << 16),
     "lov" :          (1 << 17),
-    "ptlrouter" :    (1 << 18),
+    "ptlrouter" :    (1 << 18), # deprecated
     "cobd" :         (1 << 19),
     "sm" :           (1 << 20),
     "asobd" :        (1 << 21),
-    "confobd" :      (1 << 22),
+    "confobd" :      (1 << 22), # deprecated
     "lmv" :          (1 << 23),
     "cmobd" :        (1 << 24),
     "sec" :          (1 << 25),
+    "sec" :          (1 << 26),
+    "gss" :          (1 << 27),
+    "gks" :          (1 << 28),
+    "mgc" :          (1 << 29),
+    "mgs" :          (1 << 30),
     }
 
 
@@ -1419,7 +1427,7 @@ class MDSDEV(Module):
                         if not fs_uuid in self.filesystem_uuids:
                             continue;
 
-                        debug("recording", client_name)
+                        log("Recording log", client_name, "on", self.name)
                         old_noexec = config.noexec
                         config.noexec = 0
                         noexec_opt = ('', '-n')
index c320aed..24fd739 100644 (file)
@@ -69,7 +69,7 @@ static void err_msg(char *fmt, ...)
         fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
 }
 
-int llapi_file_create(char *name, long stripe_size, int stripe_offset,
+int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
                       int stripe_count, int stripe_pattern)
 {
         struct lov_user_md lum = { 0 };
@@ -145,14 +145,6 @@ out:
         return rc;
 }
 
-/* short term backwards compat only */
-int op_create_file(char *name, long stripe_size, int stripe_offset,
-                   int stripe_count)
-{
-        return llapi_file_create(name, stripe_size, stripe_offset,
-                                 stripe_count, 0);
-}
-
 struct find_param {
         int     recursive;
         int     verbose;
@@ -493,10 +485,30 @@ int llapi_file_get_stripe(char *path, struct lov_user_md *lum)
         return rc;
 }
 
-/* short term backwards compat only */
-int op_get_file_stripe(char *path, struct lov_user_md *lum)
+int llapi_file_lookup(int dirfd, const char *name)
 {
-        return llapi_file_get_stripe(path, lum);
+        struct obd_ioctl_data data = { 0 };
+        char rawbuf[8192];
+        char *buf = rawbuf;
+        int rc;
+
+        if (dirfd < 0 || name == NULL)
+                return -EINVAL;
+
+        data.ioc_version = OBD_IOCTL_VERSION;
+        data.ioc_len = sizeof(data);
+        data.ioc_inlbuf1 = (char *)name;
+        data.ioc_inllen1 = strlen(name) + 1;
+
+        rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+        if (rc) {
+                fprintf(stderr,
+                        "error: IOC_MDC_LOOKUP pack failed for '%s': rc %d\n",
+                        name, rc);
+                return rc;
+        }
+
+        return ioctl(dirfd, IOC_MDC_LOOKUP, buf);
 }
 
 static int find_process_file(DIR *dir, char *dname, char *fname,
index b42c8a3..be8fba7 100755 (executable)
@@ -73,7 +73,7 @@ sub get_cpumhz()
 get_cpumhz();
 print "Processor counters run at $mhz MHz\n";
 
-sub readall()
+sub readstat()
 {
        my $prevcount;
        my @iodata;
@@ -101,6 +101,7 @@ sub readall()
                }
        }
 }
+
 sub process_stats()
 {
        my $delta;
@@ -149,7 +150,7 @@ sub process_stats()
 
 open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
 do {
-       readall();
+       readstat();
        process_stats();
        if ($interval) { 
                sleep($interval);
@@ -157,4 +158,3 @@ do {
        }
 } while ($interval);
 close STATS;
-
index 03f04c7..6b5601d 100644 (file)
 #include <liblustre.h>
 #include <lustre/lustre_idl.h>
 
-int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number);
+int llog_pack_buffer(int fd, struct llog_log_hdr **llog_buf,
+                     struct llog_rec_hdr ***recs, int *recs_number);
 
-void print_llog_header(struct llog_log_hdr* llog_buf);
-void print_records(struct llog_rec_hdr** recs_buf,int rec_number);
-void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr** recs_buf);
+void print_llog_header(struct llog_log_hdr *llog_buf);
+void print_records(struct llog_rec_hdr **recs_buf,int rec_number);
+void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf,
+                        struct llog_rec_hdr **recs_buf);
 
 #define PTL_CMD_BASE 100
 char* portals_command[17]=
@@ -57,37 +59,36 @@ char* portals_command[17]=
         "GET_INTERFACE",
         ""
 };
-                
+
 int main(int argc, char **argv)
 {
-        int rc=0;
-        int fd,rec_number;
-        
-        struct llog_log_hdr* llog_buf=NULL;
-        struct llog_rec_hdr** recs_buf=NULL;
-                
+        int rc = 0;
+        int fd, rec_number;
+        struct llog_log_hdr *llog_buf = NULL;
+        struct llog_rec_hdr **recs_buf = NULL;
 
         setlinebuf(stdout);
-        
+
         if(argc != 2 ){
                 printf("Usage: llog_reader filename \n");
                 return -1;
         }
-        
+
         fd = open(argv[1],O_RDONLY);
         if (fd < 0){
                 printf("Could not open the file %s \n",argv[1]);
                 goto out;
         }
         rc = llog_pack_buffer(fd, &llog_buf, &recs_buf, &rec_number);
-                
-        if(llog_buf == NULL )
-                printf("error");
+        if (rc < 0) {
+                printf("Could not pack buffer; rc=%d\n", rc);
+                goto out_fd;
+        }
+
         print_llog_header(llog_buf);
-        
         print_records(recs_buf,rec_number);
-
         llog_unpack_buffer(fd,llog_buf,recs_buf);
+out_fd:
         close(fd);
 out:
         return rc;
@@ -95,31 +96,31 @@ out:
 
 
 
-int llog_pack_buffer(int fd, struct llog_log_hdr** llog, 
-                     struct llog_rec_hdr*** recs, 
-                     intrecs_number)
+int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
+                     struct llog_rec_hdr ***recs,
+                     int *recs_number)
 {
-        int rc=0,recs_num,rd;
+        int rc = 0, recs_num,rd;
         off_t file_size;
         struct stat st;
-        char  *file_buf=NULL, *recs_buf=NULL;
-        struct llog_rec_hdr** recs_pr=NULL;
-        charptr=NULL;
+        char *file_buf=NULL, *recs_buf=NULL;
+        struct llog_rec_hdr **recs_pr=NULL;
+        char *ptr=NULL;
         int cur_idx,i;
-        
+
         rc = fstat(fd,&st);
         if (rc < 0){
                 printf("Get file stat error.\n");
                 goto out;
-        }       
+        }
         file_size = st.st_size;
-        
+
         file_buf = malloc(file_size);
         if (file_buf == NULL){
                 printf("Memory Alloc for file_buf error.\n");
                 rc = -ENOMEM;
                 goto out;
-        }       
+        }
         *llog = (struct llog_log_hdr*)file_buf;
 
         rd = read(fd,file_buf,file_size);
@@ -127,40 +128,37 @@ int llog_pack_buffer(int fd, struct llog_log_hdr** llog,
                 printf("Read file error.\n");
                 rc = -EIO; /*FIXME*/
                 goto clear_file_buf;
-        }       
+        }
 
         /* the llog header not countable here.*/
         recs_num = le32_to_cpu((*llog)->llh_count)-1;
-        
-        recs_buf = malloc(recs_num*sizeof(struct llog_rec_hdr*));
+
+        recs_buf = malloc(recs_num * sizeof(struct llog_rec_hdr *));
         if (recs_buf == NULL){
                 printf("Memory Alloc for recs_buf error.\n");
                 rc = -ENOMEM;
                 goto clear_file_buf;
-        }       
+        }
         recs_pr = (struct llog_rec_hdr **)recs_buf;
-        
+
         ptr = file_buf + le32_to_cpu((*llog)->llh_hdr.lrh_len);
         cur_idx = 1;
         i = 0;
-        while (i < recs_num){   
-                struct llog_rec_hdr* cur_rec=(struct llog_rec_hdr*)ptr;
 
-                while(!ext2_test_bit(cur_idx,(*llog)->llh_bitmap)){
-                        cur_idx++;
+        while (i < recs_num){
+                struct llog_rec_hdr *cur_rec = (struct llog_rec_hdr*)ptr;
+
+                if (ext2_test_bit(cur_idx++, (*llog)->llh_bitmap)) {
+                        recs_pr[i++] = cur_rec;
                         ptr += cur_rec->lrh_len;
-                        if ((ptr-file_buf) > file_size){
-                                printf("The log is corrupted. \n");
+                        if ((ptr - file_buf) > file_size) {
+                                printf("The log is corrupted.\n");
                                 rc = -EINVAL;
                                 goto clear_recs_buf;
-                        }       
+                        }
                 }
-                recs_pr[i] = cur_rec;
-                ptr+=cur_rec->lrh_len;
-                i++;
-                cur_idx++;
         }
-        
+
         *recs = recs_pr;
         *recs_number=recs_num;
 
@@ -175,24 +173,21 @@ clear_file_buf:
 
         *llog=NULL;
         goto out;
-
 }
 
-
-void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr **recs_buf)
+void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf,
+                        struct llog_rec_hdr **recs_buf)
 {
         free(llog_buf);
         free(recs_buf);
         return;
 }
 
-
-void print_llog_header(struct llog_log_hdr* llog_buf)
+void print_llog_header(struct llog_log_hdr *llog_buf)
 {
         time_t t;
 
         printf("Header size : %d \n",
-        //              le32_to_cpu(llog_buf->llh_hdr.lrh_len));
                 llog_buf->llh_hdr.lrh_len);
 
         t = le64_to_cpu(llog_buf->llh_timestamp);
@@ -204,7 +199,7 @@ void print_llog_header(struct llog_log_hdr* llog_buf)
         printf("Target uuid : %s \n",
                (char *)(&llog_buf->llh_tgtuuid));
 
-        /* Add the other infor you want to view here*/
+        /* Add the other info you want to view here */
 
         printf("-----------------------\n");
         return;
@@ -213,13 +208,14 @@ void print_llog_header(struct llog_log_hdr* llog_buf)
 static void print_1_cfg(struct lustre_cfg *lcfg)
 {
         int i;
+
         if (lcfg->lcfg_nid)
                 printf("nid=%s("LPX64")  ", libcfs_nid2str(lcfg->lcfg_nid),
                        lcfg->lcfg_nid);
         if (lcfg->lcfg_nal)
                 printf("nal=%d ", lcfg->lcfg_nal);
         for (i = 0; i <  lcfg->lcfg_bufcount; i++)
-                printf("%d:%.*s  ", i, lcfg->lcfg_buflens[i], 
+                printf("%d:%.*s  ", i, lcfg->lcfg_buflens[i],
                        (char*)lustre_cfg_buf(lcfg, i));
         return;
 }
@@ -229,7 +225,7 @@ static void print_setup_cfg(struct lustre_cfg *lcfg)
 {
         struct lov_desc *desc;
 
-        if ((lcfg->lcfg_bufcount == 2) && 
+        if ((lcfg->lcfg_bufcount == 2) &&
             (lcfg->lcfg_buflens[1] == sizeof(*desc))) {
                 printf("lov_setup ");
                 printf("0:%s  ", lustre_cfg_string(lcfg, 0));
@@ -250,7 +246,7 @@ static void print_setup_cfg(struct lustre_cfg *lcfg)
 void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip)
 {
         enum lcfg_command_type cmd = le32_to_cpu(lcfg->lcfg_command);
-        
+
         if (*skip > 0)
                 printf("SKIP ");
 
@@ -352,27 +348,25 @@ void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip)
         return;
 }
 
-void print_records(struct llog_rec_hdr** recs,int rec_number)
+void print_records(struct llog_rec_hdr **recs, int rec_number)
 {
         __u32 lopt;
         int i, skip = 0;
         
-        for(i = 0; i < rec_number; i++){
-        
+        for(i = 0; i < rec_number; i++) {
                 printf("#%.2d ", le32_to_cpu(recs[i]->lrh_index));
 
                 lopt = le32_to_cpu(recs[i]->lrh_type);
 
                 if (lopt == OBD_CFG_REC){
                         struct lustre_cfg *lcfg;
-                        printf("L "); 
-                        lcfg = (struct lustre_cfg *)
-                                ((char*)(recs[i]) + sizeof(struct llog_rec_hdr));
+                        printf("L ");
+                        lcfg = (struct lustre_cfg *)((char*)(recs[i]) +
+                                                     sizeof(struct llog_rec_hdr));
                         print_lustre_cfg(lcfg, &skip);
                 }
 
-                if (lopt == PTL_CFG_REC){
-                        printf("Portals - unknown type\n"); 
-                }
+                if (lopt == PTL_CFG_REC)
+                        printf("Portals - unknown type\n");
         }
 }
index 7c3855a..0305f3d 100755 (executable)
@@ -2,6 +2,9 @@
 
 my $pname = $0;
 
+my $defaultpath = "/proc/fs/lustre";
+my $obdstats = "stats";
+
 sub usage()
 {
     print STDERR "Usage: $pname <stats_file> [<interval>]\n";
@@ -9,19 +12,41 @@ sub usage()
 }
 
 
-my $statspath;
+my $statspath = "None";
 my $interval = 0;
 
 if (($#ARGV < 0) || ($#ARGV > 1)) {
     usage();
 } else {
-    $statspath = $ARGV[0];
+    if ( $ARGV[0] =~ /help$/ ) {
+       usage();
+    }
+    if ( -f $ARGV[0] ) {
+       $statspath = $ARGV[0];
+    } elsif ( -f "$ARGV[0]/$obdstats" ) {
+       $statspath = "$ARGV[0]/$obdstats";
+    } else {
+       my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`;
+       chop $st;
+       if ( -f "$st" ) {
+           $statspath = $st;
+       } else {
+           $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`;
+           chop $st;
+           if ( -f "$st" ) {
+               $statspath = $st;
+           }
+       }
+    }
+    if ( $statspath =~ /^None$/ ) {
+       die "Cannot locate stat file for: $ARGV[0]\n";
+    }
     if ($#ARGV == 1) {
        $interval = $ARGV[1];
     } 
 }
 
-
+print "$pname on $statspath\n";
 
 my %cumulhash;
 my %sumhash;
@@ -43,6 +68,7 @@ sub get_cpumhz()
     if (defined($itc_freq)) { $mhz = $itc_freq; }
     elsif (defined($cpu_freq)) { $mhz = $cpu_freq; }
     else { $mhz = 1; }
+    close CPUINFO;
 }
 
 get_cpumhz();
@@ -50,7 +76,7 @@ print "Processor counters run at $mhz MHz\n";
 
 sub readstat()
 {
-    open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
+    seek STATS, 0, 0;
     while (<STATS>) {
        chop;
        ($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare) 
@@ -125,9 +151,11 @@ sub readstat()
     }
 }
 
+open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
 do {
     readstat();
     if ($interval) { 
        sleep($interval);
     }
 } while ($interval);
+close STATS;
diff --git a/lustre/utils/llverdev.c b/lustre/utils/llverdev.c
new file mode 100644 (file)
index 0000000..a00db8e
--- /dev/null
@@ -0,0 +1,502 @@
+/*
+ * Large Block Device Verification Tool.
+ * This program is used to test whether the block device is correctly
+ * handling IO beyond 2TB boundary.
+ * This tool has two working modes
+ * 1. full mode
+ * 2. fast mode
+ *     The full mode is basic mode in which program writes the test pattern
+ * on entire disk. The test pattern (device offset and timestamp) is written
+ * at the beginning of each 4kB block. When the whole device is full then
+ * read operation is performed to verify that the test pattern is correct.
+ *     In the fast mode the program writes data at the critical locations
+ * of the device such as start of the device, before and after multiple of 1GB
+ * offset and at the end.
+ *     A chunk buffer with default size of 1MB is used to write and read test
+ * pattern in bulk.
+ */
+
+#include <features.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <gnu/stubs.h>
+#include <ext2fs/ext2fs.h>
+#include <blkid/blkid.h>
+
+#define ONE_MB (1024 * 1024)
+#define ONE_GB (1024 * 1024 * 1024)
+#define HALF_MB (ONE_MB / 2)
+#define ONE_KB 1024
+#define HALF_KB (ONE_KB / 2)
+#define BLOCKSIZE 4096
+
+/* Structure for writing test pattern */
+struct block_data {
+       loff_t  bd_offset;
+       time_t  bd_time;
+};
+static char *progname;         /* name by which this program was run. */
+static unsigned verbose = 1;   /* prints offset in kB, operation rate */
+static int readoption;         /* run test in read-only (verify) mode */
+static int writeoption;                /* run test in write_only mode */
+const char *devname;           /* name of device to be tested. */
+static unsigned full = 1;      /* flag to full check */
+static int fd;
+static int isatty_flag;
+
+static struct option const longopts[] =        /* must agree with the getopt_long() optstring */
+{
+       { "chunksize", required_argument, 0, 'c' },
+       { "force", no_argument, 0, 'f' },
+       { "help", no_argument, 0, 'h' },
+       { "offset", required_argument, 0, 'o' },
+       { "partial", no_argument, 0, 'p' },     /* plain flag, same as "-p" */
+       { "quiet", no_argument, 0, 'q' },       /* plain flag, same as "-q" */
+       { "read", no_argument, 0, 'r' },
+       { "timestamp", required_argument, 0, 't' },
+       { "verbose", no_argument, 0, 'v' },
+       { "write", no_argument, 0, 'w' },
+       { "long", no_argument, 0, 'l' },
+       { 0, 0, 0, 0}
+};
+
+/*
+ * Usage: displays help information, whenever user supply --help option in
+ * command or enters incorrect command line.
+ */
+void usage(int status)
+{
+       if (status != 0) {
+             printf("\nUsage: %s [OPTION]... <device-name> ...\n",
+                    progname);
+             printf("Block device verification tool.\n"
+                    "\t-t {seconds}, --timestamp, "
+                    "set test time  (default=current time())\n"
+                    "\t-o {offset}, --offset, "
+                    "offset in kB of start of test, default=0\n"
+                    "\t-r, --read run test in verify mode\n"
+                    "\t-w, --write run test in test-pattern mode, default=rw\n"
+                    "\t-v, --verbose\n"
+                    "\t-q, --quiet\n"
+                    "\t-l, --long, full check of device\n"
+                    "\t-p, --partial, for partial check (1GB steps)\n"
+                    "\t-c, --chunksize, IO chunk size, default=1048576\n"
+                    "\t-f, --force, force test to run without confirmation\n"
+                    "\t-h, --help display this help and exit\n");
+       }
+       exit(status);
+}
+
+/*
+ * Open_dev: Opens device in specified mode and returns fd.
+ */
+static int open_dev(const char *devname, int mode)
+{
+       int     mount_flags;
+       char    mountpt[80] = "";
+
+       if (ext2fs_check_mount_point(devname, &mount_flags, mountpt,
+                                    sizeof(mountpt))) {
+               fprintf(stderr, "%s: ext2fs_check_mount_point failed:%s",
+                       progname, strerror(errno));
+               exit(1);
+       }
+       if (mount_flags & EXT2_MF_MOUNTED){
+               fprintf(stderr, "%s: %s is already mounted\n", progname,
+                       devname);
+               exit(1);
+       }
+       fd = open(devname, mode | O_EXCL | O_LARGEFILE);
+       if (fd < 0) {
+               fprintf(stderr, "%s: Open failed: %s",progname,strerror(errno));
+               exit(3);
+       }
+       return (fd);
+}
+
+/*
+ * sizeof_dev: Returns size of device in bytes, or 0 if it cannot be determined.
+ */
+static unsigned long long sizeof_dev(int fd)
+{
+       blkid_loff_t numbytes = 0;
+
+       numbytes = blkid_get_dev_size(fd);
+       if (numbytes <= 0) {
+               fprintf(stderr, "%s: blkid_get_dev_size(%s) failed\n",
+                       progname, devname);
+               return 0;       /* main() treats a zero size as an error */
+       }
+
+       if (verbose)
+               printf("%s: %s is %llu bytes (%g GB) in size\n",
+                      progname, devname,
+                      (unsigned long long)numbytes, (double)numbytes / ONE_GB);
+
+       return numbytes;
+}
+
+/*
+ * Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct.
+ * Returns 0 if test offset and timestamp is correct otherwise 1.
+ */
+int verify_chunk(char *chunk_buf, size_t chunksize,
+                loff_t chunk_off, time_t time_st)
+{
+       struct block_data *bd;
+       char *chunk_end;
+
+       for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+            (char *)chunk_buf < chunk_end;
+            chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+               bd = (struct block_data *)chunk_buf;
+               if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st))
+                       continue;
+
+               fprintf(stderr, "\n%s: verify failed at offset/timestamp "
+                       "%llu/%lu: found %llu/%lu instead\n", progname,
+                       chunk_off, time_st, bd->bd_offset, bd->bd_time);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * fill_chunk: Fills the chunk with current or user specified timestamp
+ * and offset. The test pattern is filled at the beginning of
+ * each 4kB(BLOCKSIZE) blocks in chunk_buf.
+ */
+void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+               time_t time_st)
+{
+       struct block_data *bd;
+       char *chunk_end;
+
+       for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+            (char *)chunk_buf < chunk_end;
+            chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+               bd = (struct block_data *)chunk_buf;
+               bd->bd_offset = chunk_off;
+               bd->bd_time = time_st;
+       }
+}
+
+void show_rate(char *op, unsigned long long offset, unsigned long long *count)
+{
+       static time_t last;
+       time_t now;
+       double diff;
+
+       now = time(NULL);
+       diff = now - last;
+
+       if (diff > 4) {
+               if (last != 0) {
+                       if (isatty_flag)
+                               printf("\r");
+                       printf("%s offset: %14llukB %5g MB/s            ", op,
+                              offset / ONE_KB, (double)(*count) /ONE_MB /diff);
+                       if (isatty_flag)
+                               fflush(stdout);
+                       else
+                               printf("\n");
+
+                       *count = 0;
+               }
+               last = now;
+       }
+}
+
+/*
+ * write_chunks: writes the chunk_buf to the device. The number of write
+ * operations is based on the parameters write_end, offset, and chunksize.
+ */
+int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf,
+                size_t chunksize, time_t time_st)
+{
+       unsigned long long stride, count = 0;
+
+       stride = full ? chunksize : (ONE_GB - chunksize);
+
+       for (offset = offset & ~(chunksize - 1); offset < write_end;
+            offset += stride) {
+               if (lseek64(fd, offset, SEEK_SET) == -1) {
+                       fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n",
+                               progname, offset, strerror(errno));
+                       return 1;
+               }
+               if (offset + chunksize > write_end)
+                       chunksize = write_end - offset;
+
+               if (!full && offset > chunksize) {
+                       fill_chunk(chunk_buf, chunksize, offset, time_st);
+                       if (write(fd, chunk_buf, chunksize) < 0) {
+                               fprintf(stderr, "\n%s: write %llu failed: %s\n",
+                                       progname, offset, strerror(errno));
+                               return 1;
+                       }
+                       offset += chunksize;
+                       if (offset + chunksize > write_end)
+                               chunksize = write_end - offset;
+               }
+
+               fill_chunk(chunk_buf, chunksize, offset, time_st);
+               if (write(fd, chunk_buf, chunksize) < 0) {
+                       fprintf(stderr, "\n%s: write %llu failed: %s\n",
+                               progname, offset, strerror(errno));
+                       return 1;
+               }
+
+               count += chunksize;
+               if (verbose > 1)
+                       show_rate("write", offset, &count);
+       }
+       if (verbose > 1) {
+               show_rate("write", offset, &count);
+               printf("\nwrite complete\n");
+       }
+       if (fsync(fd) == -1) {
+               fprintf(stderr, "%s: fsync faild: %s\n", progname,
+                       strerror(errno));
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * read_chunks: reads the chunk_buf from the device. The number of read
+ * operations is based on the parameters read_end, offset, and chunksize.
+ */
+int read_chunks(loff_t offset, loff_t read_end, char *chunk_buf,
+               size_t chunksize, time_t time_st)
+{
+       unsigned long long stride, count = 0;
+
+       stride = full ? chunksize : (ONE_GB - chunksize);
+
+       if (ioctl(fd, BLKFLSBUF, 0) < 0 && verbose)
+               fprintf(stderr, "%s: ioctl BLKFLSBUF failed: %s (ignoring)\n",
+                       progname, strerror(errno));
+
+       for (offset = offset & ~(chunksize - 1); offset < read_end;
+            offset += stride) {
+               if (lseek64(fd, offset, SEEK_SET) == -1) {
+                       fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n",
+                               progname, offset, strerror(errno));
+                       return 1;
+               }
+               if (offset + chunksize > read_end)
+                       chunksize = read_end - offset;
+
+               if (!full && offset > chunksize) {
+                       if (read (fd, chunk_buf, chunksize) < 0) {
+                               fprintf(stderr, "\n%s: read %llu failed: %s\n",
+                                       progname, offset, strerror(errno));
+                               return 1;
+                       }
+                       if (verify_chunk(chunk_buf, chunksize, offset,
+                                        time_st) != 0)
+                               return 1;
+                       offset += chunksize;
+                       if (offset + chunksize >= read_end)
+                               chunksize = read_end - offset;
+               }
+
+               if (read(fd, chunk_buf, chunksize) < 0) {
+                       fprintf(stderr, "\n%s: read failed: %s\n", progname,
+                               strerror(errno));
+                       return 1;
+               }
+
+               if (verify_chunk(chunk_buf, chunksize, offset, time_st) != 0)
+                       return 1;
+
+               count += chunksize;
+               if (verbose > 1)
+                       show_rate("read", offset, &count);
+       }
+       if (verbose > 1) {
+               show_rate("read", offset, &count);
+               printf("\nread complete\n");
+       }
+       return 0;
+}
+
+int main(int argc, char **argv) /* parse options, confirm, then write and/or verify */
+{
+       time_t time_st = 0;             /* Default timestamp */
+       loff_t offset = 0, offset_orig; /* offset in kB */
+       size_t chunksize = ONE_MB;      /* IO chunk size */
+       char *chunk_buf = NULL;
+       unsigned int force = 0;         /* run test run without confirmation*/
+       unsigned long long dev_size = 0;
+       char yesno[4] = "";             /* stays empty if scanf() reads nothing */
+       int mode = O_RDWR;              /* mode which device should be opened */
+       int error = 0, c;
+
+       progname = strrchr(argv[0], '/') == NULL ?
+               argv[0] : strrchr(argv[0], '/') + 1;
+       while ((c = getopt_long(argc, argv, "c:fhlo:pqrt:vw", longopts,
+                               NULL)) != -1) {
+               switch (c) {
+               case 'c':
+                       chunksize = (strtoul(optarg, NULL, 0) * ONE_MB);
+                       if (!chunksize) {
+                               fprintf(stderr, "%s: chunk size value should be"
+                                       "nonzero and multiple of 1MB\n",
+                                       progname);
+                               return -1;
+                       }
+                       break;
+               case 'f':
+                       force = 1;
+                       break;
+               case 'l':
+                       full = 1;
+                       break;
+               case 'o':
+                       offset = strtoull(optarg, NULL, 0) * ONE_KB;
+                       break;
+               case 'p':
+                       full = 0;
+                       break;
+               case 'q':
+                       verbose = 0;
+                       break;
+               case 'r':
+                       readoption = 1;
+                       mode = O_RDONLY;
+                       break;
+               case 't':
+                       time_st = (time_t)strtoul(optarg, NULL, 0);
+                       break;
+               case 'v':
+                       verbose++;
+                       break;
+               case 'w':
+                       writeoption = 1;
+                       mode = O_WRONLY;
+                       break;
+               case 'h':
+               default:
+                       usage (1);
+                       return 0;
+               }
+       }
+       offset_orig = offset;
+       devname = argv[optind];
+       if (!devname) {
+               fprintf(stderr, "%s: device name not given\n", progname);
+               usage (1);
+               return -1;
+       }
+
+       if (readoption && writeoption)
+               mode = O_RDWR;
+       if (!readoption && !writeoption) {
+               readoption = 1;
+               writeoption = 1;
+       }
+
+       if (!force && writeoption) {
+               printf("%s: permanently overwrite all data on %s (yes/no)? ",
+                      progname, devname);
+               scanf("%3s", yesno);
+               if (strcasecmp("yes", yesno) && strcasecmp("y", yesno)) {
+                       printf("Not continuing due to '%s' response\n", yesno);
+                       return 0;       /* any answer but yes/y aborts */
+               }
+       }
+
+       if (!writeoption && time_st == 0) {
+               fprintf(stderr, "%s: must give timestamp for read-only test\n",
+                       progname);
+               usage(1);
+       }
+
+       fd = open_dev(devname, mode);
+       dev_size = sizeof_dev(fd);
+       if (!dev_size) {
+               fprintf(stderr, "%s: cannot test on device size < 1MB\n",
+                       progname);
+               error = 7;
+               goto close_dev;
+       }
+
+       if (dev_size < (offset * 2)) {
+               fprintf(stderr, "%s: device size %llu < offset %llu\n",
+                       progname, dev_size, offset);
+               error = 6;
+               goto close_dev;
+       }
+       if (!time_st)
+               (void)time(&time_st);
+
+       isatty_flag = isatty(STDOUT_FILENO);
+
+       if (verbose)
+               printf("Timestamp: %lu\n", time_st);
+
+       chunk_buf = (char *)calloc(chunksize, 1);
+       if (chunk_buf == NULL) {
+               fprintf(stderr, "%s: memory allocation failed for chunk_buf\n",
+                       progname);
+               error = 4;
+               goto close_dev;
+       }
+       if (writeoption) {
+               if (write_chunks(offset, dev_size, chunk_buf, chunksize,
+                                time_st)) {
+                       error = 3;
+                       goto chunk_buf;
+               }
+               if (!full) {  /* end of device aligned to a block */
+                       offset = ((dev_size - chunksize + BLOCKSIZE - 1) &
+                                 ~(BLOCKSIZE - 1));
+                       if (write_chunks(offset, dev_size, chunk_buf, chunksize,
+                                        time_st)) {
+                               error = 3;
+                               goto chunk_buf;
+                       }
+               }
+               offset = offset_orig;
+       }
+       if (readoption) {
+               if (read_chunks(offset, dev_size, chunk_buf, chunksize,
+                               time_st)) {
+                       error = 2;
+                       goto chunk_buf;
+               }
+               if (!full) { /* end of device aligned to a block */
+                       offset = ((dev_size - chunksize + BLOCKSIZE - 1) &
+                                 ~(BLOCKSIZE - 1));
+                       if (read_chunks(offset, dev_size, chunk_buf, chunksize,
+                                       time_st)) {
+                               error = 2;
+                               goto chunk_buf;
+                       }
+               }
+               if (verbose)
+                       printf("\n%s: data verified successfully\n", progname);
+       }
+       error = 0;
+chunk_buf:
+       free(chunk_buf);
+close_dev:
+       close(fd);
+       return error;
+}
diff --git a/lustre/utils/llverfs.c b/lustre/utils/llverfs.c
new file mode 100644 (file)
index 0000000..77e54dd
--- /dev/null
@@ -0,0 +1,630 @@
+/*
+ * ext3 Filesystem Verification Tool.
+ * This program tests the correct operation of ext3 filesystem.
+ * This tool has two working modes
+ * 1. full mode
+ * 2. fast mode
+ *     The full mode is basic mode in which program creates a subdirectory
+ * in the test filesystem, writes n(files_in_dir, default=32) large(4GB) files
+ * to the directory with the test pattern at the start of each 4kB block.
+ * The test pattern contains timestamp, relative file offset and per file
+ * unique identifier(inode number). This continues until the whole filesystem
+ * is full and then this tool verifies that the data in all of the test files
+ * is correct.
+ *     In the fast mode the tool creates test directories with
+ * EXT3_TOPDIR_FL flag set. The number of directories equals the number
+ * of block groups in the filesystem(e.g. 65536 directories for 8TB filesystem)
+ * and then writes a single 1MB file in each directory. The tool then verifies
+ * that the data in each file is correct.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <dirent.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <gnu/stubs.h>
+#include <ext2fs/ext2fs.h>
+#include <gnu/stubs.h>
+#include <e2p/e2p.h>
+
+#define ONE_MB (1024 * 1024)
+#define ONE_GB ((unsigned long long)(1024 * 1024 * 1024))
+#define BLOCKSIZE 4096
+
+/* Structure for writing test pattern */
+struct block_data {
+       loff_t  bd_offset;
+       time_t  bd_time;
+       ino_t   bd_inode;
+};
+static char *progname;             /* name by which this program was run. */
+static unsigned verbose = 1;       /* prints offset in kB, operation rate */
+static int readoption;             /* run test in read-only (verify) mode */
+static int writeoption;                    /* run test in write_only mode */
+char *testdir;                     /* name of device to be tested. */
+static unsigned full = 1;          /* flag to full check */
+static int errno_local;                    /* local copy of errno */
+static unsigned long num_files;     /* Total number of files for read/write */
+static loff_t file_size;           /* Size of each file */
+static unsigned files_in_dir = 32;  /* number of files in each directioy */
+static unsigned num_dirs = 30000;   /* total number of directories */
+const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+static int fd = -1;
+static int isatty_flag;
+static int perms =  S_IRWXU | S_IRGRP | S_IROTH;
+
+static struct option const longopts[] =        /* must agree with the getopt_long() optstring */
+{
+       { "chunksize", required_argument, 0, 'c' },
+       { "help", no_argument, 0, 'h' },
+       { "offset", required_argument, 0, 'o' },
+       { "long", no_argument, 0, 'l' },
+       { "partial", no_argument, 0, 'p' },     /* usage() documents -p as a plain flag */
+       { "quiet", no_argument, 0, 'q' },       /* flag; takes no value */
+       { "read", no_argument, 0, 'r' },
+       { "timestamp", required_argument, 0, 't' },
+       { "verbose", no_argument, 0, 'v' },
+       { "write", no_argument, 0, 'w' },
+       { 0, 0, 0, 0}
+};
+
+/*
+ * Usages: displays help information, whenever user supply --help option in
+ * command or enters incorrect command line.
+ */
+void usage(int status)
+{
+       if (status != 0)
+       {
+             printf("\nUsage: %s [OPTION]... <filesystem path> ...\n",
+                       progname);
+             printf("ext3 filesystem verification tool.\n"
+                 "\t-t {seconds} for --timestamp,  set test time"
+                 "(default=current time())\n"
+                 "\t-o {offset}  for --offset, directory starting offset"
+                 " from which tests should start\n"
+                 "\t-r run test in read (verify) mode\n"
+                 "\t-w run test in write (test-pattern) mode (default=r&w)\n"
+                 "\t-v for verbose\n"
+                 "\t-p for --partial, for partial check (1MB files)\n"
+                 "\t-l for --long, full check (4GB file with 4k blocks)\n"
+                 "\t-c for --chunksize, IO chunk size (default=1048576)\n"
+                 "\t-h display this help and exit\n"
+                 "\t--help display this help and exit\n");
+       }
+       exit(status);
+}
+
+/*
+ * open_file: Opens file in specified mode and returns fd.
+ */
+static int open_file(const char *file, int flag)
+{
+       fd = open(file, flag, perms);
+       if (fd < 0) {
+               fprintf(stderr, "\n%s: Open '%s' failed:%s\n",
+                       progname, file, strerror(errno));
+               exit(3);
+       }
+       return (fd);
+}
+
+/*
+ * Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct.
+ * Returns 0 if test offset, timestamp and inode are correct, otherwise 1.
+ */
+int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+                time_t time_st, ino_t inode_st, char *file)
+{
+       struct block_data *bd;
+       char *chunk_end;
+
+       for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+            (char *)chunk_buf < chunk_end;
+            chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+               bd = (struct block_data *)chunk_buf;
+               if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st) &&
+                   (bd->bd_inode == inode_st))
+                       continue;
+               fprintf(stderr,"\n%s: verify %s failed offset/timestamp/inode "
+                       "%llu/%lu/%lu: found %llu/%lu/%lu instead\n", progname,
+                       file, chunk_off, time_st, inode_st, bd->bd_offset,
+                       bd->bd_time, bd->bd_inode);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * fill_chunk: Fills the chunk with current or user specified timestamp
+ * and offset. The test pattern is filled at the beginning of
+ * each 4kB(BLOCKSIZE) blocks in chunk_buf.
+ */
+void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+               time_t time_st, ino_t inode_st)
+{
+       struct block_data *bd;
+       char *chunk_end;
+
+       for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+            (char *)chunk_buf < chunk_end;
+            chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+               bd = (struct block_data *)chunk_buf;
+               bd->bd_offset = chunk_off;
+               bd->bd_time = time_st;
+               bd->bd_inode = inode_st;
+       }
+}
+
+/*
+ * write_chunks: writes the chunk_buf to the file. The number of write
+ * operations is based on the parameters write_end, offset, and chunksize.
+ */
+int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf,
+                size_t chunksize, time_t time_st,
+                ino_t inode_st, const char *file)
+{
+       unsigned long long stride;
+
+       stride = full ? chunksize : (ONE_GB - chunksize);
+       for (offset = offset & ~(chunksize - 1); offset < write_end;
+            offset += stride) {
+               if (lseek64(fd, offset, SEEK_SET) == -1) {
+                       fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
+                               progname, file, offset, strerror(errno));
+                       return 1;
+               }
+               if (offset + chunksize > write_end)
+                       chunksize = write_end - offset;
+               if (!full && offset > chunksize) {
+                       fill_chunk(chunk_buf, chunksize, offset, time_st,
+                                   inode_st);
+                       if (write(fd, chunk_buf, chunksize) < 0) {
+                               if (errno == ENOSPC) {
+                                       errno_local = errno;
+                                       return 0;
+                               }
+                               fprintf(stderr,
+                                       "\n%s: write %s+%llu failed: %s\n",
+                                       progname, file, offset,strerror(errno));
+                               return errno;
+                       }
+                       offset += chunksize;
+                       if (offset + chunksize > write_end)
+                               chunksize = write_end - offset;
+               }
+               fill_chunk(chunk_buf, chunksize, offset, time_st, inode_st);
+               if (write(fd, (char *) chunk_buf, chunksize) < 0) {
+                       if (errno == ENOSPC) {
+                               errno_local = errno;
+                               return 0;
+                       }
+                       fprintf(stderr, "\n%s: write %s+%llu failed: %s\n",
+                               progname, file, offset, strerror(errno));
+                       return 1;
+               }
+       }
+       return 0;
+}
+
+/*
+ * read_chunks: reads the chunk_buf from the file. The number of read
+ * operations is based on the parameters read_end, offset, and chunksize.
+ */
+int read_chunks(int fd, loff_t offset, loff_t read_end, char *chunk_buf,
+               size_t chunksize, time_t time_st, ino_t inode_st, char *file)
+{
+       unsigned long long stride;
+
+       stride = full ? chunksize : (ONE_GB - chunksize);
+       for (offset = offset & ~(chunksize - 1); offset < read_end;
+            offset += stride) {
+               if (lseek64(fd, offset, SEEK_SET) == -1) {
+                       fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
+                               progname, file, offset, strerror(errno));
+                       return 1;
+               }
+               if (offset + chunksize > read_end)
+                       chunksize = read_end - offset;
+               if (!full && offset > chunksize) {
+                       if (read(fd, chunk_buf, chunksize) < 0) {
+                               fprintf(stderr,
+                                       "\n%s: read %s+%llu failed: %s\n",
+                                       progname, file, offset,strerror(errno));
+                               return 1;
+                       }
+                       if (verify_chunk(chunk_buf, chunksize, offset,
+                                        time_st, inode_st, file) != 0)
+                               return 1;
+                       offset += chunksize;
+                       if (offset + chunksize >= read_end)
+                               chunksize = read_end - offset;
+               }
+               if (read(fd, chunk_buf, chunksize) < 0) {
+                       fprintf(stderr, "\n%s: read %s+%llu failed: %s\n",
+                               progname, file, offset, strerror(errno));
+                       return 1;
+               }
+               if (verify_chunk(chunk_buf, chunksize, offset, time_st,
+                                inode_st, file) != 0)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * new_file: prepares new filename using file counter and current dir.
+ */
+char *new_file(char *tempfile, char *cur_dir, int file_num)
+{
+       sprintf(tempfile, "%s/file%03d", cur_dir, file_num);
+       return tempfile;
+}
+
+/*
+ * new_dir: prepares new dir name using dir counters.
+ */
+char *new_dir(char *tempdir, int dir_num)
+{
+       sprintf(tempdir, "%s/dir%05d", testdir, dir_num);
+       return tempdir;
+}
+
+/*
+ * show_filename: Displays name of current file read/write
+ */
+void show_filename(char *op, char *filename)
+{
+       static time_t last;
+       time_t now;
+       double diff;
+
+       now = time(NULL);
+       diff = now - last;
+       if (diff > 4 || verbose > 2) {
+               if (isatty_flag)
+                       printf("\r");
+               printf("%s File name: %s          ", op, filename);
+               if (isatty_flag)
+                       fflush(stdout);
+               else
+                       printf("\n");
+               last = now;
+       }
+}
+
+/*
+ * dir_write: This function writes directories and files on device.
+ * it works for both full and fast modes.
+ */
+static int dir_write(char *chunk_buf, size_t chunksize,
+                    time_t time_st, unsigned long dir_num)
+{
+       char tempfile[PATH_MAX] = "";   /* printed below even if no file was opened */
+       char tempdir[PATH_MAX];
+       struct stat64 file;
+       int file_num = 999999999;       /* forces a mkdir on the first pass */
+       ino_t inode_st = 0;
+
+       if (!full && fsetflags(testdir, EXT2_TOPDIR_FL))
+               fprintf(stderr,
+                       "\n%s: can't set TOPDIR_FL on %s: %s (ignoring)",
+                       progname, testdir, strerror(errno));
+
+       for (; dir_num < num_dirs; num_files++, file_num++) {
+               if (file_num >= files_in_dir) {
+                       if (dir_num == num_dirs - 1)
+                               break;
+
+                       file_num = 0;
+                       if (mkdir(new_dir(tempdir, dir_num), dirmode) < 0) {
+                               if (errno == ENOSPC)
+                                       break;
+                               if (errno != EEXIST) {
+                                       fprintf(stderr, "\n%s: mkdir %s : %s\n",
+                                               progname, tempdir,
+                                               strerror(errno));
+                                       return 1;
+                               }
+                       }
+                       dir_num++;
+               }
+               fd = open_file(new_file(tempfile, tempdir, file_num),
+                              O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE);
+
+               if (fd >= 0 && fstat64(fd, &file) == 0) {
+                       inode_st = file.st_ino;
+               } else {
+                       fprintf(stderr, "\n%s: write stat64 to file %s: %s",
+                               progname, tempfile, strerror(errno));
+                       exit(1);
+               }
+
+               if (verbose > 1)
+                       show_filename("write", tempfile);
+
+               if (write_chunks(fd, 0, file_size, chunk_buf, chunksize,
+                                time_st, inode_st, tempfile)) {
+                       close(fd);
+                       return 1;
+               }
+               close(fd);
+
+               if (errno_local == ENOSPC)      /* set by write_chunks() when the fs fills up */
+                       break;
+       }
+
+       if (verbose) {
+               verbose++;
+               show_filename("write", tempfile);
+               printf("\nwrite complete\n");
+               verbose--;
+       }
+
+       return 0;
+}
+
+/*
+ * dir_read: This function reads directories and files on device.
+ * it works for both full and fast modes.
+ *
+ * chunk_buf, chunksize: I/O buffer and its size, handed to read_chunks().
+ * time_st: timestamp handed to read_chunks() for every file.
+ * dir_num: index of the first directory to read.
+ *
+ * At most num_files files are read.  Returns 0 when the pass finishes and
+ * 1 if a file cannot be opened or fstat64()'ed, or if read_chunks() fails.
+ */
+static int dir_read(char *chunk_buf, size_t chunksize,
+                   time_t time_st, unsigned long dir_num)
+{
+       /*
+        * Both buffers start out as empty strings: the loop below may not
+        * generate any name at all, and tempfile is still printed by the
+        * final show_filename() call.
+        */
+       char tempfile[PATH_MAX] = "";
+       char tempdir[PATH_MAX] = "";
+       unsigned long count = 0;
+       struct stat64 file;
+       int file_num = 0;
+       ino_t inode_st = 0;
+
+       for (count = 0; count < num_files && dir_num < num_dirs; count++) {
+               if (file_num == 0) {
+                       /* first file of a directory: build its name */
+                       if (dir_num == num_dirs - 1)
+                               break;
+
+                       new_dir(tempdir, dir_num);
+                       dir_num++;
+               }
+
+               fd = open_file(new_file(tempfile, tempdir, file_num),
+                              O_RDONLY | O_LARGEFILE);
+               if (fd >= 0 && fstat64(fd, &file) == 0) {
+                       /* the inode number is passed on to read_chunks() */
+                       inode_st = file.st_ino;
+               } else {
+                       fprintf(stderr, "\n%s: read stat64 file '%s': %s\n",
+                               progname, tempfile, strerror(errno));
+                       return 1;
+               }
+
+               if (verbose > 1)
+                       show_filename("read", tempfile);
+
+               /*
+                * NOTE(review): the loop condition guarantees
+                * count < num_files here, so this test is never true.
+                * It possibly meant the last file (count == num_files - 1),
+                * which may be shorter than file_size -- confirm before
+                * changing.
+                */
+               if (count == num_files)
+                       file_size = file.st_size;
+               if (read_chunks(fd, 0, file_size, chunk_buf, chunksize,
+                               time_st, inode_st, tempfile)) {
+                       close(fd);
+                       return 1;
+               }
+               close(fd);
+
+               /* wrap around so the next iteration picks a new directory */
+               if (++file_num >= files_in_dir)
+                       file_num = 0;
+       }
+       if (verbose > 1) {
+               /* raised so show_filename() prints unconditionally */
+               verbose++;
+               show_filename("read", tempfile);
+               printf("\nread complete\n");
+               verbose--;
+       }
+       return 0;
+}
+
+/*
+ * main: parse the command line, derive the number of directories from the
+ * filesystem when not in full mode, then run the write and/or read pass
+ * over the directory given as the first non-option argument.
+ *
+ * Returns 0 on success, 2 if the read pass fails, 3 if the write pass
+ * fails, 4 if the chunk buffer cannot be allocated, and -1 on a bad
+ * chunk size, a missing pathname or a count-file path that does not fit
+ * in PATH_MAX.
+ */
+int main(int argc, char **argv)
+{
+       time_t time_st = 0;             /* Default timestamp */
+       size_t chunksize = ONE_MB;      /* IO chunk size(default=1MB) */
+       char *chunk_buf;                /* chunk buffer */
+       int error = 0;
+       FILE *countfile = NULL;
+       char filecount[PATH_MAX];
+       unsigned long dir_num = 0, dir_num_orig = 0;/* starting directory */
+       int len;
+       /*
+        * getopt_long() returns an int; keeping it in a plain char makes
+        * the comparison with -1 always false where char is unsigned.
+        */
+       int c;
+
+       progname = strrchr(argv[0], '/') ? strrchr(argv[0], '/') + 1 : argv[0];
+       /*
+        * "c:" and "q" are listed so the short forms reach the cases below.
+        * NOTE(review): longopts is defined elsewhere; confirm it maps the
+        * same letters.
+        */
+       while ((c = getopt_long(argc, argv, "c:hlo:pqrt:vw",
+                               longopts, NULL)) != -1) {
+               switch (c) {
+               case 'c':
+                       chunksize = (strtoul(optarg, NULL, 0) * ONE_MB);
+                       if (!chunksize) {
+                               fprintf(stderr, "%s: Chunk size value should be "
+                                       "a multiple of 1MB\n", progname);
+                               return -1;
+                       }
+                       break;
+               case 'l':
+                       full = 1;
+                       break;
+               case 'o': /* offset */
+                       dir_num = strtoul(optarg, NULL, 0);
+                       break;
+               case 'p':
+                       full = 0;
+                       break;
+               case 'q':
+                       verbose = 0;
+                       break;
+               case 'r':
+                       readoption = 1;
+                       break;
+               case 't':
+                       time_st = (time_t)strtoul(optarg, NULL, 0);
+                       break;
+               case 'w':
+                       writeoption = 1;
+                       break;
+               case 'v':
+                       verbose++;
+                       break;
+
+               case 'h':
+               default:
+                       usage(1);
+                       return 0;
+               }
+       }
+       testdir = argv[optind];
+
+       if (!testdir) {
+               fprintf(stderr, "%s: pathname not given\n", progname);
+               usage(1);
+               return -1;
+       }
+       file_size = 4 * ONE_GB;
+       /* neither -r nor -w given: do both passes */
+       if (!readoption && !writeoption) {
+               readoption = 1;
+               writeoption = 1;
+       }
+       if (!time_st)
+               (void) time(&time_st);
+       printf("Timestamp: %lu\n", (unsigned long )time_st);
+       isatty_flag = isatty(STDOUT_FILENO);
+
+       if (!full) {
+               struct mntent *tempmnt;
+               FILE *fp = NULL;
+               ext2_filsys fs;
+
+               if ((fp = setmntent("/etc/mtab", "r")) == NULL){
+                       fprintf(stderr, "%s: fail to open /etc/mtab in read "
+                               "mode :%s\n", progname, strerror(errno));
+                       goto guess;
+               }
+
+               /* find device name using filesystem */
+               while ((tempmnt = getmntent(fp)) != NULL) {
+                       if (strcmp(tempmnt->mnt_dir, testdir) == 0)
+                               break;
+               }
+
+               if (tempmnt == NULL) {
+                       fprintf(stderr, "%s: no device found for '%s'\n",
+                               progname, testdir);
+                       endmntent(fp);
+                       goto guess;
+               }
+
+               if (ext2fs_open(tempmnt->mnt_fsname, 0, 0, 0,
+                               unix_io_manager, &fs)) {
+                       fprintf(stderr, "%s: unable to open ext3 fs on '%s'\n",
+                               progname, testdir);
+                       endmntent(fp);
+                       goto guess;
+               }
+               endmntent(fp);
+
+               /* one directory per block group, rounding up */
+               num_dirs = (fs->super->s_blocks_count +
+                           fs->super->s_blocks_per_group - 1) /
+                       fs->super->s_blocks_per_group;
+               if (verbose)
+                       printf("ext3 block groups: %u, fs blocks: %u "
+                              "blocks per group: %u\n",
+                              num_dirs, fs->super->s_blocks_count,
+                              fs->super->s_blocks_per_group);
+               ext2fs_close(fs);
+               /*
+                * The block below is only entered through the "guess"
+                * label, i.e. when the ext2fs-based sizing above failed.
+                */
+               if (0) { /* ugh */
+                       struct statfs64 statbuf;
+               guess:
+                       if (statfs64(testdir, &statbuf) == 0) {
+                               /* one directory per 128MB of filesystem */
+                               num_dirs = (long long)statbuf.f_blocks *
+                                       statbuf.f_bsize / (128ULL << 20);
+                               if (verbose)
+                                       printf("dirs: %u, fs blocks: %llu\n",
+                                              num_dirs,
+                                              (unsigned long long)statbuf.f_blocks);
+                       } else {
+                               fprintf(stderr, "%s: unable to stat '%s': %s\n",
+                                       progname, testdir, strerror(errno));
+                               if (verbose)
+                                       printf("dirs: %u\n", num_dirs);
+                       }
+               }
+
+               file_size = ONE_MB;
+               chunksize = ONE_MB;
+               files_in_dir = 1;
+       }
+       chunk_buf = (char *)calloc(chunksize, 1);
+       if (chunk_buf == NULL) {
+               fprintf(stderr, "Memory allocation failed for chunk_buf\n");
+               return 4;
+       }
+       /* testdir comes from argv, so bound the write into filecount */
+       len = snprintf(filecount, sizeof(filecount), "%s/%s.filecount",
+                      testdir, progname);
+       if (len < 0 || (size_t)len >= sizeof(filecount)) {
+               fprintf(stderr, "%s: pathname '%s' is too long\n",
+                       progname, testdir);
+               error = -1;
+               goto out;
+       }
+       if (writeoption) {
+               (void)mkdir(testdir, dirmode);
+
+               unlink(filecount);
+               if (dir_num != 0) {
+                       num_files = dir_num * files_in_dir;
+                       if (verbose)
+                               printf("\n%s: %lu files already written\n",
+                                      progname, num_files);
+               }
+               if (dir_write(chunk_buf, chunksize, time_st, dir_num)) {
+                       error = 3;
+                       goto out;
+               }
+               /* record the file count for a later read-only run */
+               countfile = fopen(filecount, "w");
+               if (countfile != NULL) {
+                       if (fprintf(countfile, "%lu", num_files) < 1 ||
+                           fflush(countfile) != 0) {
+                               fprintf(stderr, "\n%s: writing %s failed :%s\n",
+                                       progname, filecount, strerror(errno));
+                       }
+                       fclose(countfile);
+               } else {
+                       fprintf(stderr, "\n%s: opening %s failed :%s\n",
+                               progname, filecount, strerror(errno));
+               }
+               dir_num = dir_num_orig;
+       }
+       if (readoption) {
+               if (!writeoption) {
+                       /* read-only run: take the count left by the writer */
+                       countfile = fopen(filecount, "r");
+                       if (countfile == NULL ||
+                           fscanf(countfile, "%lu", &num_files) != 1) {
+                               fprintf(stderr, "\n%s: reading %s failed :%s\n",
+                                       progname, filecount, strerror(errno));
+                               num_files = num_dirs * files_in_dir;
+                       } else {
+                               num_files -= (dir_num * files_in_dir);
+                       }
+                       if (countfile)
+                               fclose(countfile);
+               }
+               if (dir_read(chunk_buf, chunksize, time_st, dir_num)) {
+                       fprintf(stderr, "\n%s: Data verification failed\n",
+                               progname) ;
+                       error = 2;
+                       goto out;
+               }
+       }
+       error = 0;
+out:
+       free(chunk_buf);
+       return error;
+}
index a856c63..96cb3a0 100644 (file)
@@ -397,8 +397,7 @@ static int be_verbose(int verbose, struct timeval *next_time,
                 gettimeofday(&now, NULL);
 
         /* A positive verbosity means to print every X iterations */
-        if (verbose > 0 &&
-            (next_num == NULL || num >= *next_num || num >= num_total)) {
+        if (verbose > 0 && (num >= *next_num || num >= num_total)) {
                 *next_num += verbose;
                 if (next_time) {
                         next_time->tv_sec = now.tv_sec - verbose;
@@ -412,8 +411,7 @@ static int be_verbose(int verbose, struct timeval *next_time,
             difftime(&now, next_time) >= 0.0){
                 next_time->tv_sec = now.tv_sec - verbose;
                 next_time->tv_usec = now.tv_usec;
-                if (next_num)
-                        *next_num = num;
+                *next_num = num;
                 return 1;
         }
 
index f10dff3..0152064 100644 (file)
@@ -43,8 +43,8 @@ obdio_test_fixed_extent (struct obdio_conn *conn,
         int                  j;
         int                  rc = 0;
 
-        buffer = obdio_alloc_aligned_buffer (&space, size);
-        if (buffer == NULL) {
+        space = obdio_alloc_aligned_buffer (&buffer, size);
+        if (space == NULL) {
                 fprintf (stderr, "Can't allocate buffer size %d\n", size);
                 return (-1);
         }
index b948c45..c402104 100644 (file)
@@ -51,9 +51,9 @@ obdio_ioctl (struct obdio_conn *conn, int cmd)
 
         rc = obd_ioctl_pack (&conn->oc_data, &buf, sizeof (conn->oc_buffer));
         if (rc != 0) {
-                fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n",
-                         rc, strerror (errno));
-                abort ();
+                fprintf(stderr, "%s: obd_ioctl_pack: %d (%s)\n",
+                        __FUNCTION__, rc, strerror(errno));
+                abort();
         }
 
         rc = ioctl (conn->oc_fd, cmd, buf);
@@ -62,8 +62,8 @@ obdio_ioctl (struct obdio_conn *conn, int cmd)
 
         rc2 = obd_ioctl_unpack (&conn->oc_data, buf, sizeof (conn->oc_buffer));
         if (rc2 != 0) {
-                fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n",
-                         rc2, strerror (errno));
+                fprintf(stderr, "%s: obd_ioctl_unpack: %d (%s)\n",
+                        __FUNCTION__, rc2, strerror(errno));
                 abort ();
         }
 
@@ -77,15 +77,15 @@ obdio_connect (int device)
 
         conn = malloc (sizeof (*conn));
         if (conn == NULL) {
-                fprintf (stderr, "obdio_connect: no memory\n");
+                fprintf (stderr, "%s: no memory\n", __FUNCTION__);
                 return (NULL);
         }
         memset (conn, 0, sizeof (*conn));
 
         conn->oc_fd = open ("/dev/obd", O_RDWR);
         if (conn->oc_fd < 0) {
-                fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n",
-                         strerror (errno));
+                fprintf(stderr, "%s: Can't open /dev/obd: %s\n",
+                        __FUNCTION__, strerror(errno));
                 goto failed;
         }
 
@@ -107,13 +107,14 @@ obdio_disconnect (struct obdio_conn *conn, int flags)
 
 int
 obdio_pread (struct obdio_conn *conn, uint64_t oid,
-             char *buffer, uint32_t count, uint64_t offset)
+             void *buffer, uint32_t count, uint64_t offset)
 {
         obdio_iocinit (conn);
 
         conn->oc_data.ioc_obdo1.o_id = oid;
         conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
-        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_valid =
+                OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
 
         conn->oc_data.ioc_pbuf2 = buffer;
         conn->oc_data.ioc_plen2 = count;
@@ -125,13 +126,14 @@ obdio_pread (struct obdio_conn *conn, uint64_t oid,
 
 int
 obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
-              char *buffer, uint32_t count, uint64_t offset)
+              void *buffer, uint32_t count, uint64_t offset)
 {
         obdio_iocinit (conn);
 
         conn->oc_data.ioc_obdo1.o_id = oid;
         conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
-        conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+        conn->oc_data.ioc_obdo1.o_valid =
+                OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
 
         conn->oc_data.ioc_pbuf2 = buffer;
         conn->oc_data.ioc_plen2 = count;
@@ -180,14 +182,14 @@ obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh)
 void *
 obdio_alloc_aligned_buffer (void **spacep, int size)
 {
-        int   pagesize = getpagesize();
-        void *space = malloc (size + pagesize - 1);
+        int   pagemask = getpagesize() - 1;
+        void *space = malloc(size + pagemask);
 
-        *spacep = space;
         if (space == NULL)
                 return (NULL);
 
-        return ((void *)(((unsigned long)space + pagesize - 1) & ~(pagesize - 1)));
+        *spacep = (void *)(((unsigned long)space + pagemask) & ~pagemask);
+        return space;
 }
 
 struct obdio_barrier *
@@ -195,10 +197,11 @@ obdio_new_barrier (uint64_t oid, uint64_t id, int npeers)
 {
         struct obdio_barrier *b;
 
-        b = (struct obdio_barrier *)malloc (sizeof (*b));
+        b = malloc(sizeof(*b));
         if (b == NULL) {
-                fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid);
-                return (NULL);
+                fprintf(stderr, "%s "LPX64": Can't allocate\n",
+                        __FUNCTION__, oid);
+                return(NULL);
         }
 
         b->ob_id = id;
@@ -215,41 +218,42 @@ obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
         struct lustre_handle    lh;
         int                     rc;
         int                     rc2;
-        void                   *space;
+        void                   *space, *fileptr;
         struct obdio_barrier   *fileb;
 
         if (b->ob_ordinal != 0 ||
             b->ob_count != 0) {
-                fprintf (stderr, "obdio_setup_barrier: invalid parameter\n");
+                fprintf(stderr, "%s: invalid parameter\n", __FUNCTION__);
                 abort ();
         }
 
-        fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
-        if (fileb == NULL) {
-                fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n",
-                         b->ob_oid);
+        space = obdio_alloc_aligned_buffer(&fileptr, getpagesize());
+        if (space == NULL) {
+                fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n",
+                        __FUNCTION__, b->ob_oid);
                 return (-1);
         }
 
-        memset (fileb, 0, getpagesize ());
+        fileb = fileptr;
+        memset(fileb, 0, getpagesize());
         *fileb = *b;
 
-        rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+        rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh);
         if (rc != 0) {
-                fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on enqueue: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
                 goto out;
         }
 
-        rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+        rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0);
         if (rc != 0)
-                fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on write: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
 
         rc2 = obdio_cancel (conn, &lh);
         if (rc == 0 && rc2 != 0) {
-                fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on cancel: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
                 rc = rc2;
         }
  out:
@@ -263,29 +267,30 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
         struct lustre_handle   lh;
         int                    rc;
         int                    rc2;
-        void                  *space;
+        void                  *space, *fileptr;
         struct obdio_barrier  *fileb;
         char                  *mode;
 
-        fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
-        if (fileb == NULL) {
-                fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n",
-                         b->ob_oid);
+        space = obdio_alloc_aligned_buffer(&fileptr, getpagesize());
+        if (space == NULL) {
+                fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n",
+                        __FUNCTION__, b->ob_oid);
                 return (-1);
         }
 
-        rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+        rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh);
         if (rc != 0) {
-                fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on PW enqueue: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
                 goto out_1;
         }
 
-        memset (fileb, 0xeb, getpagesize ());
-        rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+        fileb = fileptr;
+        memset(fileb, 0xeb, getpagesize());
+        rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0);
         if (rc != 0) {
-                fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on initial read: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
                 goto out_2;
         }
 
@@ -294,13 +299,16 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
             fileb->ob_npeers != b->ob_npeers ||
             fileb->ob_count >= b->ob_npeers ||
             fileb->ob_ordinal != b->ob_ordinal) {
-                fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id);
-                fprintf (stderr, "  got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
-                         fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
-                         fileb->ob_ordinal, fileb->ob_count);
-                fprintf (stderr, "  expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
-                         b->ob_id, b->ob_oid, b->ob_npeers,
-                         b->ob_ordinal, b->ob_count);
+                fprintf(stderr, "%s "LPX64": corrupt on initial read\n",
+                        __FUNCTION__, b->ob_id);
+                fprintf(stderr,
+                        "  got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                        fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+                        fileb->ob_ordinal, fileb->ob_count);
+                fprintf(stderr,
+                       "  expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+                        b->ob_id, b->ob_oid, b->ob_npeers,
+                        b->ob_ordinal, b->ob_count);
                 rc = -1;
                 goto out_2;
         }
@@ -311,37 +319,36 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
                 fileb->ob_ordinal++;                 /* signal all joined */
         }
 
-        rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+        rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0);
         if (rc != 0) {
-                fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf (stderr, "%s "LPX64": Error on initial write: %s\n",
+                         __FUNCTION__, b->ob_oid, strerror(errno));
                 goto out_2;
         }
 
         mode = "PW";
         b->ob_ordinal++;           /* now I wait... */
         while (fileb->ob_ordinal != b->ob_ordinal) {
-
                 rc = obdio_cancel (conn, &lh);
                 if (rc != 0) {
-                        fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n",
-                                 b->ob_oid, mode, strerror (errno));
+                        fprintf(stderr, "%s "LPX64": Error on %s cancel: %s\n",
+                                __FUNCTION__, b->ob_oid, mode, strerror(errno));
                         goto out_1;
                 }
 
                 mode = "PR";
-                rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh);
+                rc = obdio_enqueue(conn, b->ob_oid, LCK_PR,0,getpagesize(),&lh);
                 if (rc != 0) {
-                        fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n",
-                                 b->ob_oid, strerror (errno));
+                        fprintf(stderr, "%s "LPX64": Error on PR enqueue: %s\n",
+                                __FUNCTION__, b->ob_oid, strerror(errno));
                         goto out_1;
                 }
 
-                memset (fileb, 0xeb, getpagesize ());
-                rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+                memset (fileb, 0xeb, getpagesize());
+                rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0);
                 if (rc != 0) {
-                        fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n",
-                                 b->ob_oid, strerror (errno));
+                        fprintf(stderr, "%s "LPX64": Error on read: %s\n",
+                                __FUNCTION__, b->ob_oid, strerror(errno));
                         goto out_2;
                 }
 
@@ -351,13 +358,16 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
                     fileb->ob_count >= b->ob_npeers ||
                     (fileb->ob_ordinal != b->ob_ordinal - 1 &&
                      fileb->ob_ordinal != b->ob_ordinal)) {
-                        fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id);
-                        fprintf (stderr, "  got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
-                                 fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
-                                 fileb->ob_ordinal, fileb->ob_count);
-                        fprintf (stderr, "  expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
-                                 b->ob_id, b->ob_oid, b->ob_npeers,
-                                 b->ob_ordinal, b->ob_count);
+                        fprintf(stderr, "%s "LPX64": corrupt\n",
+                                __FUNCTION__, b->ob_id);
+                        fprintf(stderr, "  got ["LPX64","LPX64","LPX64","
+                                LPX64","LPX64"]\n",
+                                fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+                                fileb->ob_ordinal, fileb->ob_count);
+                        fprintf(stderr, "  expected ["LPX64","LPX64","LPX64
+                                ","LPX64","LPX64"]\n",
+                                b->ob_id, b->ob_oid, b->ob_npeers,
+                                b->ob_ordinal, b->ob_count);
                         rc = -1;
                         goto out_2;
                 }
@@ -366,13 +376,11 @@ obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b)
  out_2:
         rc2 = obdio_cancel (conn, &lh);
         if (rc == 0 && rc2 != 0) {
-                fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n",
-                         b->ob_oid, strerror (errno));
+                fprintf(stderr, "%s "LPX64": Error on cancel: %s\n",
+                        __FUNCTION__, b->ob_oid, strerror(errno));
                 rc = rc2;
         }
  out_1:
         free (space);
         return (rc);
 }
-
-
index 8813de4..97be436 100644 (file)
@@ -30,16 +30,16 @@ struct obdio_barrier {
         uint64_t               ob_count;
 };
 
-extern struct obdio_conn * obdio_connect (int device);
+extern struct obdio_conn *obdio_connect(int device);
 extern void obdio_disconnect(struct obdio_conn *conn, int flags);
 extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
                       struct lustre_handle *fh);
 extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
                        struct lustre_handle *fh);
 extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
-                       char *buffer, uint32_t count, uint64_t offset);
+                       void *buffer, uint32_t count, uint64_t offset);
 extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
-                        char *buffer, uint32_t count, uint64_t offset);
+                        void *buffer, uint32_t count, uint64_t offset);
 extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
                          int mode, uint64_t offset, uint32_t count,
                          struct lustre_handle *lh);
@@ -47,8 +47,7 @@ extern int obdio_cancel(struct obdio_conn *conn, struct lustre_handle *lh);
 extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
 extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
                                                int npeers);
-extern int obdio_setup_barrier(struct obdio_conn *conn,
-                               struct obdio_barrier *b);
+extern int obdio_setup_barrier(struct obdio_conn *conn,struct obdio_barrier *b);
 extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
 
 #endif
index 9aa1d66..4b3adc1 100644 (file)
@@ -1120,6 +1120,7 @@ main(int argc, char **argv)
 
         COMMENT("Sizes and Offsets");
         BLANK_LINE();
+        CHECK_STRUCT(obd_uuid);
         check_lustre_handle();
         check_lustre_msg_v1();
         check_lustre_msg_v2();