--- /dev/null
+Index: linux-2.6.9-full/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/iopen.c 2006-04-25 08:51:11.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/iopen.c 2006-05-06 01:21:11.000000000 +0400
+@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc
+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
+ }
+
+- if (!list_empty(&inode->i_dentry)) {
+- alternate = list_entry(inode->i_dentry.next,
+- struct dentry, d_alias);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ /* ignore dentries created for ".." to preserve
++ * proper dcache hierarchy -- bug 10458 */
++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++ continue;
+ dget_locked(alternate);
+ spin_lock(&alternate->d_lock);
+ alternate->d_flags |= DCACHE_REFERENCED;
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-05-06 01:21:10.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c 2006-05-06 01:29:30.000000000 +0400
+@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct
+ return ERR_PTR(-EACCES);
+ }
+
++ /* ".." shouldn't go into dcache to preserve dcache hierarchy
++ * otherwise we'll get parent being a child of actual child.
++ * see bug 10458 for details -bzzz */
++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* first, look for an existing dentry - any one is good */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ if (goal == NULL) {
++ /* there is no alias, we need to make current dentry:
++ * a) inaccessible for __d_lookup()
++ * b) inaccessible for iopen */
++ J_ASSERT(list_empty(&dentry->d_alias));
++ dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++ /* this is d_instantiate() ... */
++ list_add(&dentry->d_alias, &inode->i_dentry);
++ dentry->d_inode = inode;
++ }
++ spin_unlock(&dcache_lock);
++ if (goal)
++ iput(inode);
++ return goal;
++ }
++
+ return iopen_connect_dentry(dentry, inode, 1);
+ }
+
ext3-mballoc2-2.6.9-rhel4.patch
ext3-nlinks-2.6.9.patch
ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
ext3-rename-reserve-2.6-suse.patch
ext3-htree-dot-2.6.5-suse.patch
ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
ext3-remove-cond_resched-calls-2.6.12.patch
ext3-htree-dot-2.6.patch
ext3-external-journal-2.6.12.patch
+ext3-lookup-dotdot-2.6.9.patch
Severity : minor
Frequency : Rare
-Bugzilla : 10484
+Bugzilla : 10484
Description: Request leak when working with deleted CWD
Details : Introduce advanced request refcount tracking for requests
referenced from lustre intent.
+Severity : Enhancement
+Bugzilla : 10482
+Description: Cache open file handles on client.
+Details : MDS now will return special lock along with openhandle, if
+ requested and client is allowed to hold openhandle, even if unused,
+ until such a lock is revoked. Helps NFS a lot, since NFS opens and
+ closes files for every read/write operation.
+
+Severity : Enhancement
+Bugzilla : 9291
+Description: Cache open negative dentries on client when possible.
+Details : Guard negative dentries with UPDATE lock on parent dir, drop
+ negative dentries on lock revocation.
+
------------------------------------------------------------------------------
all-recursive: lustre_build_version
+BUILD_VER_H=$(top_builddir)/lustre/include/linux/lustre_build_version.h
+
lustre_build_version:
perl $(top_builddir)/lustre/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver
- cmp -s $(top_builddir)/lustre/include/linux/lustre_build_version.h tmpver \
- 2> /dev/null && \
- $(RM) tmpver || \
- mv tmpver $(top_builddir)/lustre/include/linux/lustre_build_version.h
+ cmp -s $(BUILD_VER_H) tmpver > tmpdiff 2> /dev/null && \
+ $(RM) tmpver tmpdiff || \
+ mv tmpver $(BUILD_VER_H)
CSTK=/tmp/checkstack
CSTKO=/tmp/checkstack.orig
}
#endif /* !HAVE_PAGE_MAPPED */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16))
static inline void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
{
update_atime(dentry->d_inode);
}
-#endif
static inline void file_accessed(struct file *file)
{
typedef void (*llapi_cb_t)(char *obd_type_name, char *obd_name, char *obd_uuid, void *args);
/* liblustreapi.c */
-extern int llapi_file_create(char *name, long stripe_size, int stripe_offset,
- int stripe_count, int stripe_pattern);
+extern int llapi_file_create(const char *name, long stripe_size,
+ int stripe_offset, int stripe_count,
+ int stripe_pattern);
extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum);
+#define HAVE_LLAPI_FILE_LOOKUP
+extern int llapi_file_lookup(int dirfd, const char *name);
extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
int verbose, int quiet);
extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
__u32 o_mds;
__u32 o_stripe_idx; /* holds stripe idx */
__u32 o_padding_1;
- char o_inline[OBD_INLINESZ]; /* fid in ost writes */
+ char o_inline[OBD_INLINESZ];
+ /* lustre_handle + llog_cookie */
};
#define o_dirty o_blocks
#define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
#define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
#define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file*/
+#define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
#define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
#define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */
struct mdc_rpc_lock *cl_rpc_lock;
struct mdc_rpc_lock *cl_setattr_lock;
+ struct mdc_rpc_lock *cl_close_lock;
struct osc_creator cl_oscc;
/* mgc datastruct */
--- /dev/null
+Index: linux-2.4.21/fs/ext3/namei.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/namei.c 2006-04-29 20:48:26.000000000 +0400
++++ linux-2.4.21/fs/ext3/namei.c 2006-05-06 01:31:51.000000000 +0400
+@@ -955,6 +955,38 @@ static struct dentry *ext3_lookup(struct
+ }
+ }
+
++ /* ".." shouldn't go into dcache to preserve dcache hierarchy
++ * otherwise we'll get parent being a child of actual child.
++ * see bug 10458 for details -bzzz */
++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* first, look for an existing dentry - any one is good */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ if (goal == NULL) {
++ /* there is no alias, we need to make current dentry:
++ * a) inaccessible for __d_lookup()
++ * b) inaccessible for iopen */
++ J_ASSERT(list_empty(&dentry->d_alias));
++ dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++ /* this is d_instantiate() ... */
++ list_add(&dentry->d_alias, &inode->i_dentry);
++ dentry->d_inode = inode;
++ }
++ spin_unlock(&dcache_lock);
++ if (goal)
++ iput(inode);
++ return goal;
++ }
++
+ return iopen_connect_dentry(dentry, inode, 1);
+ }
+
+Index: linux-2.4.21/fs/ext3/iopen.c
+===================================================================
+--- linux-2.4.21.orig/fs/ext3/iopen.c 2006-04-29 20:48:23.000000000 +0400
++++ linux-2.4.21/fs/ext3/iopen.c 2006-04-29 20:59:50.000000000 +0400
+@@ -92,9 +92,12 @@ static struct dentry *iopen_lookup(struc
+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
+ }
+
+- if (!list_empty(&inode->i_dentry)) {
+- alternate = list_entry(inode->i_dentry.next,
+- struct dentry, d_alias);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ /* ignore dentries created for ".." to preserve
++ * proper dcache hierarchy -- bug 10458 */
++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++ continue;
+ dget_locked(alternate);
+ alternate->d_vfs_flags |= DCACHE_REFERENCED;
+ iput(inode);
--- /dev/null
+Index: linux-2.6.9-full/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/iopen.c 2006-04-25 08:51:11.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/iopen.c 2006-05-06 01:21:11.000000000 +0400
+@@ -94,9 +94,12 @@ static struct dentry *iopen_lookup(struc
+ assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
+ }
+
+- if (!list_empty(&inode->i_dentry)) {
+- alternate = list_entry(inode->i_dentry.next,
+- struct dentry, d_alias);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ /* ignore dentries created for ".." to preserve
++ * proper dcache hierarchy -- bug 10458 */
++ if (alternate->d_flags & DCACHE_NFSFS_RENAMED)
++ continue;
+ dget_locked(alternate);
+ spin_lock(&alternate->d_lock);
+ alternate->d_flags |= DCACHE_REFERENCED;
+Index: linux-2.6.9-full/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/namei.c 2006-05-06 01:21:10.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/namei.c 2006-05-06 01:29:30.000000000 +0400
+@@ -1003,6 +1003,38 @@ static struct dentry *ext3_lookup(struct
+ return ERR_PTR(-EACCES);
+ }
+
++ /* ".." shouldn't go into dcache to preserve dcache hierarchy
++ * otherwise we'll get parent being a child of actual child.
++ * see bug 10458 for details -bzzz */
++ if (dentry->d_name.name[0] == '.' && (dentry->d_name.len == 1 ||
++ (dentry->d_name.len == 2 && dentry->d_name.name[1] == '.'))) {
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* first, look for an existing dentry - any one is good */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ if (goal == NULL) {
++ /* there is no alias, we need to make current dentry:
++ * a) inaccessible for __d_lookup()
++ * b) inaccessible for iopen */
++ J_ASSERT(list_empty(&dentry->d_alias));
++ dentry->d_flags |= DCACHE_NFSFS_RENAMED;
++ /* this is d_instantiate() ... */
++ list_add(&dentry->d_alias, &inode->i_dentry);
++ dentry->d_inode = inode;
++ }
++ spin_unlock(&dcache_lock);
++ if (goal)
++ iput(inode);
++ return goal;
++ }
++
+ return iopen_connect_dentry(dentry, inode, 1);
+ }
+
ext3-mballoc2-2.6.9-rhel4.patch
ext3-nlinks-2.6.9.patch
ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
ext3-rename-reserve-2.6-suse.patch
ext3-htree-dot-2.6.5-suse.patch
ext3-ialloc-2.6.patch
+ext3-lookup-dotdot-2.6.9.patch
ext3-remove-cond_resched-calls-2.6.12.patch
ext3-htree-dot-2.6.patch
ext3-external-journal-2.6.12.patch
+ext3-lookup-dotdot-2.6.9.patch
fsprivate-2.4.patch
nfsd_iallocsem.patch
linux-2.4.24-jbd-handle-EIO-rhel3.patch
+ext3-lookup-dotdot-2.4.20.patch
lnxmaj="2.6.5"
-lnxrel="7.244"
+lnxrel="7.252"
KERNEL=linux-$lnxmaj-$lnxrel.tar.bz2
# they include our patches
struct lustre_disk_dqblk ddquot, empty;
int ret = 0;
- filp = dquot->dq_info->qi_files[type];
-
- if (!filp || !dquot->dq_info) { /* Invalidated quota? */
+ /* Invalidated quota? */
+ if (!dquot->dq_info || !(filp = dquot->dq_info->qi_files[type])) {
printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
return -EIO;
}
oqctl = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*oqctl),
lustre_swab_obd_quotactl);
+ if (oqctl == NULL) {
+ CERROR("Can't unpack obd_quotactl\n");
+ RETURN(-EPROTO);
+ }
cli->cl_qchk_stat = oqctl->qc_stat;
qdata = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*qdata),
lustre_swab_qdata);
if (qdata == NULL) {
- CERROR("unpacking request buffer failed!");
+ CERROR("Can't unpack qunit_data\n");
RETURN(-EPROTO);
}
body->lock_flags |= LDLM_FL_AST_SENT;
/* We might get here prior to ldlm_handle_enqueue setting
- LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock into
- waiting list, but this is safe and similar code in
- ldlm_handle_enqueue will call ldlm_lock_cancel() still, that
- would not only cancel the loc, but will also remove it from
- waiting list */
+ * LDLM_FL_CANCEL_ON_BLOCK flag. Then we will put this lock
+ * into waiting list, but this is safe and similar code in
+ * ldlm_handle_enqueue will call ldlm_lock_cancel() still,
+ * that would not only cancel the lock, but will also remove
+ * it from waiting list */
if (lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) {
ldlm_lock_cancel(lock);
instant_cancel = 1;
if (lock->l_conn_export == NULL) {
static cfs_time_t next_dump = 0, last_dump = 0;
- LDLM_ERROR(lock, "lock timed out (enq %lus ago); not entering "
- "recovery in server code, just going back to sleep",
- lock->l_enqueued_time.tv_sec);
+ LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago); "
+ "not entering recovery in server code, just going "
+ "back to sleep", lock->l_enqueued_time.tv_sec,
+ CURRENT_SECONDS - lock->l_enqueued_time.tv_sec);
if (cfs_time_after(cfs_time_current(), next_dump)) {
last_dump = next_dump;
next_dump = cfs_time_shift(300);
obd = lock->l_conn_export->exp_obd;
imp = obd->u.cli.cl_import;
ptlrpc_fail_import(imp, lwd->lwd_conn_cnt);
- LDLM_ERROR(lock, "lock timed out (enqueued %lus ago), entering "
+ LDLM_ERROR(lock, "lock timed out (enqueued at %lu, %lus ago), entering "
"recovery for %s@%s", lock->l_enqueued_time.tv_sec,
+ CURRENT_SECONDS - lock->l_enqueued_time.tv_sec,
obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid);
RETURN(0);
/* NB 1 request reference will be taken away by ll_intent_lock()
* when I return
*/
- if (!it_disposition(it, DISP_LOOKUP_NEG) ||
- (it->it_op & IT_CREAT)) {
+ if (!it_disposition(it, DISP_LOOKUP_NEG) || (it->it_op & IT_CREAT)) {
struct lustre_md md;
struct llu_inode_info *lli;
struct intnl_stat *st;
oa->o_valid = OBD_MD_FLID;
valid_flags = OBD_MD_FLTYPE | OBD_MD_FLATIME;
if (cmd & OBD_BRW_WRITE)
- valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+ valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+ OBD_MD_FLUID | OBD_MD_FLGID |
+ OBD_MD_FLFID | OBD_MD_FLGENER;
obdo_from_inode(oa, inode, valid_flags);
EXIT;
}
/* called for each page in a completed rpc.*/
-static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
+static int llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
struct ll_async_page *llap;
struct page *page;
CERROR("writeback error on page %p index %ld: %d\n",
page, page->index, rc);
}
- EXIT;
+ RETURN(0);
}
static struct obd_async_page_ops llu_async_page_ops = {
EXIT;
}
+/* Drop the dentry if nobody holds a reference to it (d_count == 0),
+ * otherwise mark it DCACHE_LUSTRE_INVALID.
+ *
+ * Must be called with dcache_lock held, and returns with it held. In the
+ * "drop" case a temporary reference is taken, the dentry is unhashed with
+ * __d_drop(), and dcache_lock is released around the final dput() and then
+ * re-acquired; a caller walking a dentry list therefore has to restart its
+ * scan when 1 is returned.
+ *
+ * On kernels older than 2.5.0 a still-referenced dentry is additionally
+ * unhashed and, if it has an inode, moved to the superblock's
+ * ll_orphan_dentry_list.
+ *
+ * Returns: 1 if the dentry was dropped, 0 if it is still in use (it has
+ * been flagged invalid, or already carried the flag). */
+int ll_drop_dentry(struct dentry *dentry)
+{
+ lock_dentry(dentry);
+ if (atomic_read(&dentry->d_count) == 0) {
+ CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
+ "inode %p\n", dentry->d_name.len,
+ dentry->d_name.name, dentry, dentry->d_parent,
+ dentry->d_inode);
+ dget_locked(dentry);
+ __d_drop(dentry);
+ unlock_dentry(dentry);
+ spin_unlock(&dcache_lock);
+ dput(dentry);
+ spin_lock(&dcache_lock);
+ return 1;
+ }
+
+ if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ struct inode *inode = dentry->d_inode;
+#endif
+ CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
+ "inode %p refc %d\n", dentry->d_name.len,
+ dentry->d_name.name, dentry, dentry->d_parent,
+ dentry->d_inode, atomic_read(&dentry->d_count));
+ /* On 2.6 we don't actually unhash the dentry, we just
+ * mark it inaccessible to __d_lookup(); otherwise
+ * sys_getcwd() could return -ENOENT -bzzz */
+ dentry->d_flags |= DCACHE_LUSTRE_INVALID;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+ __d_drop(dentry);
+ if (inode) {
+ /* Put positive dentries on the orphan list; negative
+ * dentries have no inode to find the list through */
+ hlist_add_head(&dentry->d_hash,
+ &ll_i2sbi(inode)->ll_orphan_dentry_list);
+ }
+#endif
+ }
+ unlock_dentry(dentry);
+ return 0;
+}
+
void ll_unhash_aliases(struct inode *inode)
{
struct list_head *tmp, *head;
inode->i_ino, inode->i_generation, inode);
head = &inode->i_dentry;
-restart:
spin_lock(&dcache_lock);
+restart:
tmp = head;
while ((tmp = tmp->next) != head) {
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
continue;
}
-
- lock_dentry(dentry);
- if (atomic_read(&dentry->d_count) == 0) {
- CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
- "inode %p\n", dentry->d_name.len,
- dentry->d_name.name, dentry, dentry->d_parent,
- dentry->d_inode);
- dget_locked(dentry);
- __d_drop(dentry);
- unlock_dentry(dentry);
- spin_unlock(&dcache_lock);
- dput(dentry);
- goto restart;
- } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
- CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
- "inode %p refc %d\n", dentry->d_name.len,
- dentry->d_name.name, dentry, dentry->d_parent,
- dentry->d_inode, atomic_read(&dentry->d_count));
- /* actually we don't unhash the dentry, rather just
- * mark it inaccessible for to __d_lookup(). otherwise
- * sys_getcwd() could return -ENOENT -bzzz */
- dentry->d_flags |= DCACHE_LUSTRE_INVALID;
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- __d_drop(dentry);
- hlist_add_head(&dentry->d_hash,
- &ll_i2sbi(inode)->ll_orphan_dentry_list);
-#endif
- }
- unlock_dentry(dentry);
+
+ if (ll_drop_dentry(dentry))
+ goto restart;
}
spin_unlock(&dcache_lock);
EXIT;
struct lookup_intent *it)
{
int rc;
- struct it_cb_data icbd;
struct mdc_op_data op_data;
struct ptlrpc_request *req = NULL;
struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
LL_IT2STR(it));
- /* Cached negative dentries are unsafe for now - look them up again */
- if (de->d_inode == NULL)
- RETURN(0);
+ if (de->d_inode == NULL) {
+ /* We can only use negative dentries if this is stat or lookup,
+ for opens and stuff we do need to query server. */
+ /* If there is IT_CREAT in intent op set, then we must throw
+ away this negative dentry and actually do the request to
+ kernel to create whatever needs to be created (if possible)*/
+ if (it && (it->it_op & IT_CREAT))
+ RETURN(0);
+
+ if (de->d_flags & DCACHE_LUSTRE_INVALID)
+ RETURN(0);
+
+ rc = ll_have_md_lock(de->d_parent->d_inode,
+ MDS_INODELOCK_UPDATE);
+
+ RETURN(rc);
+ }
exp = ll_i2mdcexp(de->d_inode);
- icbd.icbd_parent = de->d_parent->d_inode;
- icbd.icbd_childp = &de;
/* Never execute intents for mount points.
* Attributes will be fixed up in ll_inode_revalidate_it */
ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode,
de->d_name.name, de->d_name.len, 0);
+ if ((it->it_op == IT_OPEN) && de->d_inode) {
+ struct inode *inode = de->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
+ /* We used to check for MDS_INODELOCK_OPEN here, but in fact
+ * just having LOOKUP lock is enough to justify inode is the
+ * same. And if inode is the same and we have suitable
+ * openhandle, then there is no point in doing another OPEN RPC
+ * just to throw away newly received openhandle.
+ * There are no security implications either: if file owner or
+ * access mode is changed, the LOOKUP lock is revoked */
+
+ it->it_create_mode &= ~current->fs->umask;
+
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+ /* Check for the proper lock. */
+ if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
+ goto do_lock;
+ down(&lli->lli_och_sem);
+ if (*och_p) { /* Everything is open already, do nothing */
+ /*(*och_usecount)++; Do not let them steal our open
+ handle from under us */
+ /* XXX The code above was my original idea, but in case
+ we have the handle, but we cannot use it due to later
+ checks (e.g. O_CREAT|O_EXCL flags set), nobody
+ would decrement counter increased here. So we just
+ hope the lock won't be invalidated in between. But
+ if it would be, we'll reopen the open request to
+ MDS later during file open path */
+ up(&lli->lli_och_sem);
+ RETURN(1);
+ } else {
+ up(&lli->lli_och_sem);
+ }
+ }
+
+do_lock:
rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
&req, ll_mdc_blocking_ast, 0);
/* If req is NULL, then mdc_intent_lock only tried to do a lock match;
struct obd_client_handle *och)
{
struct ptlrpc_request *req = NULL;
+ struct obd_device *obd;
struct obdo *oa;
int rc;
+ ENTRY;
+
+ obd = class_exp2obd(ll_i2mdcexp(inode));
+ if (obd == NULL) {
+ CERROR("Invalid MDC connection handle "LPX64"\n",
+ ll_i2mdcexp(inode)->exp_handle.h_cookie);
+ GOTO(out, rc = 0);
+ }
+
+ /*
+ * here we check if this is forced umount. If so this is called on
+ * canceling "open lock" and we do not call mdc_close() in this case, as
+ * it will not be successful, as import is already deactivated.
+ */
+ if (obd->obd_no_recov)
+ GOTO(out, rc = 0);
oa = obdo_alloc();
if (!oa)
inode->i_ino, rc);
}
- mdc_clear_open_replay_data(och);
ptlrpc_req_finished(req); /* This is close request */
+ EXIT;
+out:
+ mdc_clear_open_replay_data(och);
+
+ return rc;
+}
+/*
+ * Close the MDS open handle cached in the inode for the open mode selected
+ * by @flags: FMODE_WRITE picks the write handle, FMODE_EXEC the exec handle,
+ * anything else must have FMODE_READ set (asserted) and picks the read
+ * handle.
+ *
+ * If the matching usage counter is non-zero the handle is left alone and 0
+ * is returned. Otherwise the handle pointer is detached from the inode
+ * under lli_och_sem and, if one was set, it is closed through
+ * ll_close_inode_openhandle(), its cookie is overwritten with
+ * DEAD_HANDLE_MAGIC and the structure is freed.
+ *
+ * Returns 0, or the return code of ll_close_inode_openhandle().
+ */
+int ll_mdc_real_close(struct inode *inode, int flags)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
+ struct obd_client_handle **och_p;
+ struct obd_client_handle *och;
+ __u64 *och_usecount;
+
+ ENTRY;
+
+ if (flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ LASSERT(flags & FMODE_READ);
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
+ down(&lli->lli_och_sem);
+ if (*och_usecount) { /* There are still users of this handle, so
+ skip freeing it. */
+ up(&lli->lli_och_sem);
+ RETURN(0);
+ }
+ och=*och_p;
+ *och_p = NULL;
+ up(&lli->lli_och_sem);
+
+ if (och) { /* There might have been a race and somebody may have
+ freed this och already; the close itself is done
+ after lli_och_sem has been released */
+ rc = ll_close_inode_openhandle(inode, och);
+ och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE(och, sizeof *och);
+ }
RETURN(rc);
}
struct file *file)
{
struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
- struct obd_client_handle *och = &fd->fd_mds_och;
- int rc;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
ENTRY;
/* clear group lock, if present */
rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
&fd->fd_cwlockh);
}
+
+ /* Let's see if we have good enough OPEN lock on the file and if
+ we can skip talking to MDS */
+ if (file->f_dentry->d_inode) { /* Can this ever be false? */
+ int lockmode;
+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_TEST_LOCK;
+ struct lustre_handle lockh;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ldlm_res_id file_res_id = {.name={inode->i_ino,
+ inode->i_generation}};
+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
+
+ down(&lli->lli_och_sem);
+ if (fd->fd_omode & FMODE_WRITE) {
+ lockmode = LCK_CW;
+ LASSERT(lli->lli_open_fd_write_count);
+ lli->lli_open_fd_write_count--;
+ } else if (fd->fd_omode & FMODE_EXEC) {
+ lockmode = LCK_PR;
+ LASSERT(lli->lli_open_fd_exec_count);
+ lli->lli_open_fd_exec_count--;
+ } else {
+ lockmode = LCK_CR;
+ LASSERT(lli->lli_open_fd_read_count);
+ lli->lli_open_fd_read_count--;
+ }
+ up(&lli->lli_och_sem);
+
+ if (!ldlm_lock_match(mdc_exp->exp_obd->obd_namespace, flags,
+ &file_res_id, LDLM_IBITS, &policy,lockmode,
+ &lockh)) {
+ rc = ll_mdc_real_close(file->f_dentry->d_inode,
+ fd->fd_omode);
+ }
+ } else {
+ CERROR("Releasing a file %p with negative dentry %p. Name %s",
+ file, file->f_dentry, file->f_dentry->d_name.name);
+ }
- rc = ll_close_inode_openhandle(inode, och);
- och->och_fh.cookie = DEAD_HANDLE_MAGIC;
LUSTRE_FPRIVATE(file) = NULL;
ll_file_data_put(fd);
ll_prepare_mdc_op_data(&data, parent->d_inode, NULL, name, len, O_RDWR);
+ /* Usually we come here only for NFSD, and we want open lock.
+ But we can also get here with pre 2.6.15 patchless kernels, and in
+ that case that lock is also ok */
+ /* We can also get here if there was cached open handle in revalidate_it
+ * but it disappeared while we were getting from there to ll_file_open.
+ * But this means this file was closed and immediately opened which
+ * makes a good candidate for using OPEN lock */
+ /* If lmmsize & lmm are not 0, we are just setting stripe info
+ * parameters. No need for the open lock */
+ if (!lmm && !lmmsize)
+ itp->it_flags |= MDS_OPEN_LOCK;
+
rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, itp, LCK_PW, &data,
&lockh, lmm, lmmsize, ldlm_completion_ast,
ll_mdc_blocking_ast, NULL, 0);
GOTO(out, rc);
}
+ if (itp->d.lustre.it_lock_mode) { /* If we got lock - release it right
+ * away */
+ ldlm_lock_decref(&lockh, itp->d.lustre.it_lock_mode);
+ itp->d.lustre.it_lock_mode = 0;
+ }
rc = ll_prep_inode(sbi->ll_osc_exp, &file->f_dentry->d_inode,
(struct ptlrpc_request *)itp->d.lustre.it_data,
DLM_REPLY_REC_OFF, NULL);
}
int ll_local_open(struct file *file, struct lookup_intent *it,
- struct ll_file_data *fd)
+ struct ll_file_data *fd, struct obd_client_handle *och)
{
ENTRY;
LASSERT(fd != NULL);
- ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, &fd->fd_mds_och);
+ if (och)
+ ll_och_fill(ll_i2info(file->f_dentry->d_inode), it, och);
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
+ fd->fd_omode = it->it_flags;
RETURN(0);
}
struct lookup_intent *it, oit = { .it_op = IT_OPEN,
.it_flags = file->f_flags };
struct lov_stripe_md *lsm;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
struct ll_file_data *fd;
int rc = 0;
ENTRY;
oit.it_flags &= ~O_EXCL;
it = &oit;
- rc = ll_intent_file_open(file, NULL, 0, it);
+ }
+
+ /* Let's see if we have file open on MDS already. */
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+ down(&lli->lli_och_sem);
+ if (*och_p) { /* Open handle is present */
+ if (it_disposition(it, DISP_LOOKUP_POS) && /* Positive lookup */
+ it_disposition(it, DISP_OPEN_OPEN)) { /* & OPEN happened */
+ /* Well, there's extra open request that we do not need,
+ let's close it somehow. This will decref request. */
+ ll_release_openhandle(file->f_dentry, it);
+ }
+ (*och_usecount)++;
+
+ rc = ll_local_open(file, it, fd, NULL);
+
+ LASSERTF(rc == 0, "rc = %d\n", rc);
+ } else {
+ LASSERT(*och_usecount == 0);
+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
+ if (!*och_p) {
+ ll_file_data_put(fd);
+ GOTO(out_och_free, rc = -ENOMEM);
+ }
+ (*och_usecount)++;
+ if (!it->d.lustre.it_disposition) {
+ rc = ll_intent_file_open(file, NULL, 0, it);
+ if (rc) {
+ ll_file_data_put(fd);
+ GOTO(out_och_free, rc);
+ }
+
+ /* Got some error? Release the request */
+ if (it->d.lustre.it_status < 0) {
+ req = it->d.lustre.it_data;
+ ptlrpc_req_finished(req);
+ }
+ mdc_set_lock_data(&it->d.lustre.it_lock_handle,
+ file->f_dentry->d_inode);
+ }
+ req = it->d.lustre.it_data;
+
+ /* mdc_intent_lock() didn't get a request ref if there was an
+ * open error, so don't do cleanup on the request here
+ * (bug 3430) */
+ /* XXX (green): Should not we bail out on any error here, not
+ * just open error? */
+ rc = it_open_error(DISP_OPEN_OPEN, it);
if (rc) {
ll_file_data_put(fd);
- GOTO(out, rc);
+ GOTO(out_och_free, rc);
}
- }
- lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
- rc = it_open_error(DISP_OPEN_OPEN, it);
- /* mdc_intent_lock() didn't get a request ref if there was an open
- * error, so don't do cleanup on the request here (bug 3430) */
- if (rc) {
- ll_file_data_put(fd);
- RETURN(rc);
+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
+ rc = ll_local_open(file, it, fd, *och_p);
+ LASSERTF(rc == 0, "rc = %d\n", rc);
}
+ up(&lli->lli_och_sem);
- rc = ll_local_open(file, it, fd);
- LASSERTF(rc == 0, "rc = %d\n", rc);
-
+ /* Must do this outside lli_och_sem lock to prevent deadlock where
+ different kind of OPEN lock for this same inode gets cancelled
+ by ldlm_cancel_lru */
if (!S_ISREG(inode->i_mode))
GOTO(out, rc);
file->f_flags &= ~O_LOV_DELAY_CREATE;
GOTO(out, rc);
out:
- req = it->d.lustre.it_data;
ptlrpc_req_finished(req);
if (req)
it_clear_disposition(it, DISP_ENQ_OPEN_REF);
- if (rc == 0)
+ if (rc == 0) {
ll_open_complete(inode);
+ } else {
+out_och_free:
+ if (*och_p) {
+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
+ *och_p = NULL; /* OBD_FREE writes some magic there */
+ (*och_usecount)--;
+ }
+ up(&lli->lli_och_sem);
+ }
+
return rc;
}
*ppos = inode->i_size;
if (*ppos >= maxbytes) {
- if (count || *ppos > maxbytes) {
- send_sig(SIGXFSZ, current, 0);
- GOTO(out, retval = -EFBIG);
- }
+ send_sig(SIGXFSZ, current, 0);
+ GOTO(out, retval = -EFBIG);
}
if (*ppos + count > maxbytes)
count = maxbytes - *ppos;
int lum_size)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct file *f = NULL;
- struct obd_export *exp = ll_i2obdexp(inode);
struct lov_stripe_md *lsm;
struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
- struct ptlrpc_request *req = NULL;
- struct ll_file_data *fd;
int rc = 0;
- struct lustre_md md;
ENTRY;
down(&lli->lli_open_sem);
RETURN(-EEXIST);
}
- fd = ll_file_data_get();
- if (fd == NULL)
- GOTO(out, -ENOMEM);
-
- f = get_empty_filp();
- if (!f)
- GOTO(out, -ENOMEM);
-
- f->f_dentry = dget(file->f_dentry);
- f->f_vfsmnt = mntget(file->f_vfsmnt);
-
- rc = ll_intent_file_open(f, lum, lum_size, &oit);
+ rc = ll_intent_file_open(file, lum, lum_size, &oit);
if (rc)
GOTO(out, rc);
if (it_disposition(&oit, DISP_LOOKUP_NEG))
- GOTO(out, -ENOENT);
- req = oit.d.lustre.it_data;
+ GOTO(out_req_free, rc = -ENOENT);
rc = oit.d.lustre.it_status;
-
if (rc < 0)
- GOTO(out, rc);
+ GOTO(out_req_free, rc);
- rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
- if (rc)
- GOTO(out, rc);
- ll_update_inode(f->f_dentry->d_inode, &md);
-
- rc = ll_local_open(f, &oit, fd);
- if (rc)
- GOTO(out, rc);
- fd = NULL;
- ll_intent_release(&oit);
-
- rc = ll_file_release(f->f_dentry->d_inode, f);
+ ll_release_openhandle(file->f_dentry, &oit);
out:
- if (f)
- fput(f);
- ll_file_data_put(fd);
up(&lli->lli_open_sem);
- if (req != NULL)
- ptlrpc_req_finished(req);
+ ll_intent_release(&oit);
RETURN(rc);
+out_req_free:
+ ptlrpc_req_finished((struct ptlrpc_request *) oit.d.lustre.it_data);
+ goto out;
}
static int ll_lov_setea(struct inode *inode, struct file *file,
struct dentry *tail_dentry = tail_filp->f_dentry;
struct lookup_intent oit = {.it_op = IT_OPEN,
.it_flags = head_filp->f_flags|O_JOIN_FILE};
- struct ptlrpc_request *req = NULL;
- struct ll_file_data *fd;
struct lustre_handle lockh;
struct mdc_op_data *op_data;
__u32 hsize = head_inode->i_size >> 32;
__u32 tsize = head_inode->i_size;
- struct file *f;
int rc;
ENTRY;
tail_inode = tail_dentry->d_inode;
tail_parent = tail_dentry->d_parent->d_inode;
- fd = ll_file_data_get();
- if (fd == NULL)
- RETURN(-ENOMEM);
-
OBD_ALLOC_PTR(op_data);
if (op_data == NULL) {
- ll_file_data_put(fd);
RETURN(-ENOMEM);
}
- f = get_empty_filp();
- if (f == NULL)
- GOTO(out, rc = -ENOMEM);
-
- f->f_dentry = dget(head_filp->f_dentry);
- f->f_vfsmnt = mntget(head_filp->f_vfsmnt);
-
ll_prepare_mdc_op_data(op_data, head_inode, tail_parent,
tail_dentry->d_name.name,
tail_dentry->d_name.len, 0);
if (rc < 0)
GOTO(out, rc);
- req = oit.d.lustre.it_data;
rc = oit.d.lustre.it_status;
- if (rc < 0)
+ if (rc < 0) {
+ ptlrpc_req_finished((struct ptlrpc_request *)
+ oit.d.lustre.it_data);
GOTO(out, rc);
+ }
- rc = ll_local_open(f, &oit, fd);
- LASSERTF(rc == 0, "rc = %d\n", rc);
-
- fd = NULL;
- ll_intent_release(&oit);
-
- rc = ll_file_release(f->f_dentry->d_inode, f);
+ if (oit.d.lustre.it_lock_mode) { /* If we got lock - release it right
+ * away */
+ ldlm_lock_decref(&lockh, oit.d.lustre.it_lock_mode);
+ oit.d.lustre.it_lock_mode = 0;
+ }
+ ll_release_openhandle(head_filp->f_dentry, &oit);
out:
if (op_data)
OBD_FREE_PTR(op_data);
- if (f)
- fput(f);
- ll_file_data_put(fd);
- ptlrpc_req_finished(req);
+ ll_intent_release(&oit);
RETURN(rc);
}
RETURN(rc);
}
-static int ll_have_md_lock(struct dentry *de)
+int ll_have_md_lock(struct inode *inode, __u64 bits)
{
- struct ll_sb_info *sbi = ll_s2sbi(de->d_sb);
struct lustre_handle lockh;
struct ldlm_res_id res_id = { .name = {0} };
struct obd_device *obddev;
- ldlm_policy_data_t policy = { .l_inodebits = {
- MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP}};
+ ldlm_policy_data_t policy = { .l_inodebits = {bits}};
int flags;
ENTRY;
- if (!de->d_inode)
+ if (!inode)
RETURN(0);
- obddev = sbi->ll_mdc_exp->exp_obd;
- res_id.name[0] = de->d_inode->i_ino;
- res_id.name[1] = de->d_inode->i_generation;
+ obddev = ll_i2mdcexp(inode)->exp_obd;
+ res_id.name[0] = inode->i_ino;
+ res_id.name[1] = inode->i_generation;
CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
GOTO(out, rc);
}
+ /* Unlinked? Unhash dentry, so it is not picked up later by
+ do_lookup() -> ll_revalidate_it(). We cannot use d_drop
+ here to preserve get_cwd functionality on 2.6.
+ Bug 10503 */
+ if (!dentry->d_inode->i_nlink) {
+ spin_lock(&dcache_lock);
+ ll_drop_dentry(dentry);
+ spin_unlock(&dcache_lock);
+ }
+
ll_lookup_finish_locks(&oit, dentry);
- } else if (!ll_have_md_lock(dentry)) {
+ } else if (!ll_have_md_lock(dentry->d_inode,
+ MDS_INODELOCK_UPDATE|MDS_INODELOCK_LOOKUP)) {
struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
struct ll_fid fid;
obd_valid valid = OBD_MD_FLGETATTR;
struct list_head lli_dead_list;
+ struct semaphore lli_och_sem; /* Protects access to och pointers
+ and their usage counters */
+ /* We need all three because every inode may be opened in different
+ modes */
+ struct obd_client_handle *lli_mds_read_och;
+ __u64 lli_open_fd_read_count;
+ struct obd_client_handle *lli_mds_write_och;
+ __u64 lli_open_fd_write_count;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_exec_count;
+
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
struct inode lli_vfs_inode;
#endif
extern kmem_cache_t *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
- struct obd_client_handle fd_mds_och;
struct ll_readahead_state fd_ras;
- __u32 fd_flags;
+ int fd_omode;
struct lustre_handle fd_cwlockh;
unsigned long fd_gid;
+ __u32 fd_flags;
};
struct lov_stripe_md;
extern struct file_operations ll_file_operations_flock;
extern struct inode_operations ll_file_inode_operations;
extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
+extern int ll_have_md_lock(struct inode *inode, __u64 bits);
int ll_extent_lock(struct ll_file_data *, struct inode *,
struct lov_stripe_md *, int mode, ldlm_policy_data_t *,
struct lustre_handle *, int ast_flags);
int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
int ll_glimpse_size(struct inode *inode, int ast_flags);
int ll_local_open(struct file *file,
- struct lookup_intent *it, struct ll_file_data *fd);
+ struct lookup_intent *it, struct ll_file_data *fd,
+ struct obd_client_handle *och);
int ll_release_openhandle(struct dentry *, struct lookup_intent *);
int ll_mdc_close(struct obd_export *mdc_exp, struct inode *inode,
struct file *file);
+int ll_mdc_real_close(struct inode *inode, int flags);
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
struct lookup_intent *it, struct kstat *stat);
void ll_intent_drop_lock(struct lookup_intent *);
void ll_intent_release(struct lookup_intent *);
extern void ll_set_dd(struct dentry *de);
+int ll_drop_dentry(struct dentry *dentry);
void ll_unhash_aliases(struct inode *);
void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
int empty;
do {
- if (need_resched())
+ if (need_resched() && try)
break;
if (try) {
spin_lock_init(&lli->lli_lock);
INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
lli->lli_inode_magic = LLI_INODE_MAGIC;
+ sema_init(&lli->lli_och_sem, 1);
+ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
+ lli->lli_mds_exec_och = NULL;
+ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
+ lli->lli_open_fd_exec_count = 0;
INIT_LIST_HEAD(&lli->lli_dead_list);
}
inode->i_generation, inode);
ll_inode2fid(&fid, inode);
- clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
+ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &lli->lli_flags);
mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
+ LASSERT(!lli->lli_open_fd_write_count);
+ LASSERT(!lli->lli_open_fd_read_count);
+ LASSERT(!lli->lli_open_fd_exec_count);
+
+ if (lli->lli_mds_write_och)
+ ll_mdc_real_close(inode, FMODE_WRITE);
+ if (lli->lli_mds_exec_och)
+ ll_mdc_real_close(inode, FMODE_EXEC);
+ if (lli->lli_mds_read_och)
+ ll_mdc_real_close(inode, FMODE_READ);
+
+
if (lli->lli_smd) {
obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
null_if_equal, inode);
inode->i_ino, inode->i_generation, inode);
}
+ if (bits & MDS_INODELOCK_OPEN) {
+ int flags = 0;
+ switch (lock->l_req_mode) {
+ case LCK_CW:
+ flags = FMODE_WRITE;
+ break;
+ case LCK_PR:
+ flags = FMODE_EXEC;
+ break;
+ case LCK_CR:
+ flags = FMODE_READ;
+ break;
+ default:
+ CERROR("Unexpected lock mode for OPEN lock "
+ "%d, inode %ld\n", lock->l_req_mode,
+ inode->i_ino);
+ }
+ ll_mdc_real_close(inode, flags);
+ }
+
if (bits & MDS_INODELOCK_UPDATE)
clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
&(ll_i2info(inode)->lli_flags));
-
if (S_ISDIR(inode->i_mode) &&
- (bits & MDS_INODELOCK_UPDATE)) {
+ (bits & MDS_INODELOCK_UPDATE)) {
+ struct dentry *dentry, *tmp, *dir;
+ struct list_head *list;
+
CDEBUG(D_INODE, "invalidating inode %lu\n",
inode->i_ino);
truncate_inode_pages(inode->i_mapping, 0);
+
+
+ /* Drop possible cached negative dentries */
+ list = &inode->i_dentry;
+ dir = NULL;
+ spin_lock(&dcache_lock);
+
+ /* It is possible to have several dentries (with
+ racer?) */
+ while ((list = list->next) != &inode->i_dentry) {
+ dir = list_entry(list, struct dentry, d_alias);
+ if (!(dir->d_flags & DCACHE_LUSTRE_INVALID))
+ break;
+
+ dir = NULL;
+ }
+
+ if (dir) {
+restart:
+ list_for_each_entry_safe(dentry, tmp,
+ &dir->d_subdirs,
+ d_child)
+ {
+ /* XXX Print some debug here? */
+ if (!dentry->d_inode)
+ /* Negative dentry. If the dcache
+ lock had to be dropped, go
+ through the list again */
+ if (ll_drop_dentry(dentry))
+ goto restart;
+ }
+ }
+ spin_unlock(&dcache_lock);
}
if (inode->i_sb->s_root &&
*de = ll_find_alias(inode, *de);
} else {
ENTRY;
- spin_lock(&dcache_lock);
- ll_d_add(*de, inode);
- spin_unlock(&dcache_lock);
+ /* Check that the parent has an UPDATE lock. If there is none, we
+ cannot afford to hash this dentry (done by ll_d_add) as it
+ might get picked up later when an UPDATE lock appears */
+ if (ll_have_md_lock(parent, MDS_INODELOCK_UPDATE)) {
+ spin_lock(&dcache_lock);
+ ll_d_add(*de, inode);
+ spin_unlock(&dcache_lock);
+ } else {
+ (*de)->d_inode = NULL;
+ }
}
ll_set_dd(*de);
if (cmd & OBD_BRW_WRITE) {
oa->o_valid |= OBD_MD_FLEPOCH;
oa->o_easize = ll_i2info(inode)->lli_io_epoch;
- oa->o_uid = inode->i_uid;
- oa->o_gid = inode->i_gid;
valid_flags |= OBD_MD_FLMTIME | OBD_MD_FLCTIME |
OBD_MD_FLUID | OBD_MD_FLGID |
struct list_head set_list;
};
-#define LAP_MAGIC 8200
+#define LOV_AP_MAGIC 8200
struct lov_async_page {
int lap_magic;
void *lap_caller_data;
};
-#define LAP_FROM_COOKIE(c) \
- (LASSERT(((struct lov_async_page *)(c))->lap_magic == LAP_MAGIC), \
+#define LAP_FROM_COOKIE(c) \
+ (LASSERT(((struct lov_async_page *)(c))->lap_magic == LOV_AP_MAGIC), \
(struct lov_async_page *)(c))
static inline void lov_llh_addref(void *llhp)
rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
NULL, oti, NULL);
err = lov_update_common_set(set, req, rc);
- if (rc) {
+ if (err) {
CERROR("error: destroying objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
set->set_oa->o_id, req->rq_oa->o_id,
rc = err;
}
}
- lov_fini_destroy_set(set);
+ rc = lov_fini_destroy_set(set);
if (rc == 0) {
LASSERT(lsm_op_find(lsm->lsm_magic) != NULL);
rc = lsm_op_find(lsm->lsm_magic)->lsm_destroy(lsm, oa, md_exp);
LASSERT(loi == NULL);
lap = *res;
- lap->lap_magic = LAP_MAGIC;
+ lap->lap_magic = LOV_AP_MAGIC;
lap->lap_caller_ops = ops;
lap->lap_caller_data = data;
set->set_count = 0;
set->set_completes = 0;
set->set_success = 0;
+ set->set_cookies = 0;
CFS_INIT_LIST_HEAD(&set->set_list);
atomic_set(&set->set_refcount, 1);
}
* can be addressed then. */
if (rc == ELDLM_OK) {
struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
- __u64 tmp = req->rq_md->lsm_oinfo->loi_lvb.lvb_size;
+ __u64 tmp;
LASSERT(lock != NULL);
lov_stripe_lock(set->set_md);
loi->loi_lvb = req->rq_md->lsm_oinfo->loi_lvb;
+ tmp = loi->loi_lvb.lvb_size;
/* Extend KMS up to the end of this lock and no further
* A lock on [x,y] means a KMS of up to y + 1 bytes! */
if (tmp > lock->l_policy_data.l_extent.end)
lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
loi_init(loi);
- if (set->set_cookies)
+ if (oti && set->set_cookies)
++oti->oti_logcookies;
if (req->rq_oa->o_valid & OBD_MD_FLCOOKIE)
set->set_cookie_sent++;
req->rq_oa->o_id = loi->loi_id;
/* Setup the first request's cookie position */
- if (!cookie_set && set->set_cookies) {
+ if (oti && !cookie_set && set->set_cookies) {
oti->oti_logcookies = set->set_cookies + i;
cookie_set = 1;
}
return
(flags & (FMODE_READ | FMODE_WRITE |
MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
- MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE)) |
+ MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE |
+ MDS_OPEN_LOCK)) |
((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) |
((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) |
((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) |
if (req == NULL)
GOTO(out, rc = -ENOMEM);
+ /* To avoid a livelock (bug 7034), we need to send CLOSE RPCs to a
+ * portal whose threads are not taking any DLM locks and are therefore
+ * always progressing */
+ /* XXX FIXME bug 249 */
+ req->rq_request_portal = MDS_READPAGE_PORTAL;
+
/* Ensure that this close's handle is fixed up during replay. */
LASSERT(och != NULL);
LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC);
LASSERT(req->rq_cb_data == NULL);
req->rq_cb_data = mod;
- mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+ mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
rc = ptlrpc_queue_wait(req);
- mdc_put_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+ mdc_put_rpc_lock(obd->u.cli.cl_close_lock, NULL);
if (req->rq_repmsg == NULL) {
CDEBUG(D_HA, "request failed to send: %p, %d\n", req,
GOTO(err_rpc_lock, rc = -ENOMEM);
mdc_init_rpc_lock(cli->cl_setattr_lock);
+ OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+ if (!cli->cl_close_lock)
+ GOTO(err_setattr_lock, rc = -ENOMEM);
+ mdc_init_rpc_lock(cli->cl_close_lock);
+
rc = client_obd_setup(obd, len, buf);
if (rc)
- GOTO(err_setattr_lock, rc);
+ GOTO(err_close_lock, rc);
lprocfs_init_vars(mdc, &lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
RETURN(rc);
+err_close_lock:
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
err_setattr_lock:
OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
err_rpc_lock:
OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
lprocfs_obd_cleanup(obd);
ptlrpcd_decref();
/* sanity check: if the xid matches, the request must
* be marked as a resent or replayed */
- if (req->rq_xid == med->med_mcd->mcd_last_xid)
- LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) &
- (MSG_RESENT | MSG_REPLAY),
- "rq_xid "LPU64" matches last_xid, "
- "expected RESENT flag\n",
- req->rq_xid);
+ if (req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_xid) ||
+ req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_close_xid))
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) &
+ (MSG_RESENT | MSG_REPLAY))) {
+ CERROR("rq_xid "LPU64" matches last_xid, "
+ "expected RESENT flag\n",
+ req->rq_xid);
+ req->rq_status = -ENOTCONN;
+ GOTO(out, rc = -EFAULT);
+ }
/* else: note the opposite is not always true; a
* RESENT req after a failover will usually not match
* the last_xid, since it was likely never
/* If we're DISCONNECTing, the mds_export_data is already freed */
if (!rc && lustre_msg_get_opc(req->rq_reqmsg) != MDS_DISCONNECT) {
struct mds_export_data *med = &req->rq_export->exp_mds_data;
+
+ /* I don't think last_xid is used for anything, so I'm not sure
+ if we need to care about last_close_xid here. */
lustre_msg_set_last_xid(req->rq_repmsg,
le64_to_cpu(med->med_mcd->mcd_last_xid));
obd->obd_replayable ? "enabled" : "disabled");
}
- ldlm_timeout = 2;
+ ldlm_timeout = 6;
RETURN(0);
le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid))
return;
+ if (req->rq_xid ==
+ le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_close_xid))
+ return;
+
/* This remote handle isn't enqueued, so we never received or
* processed this request. Clear MSG_RESENT, because it can
* be handled like any normal request now. */
if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
!intent_disposition(rep, DISP_OPEN_OPEN))
#endif
+ if (rep->lock_policy_res2) {
+ /* mds_open returns ENOLCK where it should return zero,
+ but it has no lock to return */
+ if (rep->lock_policy_res2 == ENOLCK)
+ rep->lock_policy_res2 = 0;
RETURN(ELDLM_LOCK_ABORTED);
+ }
break;
case IT_LOOKUP:
getattr_part = MDS_INODELOCK_LOOKUP;
continue;
}
- last_transno = le64_to_cpu(mcd->mcd_last_transno);
+ last_transno = le64_to_cpu(mcd->mcd_last_transno) >
+ le64_to_cpu(mcd->mcd_last_close_transno) ?
+ le64_to_cpu(mcd->mcd_last_transno) :
+ le64_to_cpu(mcd->mcd_last_close_transno);
/* These exports are cleaned up by mds_disconnect(), so they
* need to be set up like real exports as mds_connect() does.
__u64 mcd_last_xid; /* xid for the last transaction */
__u32 mcd_last_result; /* result from last RPC */
__u32 mcd_last_data; /* per-op data (disposition for open &c.) */
- __u8 mcd_padding[LR_CLIENT_SIZE - 64];
+ /* for MDS_CLOSE requests */
+ __u64 mcd_last_close_transno; /* last completed transaction ID */
+ __u64 mcd_last_close_xid; /* xid for the last transaction */
+ __u32 mcd_last_close_result; /* result from last RPC */
+ __u32 mcd_last_close_data; /* per-op data (disposition for open &c.) */
+ __u8 mcd_padding[LR_CLIENT_SIZE - 88];
};
#define MDS_SERVICE_WATCHDOG_TIMEOUT (obd_timeout * 1000)
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \
struct mds_client_data *mcd = \
req->rq_export->exp_mds_data.med_mcd; \
- if (mcd->mcd_last_xid == req->rq_xid) { \
+ if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) { \
reconstruct; \
- RETURN(lustre_msg_get_status(req->rq_repmsg)); \
+ RETURN(le32_to_cpu(mcd->mcd_last_result)); \
+ } \
+ if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) { \
+ reconstruct; \
+ RETURN(le32_to_cpu(mcd->mcd_last_close_result)); \
} \
DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")",\
mcd->mcd_last_xid); \
sizeof(struct llog_cookie);
int max_easize = sizeof(*lmmj);
- CDEBUG(D_INFO, "change the max md size from %d to %d \n",
+ CDEBUG(D_INFO, "change the max md size from %d to "LPSZ"\n",
mds->mds_max_mdsize, sizeof(*lmmj));
if (mds->mds_max_mdsize < max_easize ||
/* copy rc, transno and disp; steal locks */
mds_req_from_mcd(req, mcd);
- intent_set_disposition(rep, mcd->mcd_last_data);
+ intent_set_disposition(rep, le32_to_cpu(mcd->mcd_last_data));
/* Only replay if create or open actually happened. */
if (!intent_disposition(rep, DISP_OPEN_CREATE | DISP_OPEN_OPEN) ) {
}
UNLOCK_INODE_MUTEX(dchild->d_inode);
- if (!(rec->ur_flags & MDS_OPEN_JOIN_FILE))
+ if (rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE))
lustre_shrink_reply(req, DLM_REPLY_REC_OFF + 1,
body->eadatasize, 0);
if (req->rq_export->exp_connect_flags & OBD_CONNECT_ACL &&
- !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) {
+ rec && !(rec->ur_flags & MDS_OPEN_JOIN_FILE)) {
int acl_off = DLM_REPLY_REC_OFF + (body->eadatasize ? 2 : 1);
rc = mds_pack_acl(&req->rq_export->exp_mds_data,
struct dentry_params dp;
unsigned int qcids[MAXQUOTAS] = { current->fsuid, current->fsgid };
unsigned int qpids[MAXQUOTAS] = { 0, 0 };
+ int child_mode = LCK_CR;
+ /* Always return a LOOKUP lock if the open is successful, to guard
+ the dentry on the client. */
+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}};
+ struct ldlm_res_id child_res_id = { .name = {0}};
+ int lock_flags = 0;
ENTRY;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE,
+ (obd_timeout + 1) / 4);
+
CLASSERT(MAXQUOTAS < 4);
if (offset == DLM_INTENT_REC_OFF) { /* intent */
rep = lustre_msg_buf(req->rq_repmsg, DLM_LOCKREPLY_OFF,
GOTO(cleanup, rc = -EAGAIN);
}
+ /* Obtain OPEN lock as well */
+ policy.l_inodebits.bits |= MDS_INODELOCK_OPEN;
+
+ /* We cannot use acc_mode here, because it is zeroed when a file is
+ being created, so we would get the wrong lock mode */
+ if (accmode(dchild->d_inode, rec->ur_flags) & MAY_WRITE)
+ child_mode = LCK_CW;
+ else if (accmode(dchild->d_inode, rec->ur_flags) & MAY_EXEC)
+ child_mode = LCK_PR;
+ else
+ child_mode = LCK_CR;
+
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) &&
+ (rec->ur_flags & MDS_OPEN_LOCK)) {
+ /* In case of replay we do not get a lock assuming that the
+ caller has it already */
+ child_res_id.name[0] = dchild->d_inode->i_ino;
+ child_res_id.name[1] = dchild->d_inode->i_generation;
+
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace,
+ child_res_id, LDLM_IBITS, &policy,
+ child_mode, &lock_flags,
+ ldlm_blocking_ast, ldlm_completion_ast,
+ NULL, NULL, NULL, 0, NULL, child_lockh);
+ if (rc != ELDLM_OK)
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 3;
+ }
+
if (!S_ISREG(dchild->d_inode->i_mode) &&
!S_ISDIR(dchild->d_inode->i_mode) &&
(req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) {
cleanup_no_trans:
switch (cleanup_phase) {
+ case 3:
+ if (rc)
+ ldlm_lock_decref(child_lockh, child_mode);
case 2:
if (rc && created) {
int err = vfs_unlink(dparent->d_inode, dchild);
else
ptlrpc_save_lock(req, &parent_lockh, parent_mode);
}
+ /* If we have not taken the "open" lock, we may not return 0 here,
+ because the caller expects 0 to mean "lock is taken", and it needs a
+ nonzero return here in order to return ELDLM_LOCK_ABORTED to the
+ client. The caller should later rewrite the return value back to zero
+ if it is to be used any further.
+ */
+ if ((cleanup_phase != 3) && !rc)
+ rc = ENOLCK;
/* trigger dqacq on the owner of child and parent */
lquota_adjust(quota_interface, obd, qcids, qpids, rc, FSFILT_OP_CREATE);
mds->mds_last_transno = transno;
spin_unlock(&mds->mds_transno_lock);
}
+
req->rq_transno = transno;
lustre_msg_set_transno(req->rq_repmsg, transno);
- mcd->mcd_last_transno = cpu_to_le64(transno);
- mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
- mcd->mcd_last_result = cpu_to_le32(rc);
- mcd->mcd_last_data = cpu_to_le32(op_data);
+ if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) {
+ mcd->mcd_last_close_transno = cpu_to_le64(transno);
+ mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid);
+ mcd->mcd_last_close_result = cpu_to_le32(rc);
+ mcd->mcd_last_close_data = cpu_to_le32(op_data);
+ } else {
+ mcd->mcd_last_transno = cpu_to_le64(transno);
+ mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
+ mcd->mcd_last_result = cpu_to_le32(rc);
+ mcd->mcd_last_data = cpu_to_le32(op_data);
+ }
if (off <= 0) {
CERROR("client idx %d has offset %lld\n", med->med_lr_idx, off);
void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd)
{
+ if (lustre_msg_get_opc(req->rq_reqmsg) == MDS_CLOSE) {
+ req->rq_transno = le64_to_cpu(mcd->mcd_last_close_transno);
+ lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
+ req->rq_status = le32_to_cpu(mcd->mcd_last_close_result);
+ lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+ } else {
+ req->rq_transno = le64_to_cpu(mcd->mcd_last_transno);
+ lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
+ req->rq_status = le32_to_cpu(mcd->mcd_last_result);
+ lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+ }
DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d",
- mcd->mcd_last_transno, mcd->mcd_last_result);
- req->rq_transno = mcd->mcd_last_transno;
- lustre_msg_set_transno(req->rq_repmsg, req->rq_transno);
- req->rq_status = mcd->mcd_last_result;
- lustre_msg_set_status(req->rq_repmsg, req->rq_status);
+ req->rq_transno, req->rq_status);
mds_steal_ack_locks(req);
}
return rc;
}
+#define INODE_CTIME_AGE (10)
+#define INODE_CTIME_OLD(inode) (LTIME_S(inode->i_ctime) + \
+ INODE_CTIME_AGE < CURRENT_SECONDS)
+
int mds_get_parent_child_locked(struct obd_device *obd, struct mds_obd *mds,
struct ll_fid *fid,
struct lustre_handle *parent_lockh,
child_res_id.name[0] = inode->i_ino;
child_res_id.name[1] = inode->i_generation;
+
+ /* If we want a LCK_CR for a directory, and this directory has not been
+ changed for some time, we return not only a LOOKUP lock, but also an
+ UPDATE lock so that negative dentries start working for this dir.
+ We also apply the same logic to non-directories: if the file is rarely
+ changed, we return both locks, which might save us an RPC on a
+ later STAT. */
+ if ((child_mode & (LCK_CR|LCK_PR|LCK_CW)) && INODE_CTIME_OLD(inode))
+ child_policy.l_inodebits.bits |= MDS_INODELOCK_UPDATE;
+
iput(inode);
retry_locks:
}
if (handle->lgh_ctxt == NULL)
RETURN(-EOPNOTSUPP);
- llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
+ rc = llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
+ if (rc) {
+ CDEBUG(D_IOCTL,
+ "cannot find log #"LPX64"#"LPX64"#%08x\n",
+ lir->lid_id.lgl_oid, lir->lid_id.lgl_ogr,
+ lir->lid_id.lgl_ogen);
+ RETURN(rc);
+ }
rc = llog_process(log_handle, llog_check_cb, NULL, NULL);
llog_close(log_handle);
} else {
/* don't need dir->i_zombie for 2.4, it is for rename/unlink of dir
* itself we already hold dir->i_mutex for child create/unlink ops */
+ LASSERT(dentry->d_inode != NULL);
LASSERT(TRYLOCK_INODE_MUTEX(dir) == 0);
LASSERT(TRYLOCK_INODE_MUTEX(dentry->d_inode) == 0);
/* may_delete() */
- if (!dentry->d_inode || dentry->d_parent->d_inode != dir)
+ if (/*!dentry->d_inode ||*/dentry->d_parent->d_inode != dir)
GOTO(out, rc = -ENOENT);
rc = ll_permission(dir, MAY_WRITE | MAY_EXEC, NULL);
EXT3_IOC_SETFLAGS, (long)&oa->o_flags);
} else {
rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
- if (fcc != NULL)
+ if (fcc != NULL) {
/* set cancel cookie callback function */
fsfilt_add_journal_cb(exp->exp_obd, 0, oti ?
oti->oti_handle : handle,
filter_cancel_cookies_cb,
fcc);
+ fcc = NULL;
+ }
}
if (locked) {
if (locked)
UNLOCK_INODE_MUTEX(inode);
+ if (fcc)
+ OBD_FREE(fcc, sizeof(*fcc));
+
/* trigger quota release */
if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
unsigned int cur_ids[MAXQUOTAS] = {oa->o_uid, oa->o_gid};
ENTRY;
/* We are currently not supporting multi-obj BRW_READ RPCS at all.
- * When we do this function's dentry cleanup will need to be fixed */
+ * When we do this function's dentry cleanup will need to be fixed.
+ * These values are verified in ost_brw_write() from the wire. */
LASSERTF(objcount == 1, "%d\n", objcount);
LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt);
inode = dentry->d_inode;
- if (oa)
- obdo_to_inode(inode, oa, OBD_MD_FLATIME);
-
+ obdo_to_inode(inode, oa, OBD_MD_FLATIME);
fsfilt_check_slow(now, obd_timeout, "preprw_read setup");
for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt;
writemany
random-reads
chownmany
+llverdev
+llverfs
gettimeofday(&now, NULL);
/* A positive verbosity means to print every X iterations */
- if (verbose > 0 &&
- (next_num == NULL || num >= *next_num || num >= num_total)) {
+ if (verbose > 0 && (num >= *next_num || num >= num_total)) {
*next_num += verbose;
if (next_time) {
next_time->tv_sec = now.tv_sec - verbose;
if (verbose < 0 && next_time != NULL && difftime(&now, next_time) >= 0){
next_time->tv_sec = now.tv_sec - verbose;
next_time->tv_usec = now.tv_usec;
- if (next_num)
- *next_num = num;
+ *next_num = num;
return 1;
}
#define MAX_LOV_UUID_COUNT 1000
+/* Returns bytes read on success and a negative value on failure.
+ * If zero bytes are read it is treated as a failure; as such,
+ * zero is never returned from this function.
+ */
int read_proc_entry(char *proc_path, char *buf, int len)
{
- int rcnt = -2, fd;
+ int rc, fd;
+
+ memset(buf, 0, len);
- if ((fd = open(proc_path, O_RDONLY)) == -1) {
+ fd = open(proc_path, O_RDONLY);
+ if (fd == -1) {
fprintf(stderr, "open('%s') failed: %s\n",
proc_path, strerror(errno));
- rcnt = -3;
- } else if ((rcnt = read(fd, buf, len)) <= 0) {
+ return -2;
+ }
+
+ rc = read(fd, buf, len - 1);
+ if (rc < 0) {
fprintf(stderr, "read('%s') failed: %s\n",
proc_path, strerror(errno));
- } else {
- buf[rcnt - 1] = '\0';
+ rc = -3;
+ } else if (rc == 0) {
+ fprintf(stderr, "read('%s') zero bytes\n", proc_path);
+ rc = -4;
+ } else if (/* rc > 0 && */ buf[rc - 1] == '\n') {
+ buf[rc - 1] = '\0'; /* Remove trailing newline */
}
+ close(fd);
- if (fd >= 0)
- close(fd);
-
- return (rcnt);
+ return (rc);
}
int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1,
int i, rc;
rc = read_proc_entry("/proc/fs/lustre/llite/fs0/lov/common_name",
- buf, sizeof(buf)) <= 0;
+ buf, sizeof(buf));
if (rc < 0)
return -rc;
if (lum_dir == NULL) {
snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripecount",
lov_path);
- if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+ if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
return 5;
stripe_count = atoi(buf);
stripe_count = 1;
snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/numobd", lov_path);
- if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+ if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
return 6;
ost_count = atoi(buf);
if (stripe_size == 0) {
snprintf(tmp_path, sizeof(tmp_path) - 1, "%s/stripesize",
lov_path);
- if (read_proc_entry(tmp_path, buf, sizeof(buf)) <= 0)
+ if (read_proc_entry(tmp_path, buf, sizeof(buf)) < 0)
return 5;
stripe_size = atoi(buf);
if (argc < 3) {
fprintf(stderr, "Usage: %s <dirname> <filename1> [filename2]\n",
argv[0]);
- exit(1);
+ return 1;
}
dir = opendir(argv[1]);
memset(ptr, 'a', region);
/* cancel unused locks */
- cancel_lru_locks("osc");
+ rc = cancel_lru_locks("osc");
if (rc)
goto out_unmap;
goto out;
}
- cancel_lru_locks("osc");
+ rc = cancel_lru_locks("osc");
if (rc)
goto out;
}
run_test 20b "ldlm_handle_enqueue error (should return error)"
-#b_cray run_test 21a "drop close request while close and open are both in flight"
-#b_cray run_test 21b "drop open request while close and open are both in flight"
-#b_cray run_test 21c "drop both request while close and open are both in flight"
-#b_cray run_test 21d "drop close reply while close and open are both in flight"
-#b_cray run_test 21e "drop open reply while close and open are both in flight"
-#b_cray run_test 21f "drop both reply while close and open are both in flight"
-#b_cray run_test 21g "drop open reply and close request while close and open are both in flight"
-#b_cray run_test 21h "drop open request and close reply while close and open are both in flight"
-#b_cray run_test 22 "drop close request and do mknod"
-#b_cray run_test 23 "client hang when close a file after mds crash"
+# test_21a: have a close and an open in flight against the MDS at the same
+# time while fault injection is armed, then check that both files exist.
+# The matching run_test line describes this as dropping the close request.
+# NOTE(review): the meaning of fail_loc 0x80000129 and 0x80000115 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1/2 if waiting for the close/open process fails and 3/4 if the
+# respective file is missing afterwards.
+test_21a() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down;
+ # presumably it keeps the file open until then -- TODO confirm "O_c"
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ # fault injection is armed only while the second open is started
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+ multiop $DIR/$tdir-2/f Oc &
+ open_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ # second fault injection is armed around the close
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 2
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+
+ rm -rf $DIR/$tdir-*
+}
+run_test 21a "drop close request while close and open are both in flight"
+
+# test_21b: same shape as test_21a, but the second file is created with
+# mcreate and fault injection is armed only around that create.
+# The matching run_test line describes this as dropping the open request.
+# NOTE(review): the meaning of fail_loc 0x80000107 is not visible here --
+# confirm against the OBD_FAIL_* definitions.
+# Returns 1/3 if waiting for the close/create process fails and 4/5 if the
+# respective file is missing afterwards (code 2 is not used).
+test_21b() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ mcreate $DIR/$tdir-2/f &
+ open_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ # no fault injection is armed for the close in this variant
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 3
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
+ rm -rf $DIR/$tdir-*
+}
+run_test 21b "drop open request while close and open are both in flight"
+
+# test_21c: have a close and a create in flight against the MDS at the same
+# time, with fault injection armed around each of them, then check that both
+# files exist.
+# The matching run_test line describes this as dropping both requests.
+# NOTE(review): the meaning of fail_loc 0x80000107 and 0x80000115 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1/2 if waiting for the close/create process fails and 3/4 if the
+# respective file is missing afterwards. Each failure has its own code so
+# the failing step can be identified from the return value.
+test_21c() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ mcreate $DIR/$tdir-2/f &
+ open_pid=$!
+ sleep 3
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 2
+
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+ rm -rf $DIR/$tdir-*
+}
+run_test 21c "drop both request while close and open are both in flight"
+
+# test_21d: have a close and an open in flight against the MDS at the same
+# time while fault injection is armed, then check that both files exist.
+# The matching run_test line describes this as dropping the close reply.
+# NOTE(review): the meaning of fail_loc 0x80000129 and 0x80000122 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1 if waiting for the closing process fails and 2/3 if the
+# respective file is missing afterwards.
+test_21d() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+ # the pid of this second multiop is not recorded and it is never waited on
+ multiop $DIR/$tdir-2/f Oc &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+
+ rm -rf $DIR/$tdir-*
+}
+run_test 21d "drop close reply while close and open are both in flight"
+
+# test_21e: have a close and a touch in flight against the MDS at the same
+# time, with fault injection armed only around the touch, then check that
+# both files exist.
+# The matching run_test line describes this as dropping the open reply.
+# NOTE(review): the meaning of fail_loc 0x80000119 is not visible here --
+# confirm against the OBD_FAIL_* definitions.
+# Returns 1 if waiting for the closing process fails and 2/3 if the
+# respective file is missing afterwards.
+test_21e() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ # the background touch is never waited on explicitly
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+
+ # pause for $TIMEOUT seconds before verifying the files
+ sleep $TIMEOUT
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21e "drop open reply while close and open are both in flight"
+
+# test_21f: have a close and a touch in flight against the MDS at the same
+# time, with fault injection armed around each of them, then check that both
+# files exist.
+# The matching run_test line describes this as dropping both replies.
+# NOTE(review): the meaning of fail_loc 0x80000119 and 0x80000122 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1 if waiting for the closing process fails and 2/3 if the
+# respective file is missing afterwards.
+test_21f() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ # the background touch is never waited on explicitly
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21f "drop both reply while close and open are both in flight"
+
+# test_21g: have a close and a touch in flight against the MDS at the same
+# time, with fault injection armed around each of them, then check that both
+# files exist.
+# The matching run_test line describes this as dropping the open reply and
+# the close request.
+# NOTE(review): the meaning of fail_loc 0x80000119 and 0x80000115 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1 if waiting for the closing process fails and 2/3 if the
+# respective file is missing afterwards.
+test_21g() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ # the background touch is never waited on explicitly
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21g "drop open reply and close request while close and open are both in flight"
+
+# test_21h: have a close and a touch in flight against the MDS at the same
+# time, with fault injection armed around each of them, then check that both
+# files exist.
+# The matching run_test line describes this as dropping the open request and
+# the close reply.
+# NOTE(review): the meaning of fail_loc 0x80000107 and 0x80000122 is not
+# visible here -- confirm against the OBD_FAIL_* definitions.
+# Returns 1/2 if waiting for the close/touch process fails and 3/4 if the
+# respective file is missing afterwards.
+test_21h() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ # background multiop that is signalled with SIGUSR1 further down
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ touch $DIR/$tdir-2/f &
+ touch_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ # NOTE(review): here cancel_lru_locks runs before the SIGUSR1, whereas
+ # test_21a..test_21g send the signal first -- confirm this is intended
+ cancel_lru_locks MDC # force the close
+ kill -USR1 $pid
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ wait $touch_pid || return 2
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+ rm -rf $DIR/$tdir-*
+}
+run_test 21h "drop open request and close reply while close and open are both in flight"
+
+# bug 3462 - multiple MDC requests
+# test_22: with fault injection armed on the MDS, start a background
+# open/close of $f2 and run a second multiop on $f1 in the foreground.
+# The matching run_test line describes this as "drop close request and do
+# mknod".
+# NOTE(review): the meaning of fail_loc 0x80000115 and of the multiop
+# command strings "Oc" / "msu" is not visible here -- confirm.
+# Returns 1 if the foreground multiop fails, 2 if waiting for the background
+# multiop fails and 4 if removing $f2 fails (code 3 is not used).
+test_22() {
+ f1=$DIR/${tfile}-1
+ f2=$DIR/${tfile}-2
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ multiop $f2 Oc &
+ close_pid=$!
+
+ sleep 1
+ # runs while the background multiop on $f2 may still be in progress
+ multiop $f1 msu || return 1
+
+ cancel_lru_locks MDC # force the close
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ wait $close_pid || return 2
+ # only $f2 is removed here; $f1 is left in place
+ rm -rf $f2 || return 4
+}
+run_test 22 "drop close request and do mknod"
+
+# test_23: signal a background multiop through drop_request, fail the MDS,
+# and check that the multiop process still exits successfully.
+# The matching run_test line describes the scenario as "client hang when
+# close a file after mds crash".
+# NOTE(review): drop_request and fail are helpers defined outside this
+# hunk -- their exact behaviour should be confirmed there.
+# Returns 1 if waiting for the multiop process fails, 0 otherwise.
+test_23() { #b=4561
+ multiop $DIR/$tfile O_c &
+ pid=$!
+ # give a chance for open
+ sleep 5
+
+ # try the close
+ drop_request "kill -USR1 $pid"
+
+ fail mds
+ wait $pid || return 1
+ return 0
+}
+run_test 23 "client hang when close a file after mds crash"
test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
mkdir -p $DIR/$tdir
case `uname -r` in
2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
-2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 48b" ;;
+2.6*) FSTYPE=${FSTYPE:-ldiskfs}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT " ;;
*) error "unsupported kernel" ;;
esac
}
run_test 72 "Test that remove suid works properly (bug5695) ===="
-#b_cray run_test 73 "multiple MDC requests (should not deadlock)"
+# bug 3462 - multiple simultaneous MDC requests
+# test_73: start three multiop processes on files in two directories, with
+# fault injection armed only while the second one is started, then check
+# that all three files exist.
+# The matching run_test line states that this should not deadlock.
+# NOTE(review): fail_loc is written through the local
+# /proc/sys/lustre/fail_loc here rather than via do_facet; the meaning of
+# 0x80000129 is not visible here -- confirm.
+# Returns 1 if waiting for the first multiop fails and 4/5/6 if the
+# respective file is missing afterwards (codes 2 and 3 are not used).
+test_73() {
+ mkdir $DIR/d73-1
+ mkdir $DIR/d73-2
+ multiop $DIR/d73-1/f73-1 O_c &
+ pid1=$!
+ #give multiop a chance to open
+ usleep 500
+
+ echo 0x80000129 > /proc/sys/lustre/fail_loc
+ # the pid of this second multiop is not recorded
+ multiop $DIR/d73-1/f73-2 Oc &
+ sleep 1
+ echo 0 > /proc/sys/lustre/fail_loc
+
+ multiop $DIR/d73-2/f73-3 Oc &
+ # NOTE(review): pid3 is recorded but never used below
+ pid3=$!
+
+ kill -USR1 $pid1
+ wait $pid1 || return 1
+
+ # fixed delay before checking the files; the other two multiops are
+ # not waited on explicitly
+ sleep 25
+
+ $CHECKSTAT -t file $DIR/d73-1/f73-1 || return 4
+ $CHECKSTAT -t file $DIR/d73-1/f73-2 || return 5
+ $CHECKSTAT -t file $DIR/d73-2/f73-3 || return 6
+
+ rm -rf $DIR/d73-*
+}
+run_test 73 "multiple MDC requests (should not deadlock)"
test_74() { # bug 6149, 6184
#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
#include <stdlib.h>
#include <string.h>
+#define GOTO(label, rc) do { rc; goto label; } while (0)
+
int main (int argc, char **argv) {
- int fd, i, rc;
+ int fd, i, rc = 0;
unsigned long bytes, lbytes;
struct stat st;
char *str, *str2, *readbuf;
if (argc != 3) {
fprintf(stderr, "usage: %s <filename> <bytes>\n", argv[0]);
- return 1;
+ GOTO(out, rc = 1);
}
bytes = strtoul(argv[2], NULL, 10);
if (!bytes) {
printf("No bytes!\n");
- return 1;
+ GOTO(out, rc = 2);
}
if (bytes % 2) {
printf("Need an even number of bytes!\n");
- return 1;
+ GOTO(out, rc = 3);
}
lbytes = 3*bytes/2;
str = malloc(bytes+1);
if (!str) {
printf("No enough memory for %lu bytes.\n", bytes);
- return 1;
+ GOTO(out, rc = 4);
}
str2 = malloc(lbytes+1);
- if (!str) {
+ if (!str2) {
printf("No enough memory for %lu bytes.\n", lbytes);
- return 1;
+ GOTO(out_str, rc = 5);
}
readbuf = malloc(bytes*2);
- if (!str) {
+ if (!readbuf) {
printf("No enough memory for %lu bytes.\n", bytes*2);
- return 1;
+ GOTO(out_str2, rc = 6);
}
for(i=0; i < bytes; i++)
fd = open(argv[1], O_CREAT|O_RDWR|O_TRUNC, 0700);
if (fd == -1) {
printf("Could not open file %s.\n", argv[1]);
- return 1;
+ GOTO(out_readbuf, rc = 7);
}
rc = write(fd, str, bytes);
if (rc != bytes) {
printf("Write failed!\n");
- return 1;
+ GOTO(out_fd, rc = 8);
}
sleep(1);
printf("bad file %lu size first write %lu != %lu: rc %d\n",
(unsigned long)st.st_ino, (unsigned long)st.st_size,
bytes, rc);
- return 1;
+ GOTO(out_fd, rc = 9);
}
rc = lseek(fd, bytes / 2, SEEK_SET);
if (rc != bytes / 2) {
printf("Seek failed!\n");
- return 1;
+ GOTO(out_fd, rc = 10);
}
rc = write(fd, str, bytes);
if (rc != bytes) {
printf("Write failed!\n");
- return 1;
+ GOTO(out_fd, rc = 11);
}
rc = fstat(fd, &st);
printf("bad file %lu size second write %lu != %lu: rc %d\n",
(unsigned long)st.st_ino, (unsigned long)st.st_size,
bytes, rc);
- return 1;
+ GOTO(out_fd, rc = 12);
}
rc = lseek(fd, 0, SEEK_SET);
if (rc != 0) {
printf("Seek failed!\n");
- return 1;
+ GOTO(out_fd, rc = 13);
}
rc = read(fd, readbuf, bytes * 2);
printf("bad file size after read %lu != %lu: rc %d\n",
(unsigned long)st.st_size, bytes + bytes / 2,
rc);
- return 1;
+ GOTO(out_fd, rc = 14);
}
- return 1;
+ GOTO(out_fd, rc = 15);
}
-
- fd = close(fd);
- if (fd == -1)
- return 1;
+ rc = 0;
if (bytes < 320)
printf("%s\n%s\n", readbuf, str2);
if (strcmp(readbuf, str2)) {
printf("No match!\n");
- return 1;
+ GOTO(out_fd, rc = 16);
}
printf("Pass!\n");
- return 0;
+out_fd:
+ close(fd);
+out_readbuf:
+ free(readbuf);
+out_str2:
+ free(str2);
+out_str:
+ free(str);
+out:
+ return rc;
}
}
if (st.st_atime != utb.actime ) {
- fprintf(stderr, "%s: bad utime mtime %lu should be %lu\n",
+ fprintf(stderr, "%s: bad utime atime %lu should be %lu\n",
prog, st.st_atime, utb.actime);
return 7;
}
struct kid_list_t *head = NULL;
-void push_kid(pid_t kid)
+/*
+ * push_kid: record a child pid at the front of the list rooted at 'head'.
+ * Returns 0 on success, or 1 when the list node cannot be allocated
+ * (in which case the list is left unchanged).
+ */
+int push_kid(pid_t kid)
{
struct kid_list_t *new;
new = (struct kid_list_t *)malloc(sizeof(struct kid_list_t));
+ if (new == NULL)
+ return 1;
+
new->kid = kid;
new->next = head;
head = new;
+ return 0;
}
void kill_kids(void)
return (run_one_child(directory, i, duration));
} else {
/* parent */
- push_kid(rc);
+ rc = push_kid(rc);
+ if (rc != 0) {
+ kill_kids();
+ exit(3);
+ }
}
}
/* parent process */
rootsbin_PROGRAMS = mount.lustre
sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
mount_lustre mkfs_lustre mkfs.lustre \
- tunefs_lustre tunefs.lustre l_getgroups
+ tunefs_lustre tunefs.lustre l_getgroups # llverfs llverdev
bin_PROGRAMS = lfs llog_reader
lib_LIBRARIES = liblustreapi.a
sbin_SCRIPTS = $(sbin_scripts)
lload_SOURCES = lload.c
lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
lload_DEPENDENCIES := $(LIBPTLCTL)
+lload_SOURCES = lload.c
+
+llverfs_LDADD := -lext2fs -le2p
+llverdev_LDADD := -lext2fs -lblkid
liblustreapi_a_SOURCES = liblustreapi.c
# Needed to call lconf --record
CONFIG_FILE = ""
-# Please keep these in sync with the values in portals/kp30.h
+# Please keep these in sync with the values in lnet/include/libcfs/libcfs.h
ptldebug_names = {
"trace" : (1 << 0),
"inode" : (1 << 1),
"buffs" : (1 << 11),
"other" : (1 << 12),
"dentry" : (1 << 13),
- "portals" : (1 << 14),
+ "portals" : (1 << 14), # deprecated
+ "lnet" : (1 << 14),
"page" : (1 << 15),
"dlmtrace" : (1 << 16),
"error" : (1 << 17),
"log" : (1 << 6),
"llite" : (1 << 7),
"rpc" : (1 << 8),
- "portals" : (1 << 10),
- "nal" : (1 << 11),
+ "lnet" : (1 << 10),
+ "portals" : (1 << 10), # deprecated
+ "lnd" : (1 << 11),
+ "nal" : (1 << 11), # deprecated
"pinger" : (1 << 12),
"filter" : (1 << 13),
- "ptlbd" : (1 << 14),
+ "ptlbd" : (1 << 14), # deprecated
"echo" : (1 << 15),
"ldlm" : (1 << 16),
"lov" : (1 << 17),
- "ptlrouter" : (1 << 18),
+ "ptlrouter" : (1 << 18), # deprecated
"cobd" : (1 << 19),
"sm" : (1 << 20),
"asobd" : (1 << 21),
- "confobd" : (1 << 22),
+ "confobd" : (1 << 22), # deprecated
"lmv" : (1 << 23),
"cmobd" : (1 << 24),
"sec" : (1 << 25),
+ "sec" : (1 << 26),
+ "gss" : (1 << 27),
+ "gks" : (1 << 28),
+ "mgc" : (1 << 29),
+ "mgs" : (1 << 30),
}
if not fs_uuid in self.filesystem_uuids:
continue;
- debug("recording", client_name)
+ log("Recording log", client_name, "on", self.name)
old_noexec = config.noexec
config.noexec = 0
noexec_opt = ('', '-n')
fprintf(stderr, ": %s (%d)\n", strerror(tmp_errno), tmp_errno);
}
-int llapi_file_create(char *name, long stripe_size, int stripe_offset,
+int llapi_file_create(const char *name, long stripe_size, int stripe_offset,
int stripe_count, int stripe_pattern)
{
struct lov_user_md lum = { 0 };
return rc;
}
-/* short term backwards compat only */
-int op_create_file(char *name, long stripe_size, int stripe_offset,
- int stripe_count)
-{
- return llapi_file_create(name, stripe_size, stripe_offset,
- stripe_count, 0);
-}
-
struct find_param {
int recursive;
int verbose;
return rc;
}
-/* short term backwards compat only */
-int op_get_file_stripe(char *path, struct lov_user_md *lum)
+/*
+ * llapi_file_lookup: issue the IOC_MDC_LOOKUP ioctl on dirfd, passing
+ * name (including its terminating NUL) as the first inline buffer of a
+ * packed obd_ioctl_data request.
+ *
+ * Returns -EINVAL when dirfd is negative or name is NULL, the nonzero
+ * result of obd_ioctl_pack() when packing fails (after printing a message
+ * to stderr), and otherwise whatever ioctl() returns.
+ */
+int llapi_file_lookup(int dirfd, const char *name)
{
- return llapi_file_get_stripe(path, lum);
+ struct obd_ioctl_data data = { 0 };
+ char rawbuf[8192];
+ char *buf = rawbuf;
+ int rc;
+
+ if (dirfd < 0 || name == NULL)
+ return -EINVAL;
+
+ data.ioc_version = OBD_IOCTL_VERSION;
+ data.ioc_len = sizeof(data);
+ /* the const is cast away only to fit the ioc_inlbuf1 field */
+ data.ioc_inlbuf1 = (char *)name;
+ data.ioc_inllen1 = strlen(name) + 1;
+
+ /* pack the request into the on-stack buffer handed to ioctl() */
+ rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+ if (rc) {
+ fprintf(stderr,
+ "error: IOC_MDC_LOOKUP pack failed for '%s': rc %d\n",
+ name, rc);
+ return rc;
+ }
+
+ return ioctl(dirfd, IOC_MDC_LOOKUP, buf);
}
static int find_process_file(DIR *dir, char *dname, char *fname,
get_cpumhz();
print "Processor counters run at $mhz MHz\n";
-sub readall()
+sub readstat()
{
my $prevcount;
my @iodata;
}
}
}
+
sub process_stats()
{
my $delta;
open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
do {
- readall();
+ readstat();
process_stats();
if ($interval) {
sleep($interval);
}
} while ($interval);
close STATS;
-
#include <liblustre.h>
#include <lustre/lustre_idl.h>
-int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number);
+int llog_pack_buffer(int fd, struct llog_log_hdr **llog_buf,
+ struct llog_rec_hdr ***recs, int *recs_number);
-void print_llog_header(struct llog_log_hdr* llog_buf);
-void print_records(struct llog_rec_hdr** recs_buf,int rec_number);
-void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr** recs_buf);
+void print_llog_header(struct llog_log_hdr *llog_buf);
+void print_records(struct llog_rec_hdr **recs_buf,int rec_number);
+void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf,
+ struct llog_rec_hdr **recs_buf);
#define PTL_CMD_BASE 100
char* portals_command[17]=
"GET_INTERFACE",
""
};
-
+
int main(int argc, char **argv)
{
- int rc=0;
- int fd,rec_number;
-
- struct llog_log_hdr* llog_buf=NULL;
- struct llog_rec_hdr** recs_buf=NULL;
-
+ int rc = 0;
+ int fd, rec_number;
+ struct llog_log_hdr *llog_buf = NULL;
+ struct llog_rec_hdr **recs_buf = NULL;
setlinebuf(stdout);
-
+
if(argc != 2 ){
printf("Usage: llog_reader filename \n");
return -1;
}
-
+
fd = open(argv[1],O_RDONLY);
if (fd < 0){
printf("Could not open the file %s \n",argv[1]);
goto out;
}
rc = llog_pack_buffer(fd, &llog_buf, &recs_buf, &rec_number);
-
- if(llog_buf == NULL )
- printf("error");
+ if (rc < 0) {
+ printf("Could not pack buffer; rc=%d\n", rc);
+ goto out_fd;
+ }
+
print_llog_header(llog_buf);
-
print_records(recs_buf,rec_number);
-
llog_unpack_buffer(fd,llog_buf,recs_buf);
+out_fd:
close(fd);
out:
return rc;
-int llog_pack_buffer(int fd, struct llog_log_hdr** llog,
- struct llog_rec_hdr*** recs,
- int* recs_number)
+int llog_pack_buffer(int fd, struct llog_log_hdr **llog,
+ struct llog_rec_hdr ***recs,
+ int *recs_number)
{
- int rc=0,recs_num,rd;
+ int rc = 0, recs_num,rd;
off_t file_size;
struct stat st;
- char *file_buf=NULL, *recs_buf=NULL;
- struct llog_rec_hdr** recs_pr=NULL;
- char* ptr=NULL;
+ char *file_buf=NULL, *recs_buf=NULL;
+ struct llog_rec_hdr **recs_pr=NULL;
+ char *ptr=NULL;
int cur_idx,i;
-
+
rc = fstat(fd,&st);
if (rc < 0){
printf("Get file stat error.\n");
goto out;
- }
+ }
file_size = st.st_size;
-
+
file_buf = malloc(file_size);
if (file_buf == NULL){
printf("Memory Alloc for file_buf error.\n");
rc = -ENOMEM;
goto out;
- }
+ }
*llog = (struct llog_log_hdr*)file_buf;
rd = read(fd,file_buf,file_size);
printf("Read file error.\n");
rc = -EIO; /*FIXME*/
goto clear_file_buf;
- }
+ }
/* the llog header not countable here.*/
recs_num = le32_to_cpu((*llog)->llh_count)-1;
-
- recs_buf = malloc(recs_num*sizeof(struct llog_rec_hdr*));
+
+ recs_buf = malloc(recs_num * sizeof(struct llog_rec_hdr *));
if (recs_buf == NULL){
printf("Memory Alloc for recs_buf error.\n");
rc = -ENOMEM;
goto clear_file_buf;
- }
+ }
recs_pr = (struct llog_rec_hdr **)recs_buf;
-
+
ptr = file_buf + le32_to_cpu((*llog)->llh_hdr.lrh_len);
cur_idx = 1;
i = 0;
- while (i < recs_num){
- struct llog_rec_hdr* cur_rec=(struct llog_rec_hdr*)ptr;
- while(!ext2_test_bit(cur_idx,(*llog)->llh_bitmap)){
- cur_idx++;
+ while (i < recs_num){
+ struct llog_rec_hdr *cur_rec = (struct llog_rec_hdr*)ptr;
+
+ if (ext2_test_bit(cur_idx++, (*llog)->llh_bitmap)) {
+ recs_pr[i++] = cur_rec;
ptr += cur_rec->lrh_len;
- if ((ptr-file_buf) > file_size){
- printf("The log is corrupted. \n");
+ if ((ptr - file_buf) > file_size) {
+ printf("The log is corrupted.\n");
rc = -EINVAL;
goto clear_recs_buf;
- }
+ }
}
- recs_pr[i] = cur_rec;
- ptr+=cur_rec->lrh_len;
- i++;
- cur_idx++;
}
-
+
*recs = recs_pr;
*recs_number=recs_num;
*llog=NULL;
goto out;
-
}
-
-void llog_unpack_buffer(int fd,struct llog_log_hdr* llog_buf,struct llog_rec_hdr **recs_buf)
+void llog_unpack_buffer(int fd, struct llog_log_hdr *llog_buf,
+ struct llog_rec_hdr **recs_buf)
{
free(llog_buf);
free(recs_buf);
return;
}
-
-void print_llog_header(struct llog_log_hdr* llog_buf)
+void print_llog_header(struct llog_log_hdr *llog_buf)
{
time_t t;
printf("Header size : %d \n",
- // le32_to_cpu(llog_buf->llh_hdr.lrh_len));
llog_buf->llh_hdr.lrh_len);
t = le64_to_cpu(llog_buf->llh_timestamp);
printf("Target uuid : %s \n",
(char *)(&llog_buf->llh_tgtuuid));
- /* Add the other infor you want to view here*/
+ /* Add the other info you want to view here */
printf("-----------------------\n");
return;
static void print_1_cfg(struct lustre_cfg *lcfg)
{
int i;
+
if (lcfg->lcfg_nid)
printf("nid=%s("LPX64") ", libcfs_nid2str(lcfg->lcfg_nid),
lcfg->lcfg_nid);
if (lcfg->lcfg_nal)
printf("nal=%d ", lcfg->lcfg_nal);
for (i = 0; i < lcfg->lcfg_bufcount; i++)
- printf("%d:%.*s ", i, lcfg->lcfg_buflens[i],
+ printf("%d:%.*s ", i, lcfg->lcfg_buflens[i],
(char*)lustre_cfg_buf(lcfg, i));
return;
}
{
struct lov_desc *desc;
- if ((lcfg->lcfg_bufcount == 2) &&
+ if ((lcfg->lcfg_bufcount == 2) &&
(lcfg->lcfg_buflens[1] == sizeof(*desc))) {
printf("lov_setup ");
printf("0:%s ", lustre_cfg_string(lcfg, 0));
void print_lustre_cfg(struct lustre_cfg *lcfg, int *skip)
{
enum lcfg_command_type cmd = le32_to_cpu(lcfg->lcfg_command);
-
+
if (*skip > 0)
printf("SKIP ");
return;
}
-void print_records(struct llog_rec_hdr** recs,int rec_number)
+void print_records(struct llog_rec_hdr **recs, int rec_number)
{
__u32 lopt;
int i, skip = 0;
- for(i = 0; i < rec_number; i++){
-
+ for(i = 0; i < rec_number; i++) {
printf("#%.2d ", le32_to_cpu(recs[i]->lrh_index));
lopt = le32_to_cpu(recs[i]->lrh_type);
if (lopt == OBD_CFG_REC){
struct lustre_cfg *lcfg;
- printf("L ");
- lcfg = (struct lustre_cfg *)
- ((char*)(recs[i]) + sizeof(struct llog_rec_hdr));
+ printf("L ");
+ lcfg = (struct lustre_cfg *)((char*)(recs[i]) +
+ sizeof(struct llog_rec_hdr));
print_lustre_cfg(lcfg, &skip);
}
- if (lopt == PTL_CFG_REC){
- printf("Portals - unknown type\n");
- }
+ if (lopt == PTL_CFG_REC)
+ printf("Portals - unknown type\n");
}
}
my $pname = $0;
+my $defaultpath = "/proc/fs/lustre";
+my $obdstats = "stats";
+
sub usage()
{
print STDERR "Usage: $pname <stats_file> [<interval>]\n";
}
-my $statspath;
+my $statspath = "None";
my $interval = 0;
if (($#ARGV < 0) || ($#ARGV > 1)) {
usage();
} else {
- $statspath = $ARGV[0];
+ if ( $ARGV[0] =~ /help$/ ) {
+ usage();
+ }
+ if ( -f $ARGV[0] ) {
+ $statspath = $ARGV[0];
+ } elsif ( -f "$ARGV[0]/$obdstats" ) {
+ $statspath = "$ARGV[0]/$obdstats";
+ } else {
+ my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`;
+ chop $st;
+ if ( -f "$st" ) {
+ $statspath = $st;
+ } else {
+ $st = `ls $defaultpath/*/*/$ARGV[0]/$obdstats 2> /dev/null`;
+ chop $st;
+ if ( -f "$st" ) {
+ $statspath = $st;
+ }
+ }
+ }
+ if ( $statspath =~ /^None$/ ) {
+ die "Cannot locate stat file for: $ARGV[0]\n";
+ }
if ($#ARGV == 1) {
$interval = $ARGV[1];
}
}
-
+print "$pname on $statspath\n";
my %cumulhash;
my %sumhash;
if (defined($itc_freq)) { $mhz = $itc_freq; }
elsif (defined($cpu_freq)) { $mhz = $cpu_freq; }
else { $mhz = 1; }
+ close CPUINFO;
}
get_cpumhz();
sub readstat()
{
- open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
+ seek STATS, 0, 0;
while (<STATS>) {
chop;
($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare)
}
}
+open(STATS, $statspath) || die "Cannot open $statspath: $!\n";
do {
readstat();
if ($interval) {
sleep($interval);
}
} while ($interval);
+close STATS;
--- /dev/null
+/*
+ * Large Block Device Verification Tool.
+ * This program is used to test whether the block device is correctly
+ * handling IO beyond 2TB boundary.
+ * This tool has two working modes:
+ * 1. full mode
+ * 2. fast mode
+ * The full mode is basic mode in which program writes the test pattern
+ * on entire disk. The test pattern (device offset and timestamp) is written
+ * at the beginning of each 4kB block. When the whole device is full then
+ * read operation is performed to verify that the test pattern is correct.
+ * In the fast mode the program writes data at the critical locations
+ * of the device such as start of the device, before and after multiple of 1GB
+ * offset and at the end.
+ * A chunk buffer with default size of 1MB is used to write and read test
+ * pattern in bulk.
+ */
+
+#include <features.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <gnu/stubs.h>
+#include <ext2fs/ext2fs.h>
+#include <blkid/blkid.h>
+
+/* Size constants, in bytes. */
+#define ONE_MB (1024 * 1024)
+#define ONE_GB (1024 * 1024 * 1024)
+#define HALF_MB (ONE_MB / 2)
+#define ONE_KB 1024
+#define HALF_KB (ONE_KB / 2)
+/* distance between consecutive test-pattern records in a chunk buffer */
+#define BLOCKSIZE 4096
+
+/* Structure for writing test pattern */
+struct block_data {
+ loff_t bd_offset; /* offset passed to fill_chunk() for this block */
+ time_t bd_time; /* timestamp of the test run */
+};
+static char *progname; /* name by which this program was run. */
+static unsigned verbose = 1; /* prints offset in kB, operation rate */
+static int readoption; /* run test in read-only (verify) mode */
+static int writeoption; /* run test in write_only mode */
+const char *devname; /* name of device to be tested. */
+static unsigned full = 1; /* flag to full check */
+static int fd; /* descriptor of the device; set by open_dev() */
+static int isatty_flag; /* isatty(STDOUT_FILENO) result; set in main() */
+
+/*
+ * Long-option table for getopt_long().  Each entry maps to the short
+ * option of the same meaning, and its argument requirement must agree
+ * with the optstring "c:fhlo:pqrt:vw" used in main(): only -c, -o and -t
+ * take a value.  --partial and --quiet are plain flags; declaring them
+ * with required_argument made them consume the following word (typically
+ * the device name).
+ */
+static struct option const longopts[] =
+{
+        { "chunksize", required_argument, 0, 'c' },
+        { "force", no_argument, 0, 'f' },
+        { "help", no_argument, 0, 'h' },
+        { "offset", required_argument, 0, 'o' },
+        { "partial", no_argument, 0, 'p' },
+        { "quiet", no_argument, 0, 'q' },
+        { "read", no_argument, 0, 'r' },
+        { "timestamp", required_argument, 0, 't' },
+        { "verbose", no_argument, 0, 'v' },
+        { "write", no_argument, 0, 'w' },
+        { "long", no_argument, 0, 'l' },
+        { 0, 0, 0, 0}
+};
+
+/*
+ * usage: print the command-line help and terminate the program.
+ *
+ * The help text is written to stdout only when status is nonzero; in
+ * every case the process then exits with status, so this function does
+ * not return.  main() calls it for -h/--help and for unrecognised options.
+ *
+ * NOTE(review): the text gives the -c default as 1048576, while main()
+ * multiplies the -c argument by ONE_MB -- confirm the intended unit.
+ */
+void usage(int status)
+{
+ if (status != 0) {
+ printf("\nUsage: %s [OPTION]... <device-name> ...\n",
+ progname);
+ printf("Block device verification tool.\n"
+ "\t-t {seconds}, --timestamp, "
+ "set test time (default=current time())\n"
+ "\t-o {offset}, --offset, "
+ "offset in kB of start of test, default=0\n"
+ "\t-r, --read run test in verify mode\n"
+ "\t-w, --write run test in test-pattern mode, default=rw\n"
+ "\t-v, --verbose\n"
+ "\t-q, --quiet\n"
+ "\t-l, --long, full check of device\n"
+ "\t-p, --partial, for partial check (1GB steps)\n"
+ "\t-c, --chunksize, IO chunk size, default=1048576\n"
+ "\t-f, --force, force test to run without confirmation\n"
+ "\t-h, --help display this help and exit\n");
+ }
+ exit(status);
+}
+
+/*
+ * open_dev: open the device for exclusive access and return its descriptor.
+ *
+ * devname - path of the block device (shadows the global of the same name)
+ * mode    - open(2) access mode; O_EXCL and O_LARGEFILE are added here
+ *
+ * The descriptor is also stored in the global 'fd'.  The function does not
+ * return on failure: it exits with status 1 when the mount check fails or
+ * the device is mounted, and with status 3 when open() fails.
+ */
+static int open_dev(const char *devname, int mode)
+{
+        int mount_flags;
+        char mountpt[80] = "";
+
+        /*
+         * NOTE(review): ext2fs_check_mount_point() reports failure through
+         * its return value; whether errno is meaningful here should be
+         * confirmed against the e2fsprogs documentation.
+         */
+        if (ext2fs_check_mount_point(devname, &mount_flags, mountpt,
+                                     sizeof(mountpt))) {
+                fprintf(stderr, "%s: ext2fs_check_mount_point failed:%s\n",
+                        progname, strerror(errno));
+                exit(1);
+        }
+        /* never run the (possibly destructive) test on a mounted device */
+        if (mount_flags & EXT2_MF_MOUNTED){
+                fprintf(stderr, "%s: %s is already mounted\n", progname,
+                        devname);
+                exit(1);
+        }
+        fd = open(devname, mode | O_EXCL | O_LARGEFILE);
+        if (fd < 0) {
+                fprintf(stderr, "%s: Open failed: %s\n", progname,
+                        strerror(errno));
+                exit(3);
+        }
+        return (fd);
+}
+
+/*
+ * sizeof_dev: return the size of the device open on fd, in bytes.
+ *
+ * Returns 0 when the size cannot be determined, which is the value
+ * main() tests for; returning 1 here would be mistaken for a valid
+ * one-byte device.  With verbose set, the size is also printed.
+ */
+static unsigned long long sizeof_dev(int fd)
+{
+        blkid_loff_t numbytes = 0;
+
+        numbytes = blkid_get_dev_size(fd);
+        if (numbytes <= 0) {
+                fprintf(stderr, "%s: blkid_get_dev_size(%s) failed\n",
+                        progname, devname);
+                return 0;
+        }
+
+        if (verbose)
+                printf("%s: %s is %llu bytes (%g GB) in size\n",
+                       progname, devname,
+                       (unsigned long long)numbytes, (double)numbytes / ONE_GB);
+
+        return numbytes;
+}
+
+/*
+ * verify_chunk: check the test pattern at the start of every BLOCKSIZE
+ * block of chunk_buf.
+ *
+ * chunk_buf - data read back from the device
+ * chunksize - number of valid bytes in chunk_buf
+ * chunk_off - device offset the first block is expected to carry
+ * time_st   - timestamp every block is expected to carry
+ *
+ * Returns 0 when every block matches; on the first mismatch prints the
+ * expected and found values to stderr and returns 1.
+ */
+int verify_chunk(char *chunk_buf, size_t chunksize,
+                 loff_t chunk_off, time_t time_st)
+{
+        struct block_data *bd;
+        char *chunk_end;
+
+        for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+             chunk_buf < chunk_end;
+             chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+                bd = (struct block_data *)chunk_buf;
+                if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st))
+                        continue;
+
+                /* loff_t and time_t widths vary by platform: cast to the
+                 * types the conversion specifiers require */
+                fprintf(stderr, "\n%s: verify failed at offset/timestamp "
+                        "%llu/%lu: found %llu/%lu instead\n", progname,
+                        (unsigned long long)chunk_off, (unsigned long)time_st,
+                        (unsigned long long)bd->bd_offset,
+                        (unsigned long)bd->bd_time);
+                return 1;
+        }
+        return 0;
+}
+
+/*
+ * fill_chunk: stamp the test pattern into chunk_buf.
+ *
+ * A struct block_data holding the running offset and the timestamp is
+ * stored at the start of each BLOCKSIZE block; the offset advances by
+ * BLOCKSIZE from chunk_off for every block.  The rest of each block is
+ * left untouched.
+ */
+void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+                time_t time_st)
+{
+        char *limit = chunk_buf + chunksize - sizeof(struct block_data);
+        char *pos = chunk_buf;
+
+        while (pos < limit) {
+                struct block_data *rec = (struct block_data *)pos;
+
+                rec->bd_offset = chunk_off;
+                rec->bd_time = time_st;
+
+                pos += BLOCKSIZE;
+                chunk_off += BLOCKSIZE;
+        }
+}
+
+/*
+ * show_rate: print a progress line for operation 'op' at most once every
+ * few seconds.
+ *
+ * offset - current device offset in bytes (printed in kB)
+ * count  - bytes transferred since the last report; reset to 0 after
+ *          each printed report
+ *
+ * The first call only records the time, so no rate is printed for it.
+ * On a terminal the line is redrawn in place, otherwise one line is
+ * emitted per report.
+ */
+void show_rate(char *op, unsigned long long offset, unsigned long long *count)
+{
+        static time_t last;
+        time_t now = time(NULL);
+        double diff = now - last;
+
+        /* report (or start the clock) only after more than 4 seconds */
+        if (!(diff > 4))
+                return;
+
+        if (last != 0) {
+                if (isatty_flag)
+                        printf("\r");
+                printf("%s offset: %14llukB %5g MB/s ", op,
+                       offset / ONE_KB, (double)(*count) /ONE_MB /diff);
+                if (isatty_flag)
+                        fflush(stdout);
+                else
+                        printf("\n");
+
+                *count = 0;
+        }
+        last = now;
+}
+
+/*
+ * write_one_chunk: fill chunk_buf with the pattern for 'offset' and write
+ * it at the current file position.  Returns 0 on success, 1 on a write
+ * error or a short write (both are reported on stderr).
+ */
+static int write_one_chunk(loff_t offset, char *chunk_buf, size_t chunksize,
+                           time_t time_st)
+{
+        ssize_t rc;
+
+        fill_chunk(chunk_buf, chunksize, offset, time_st);
+        rc = write(fd, chunk_buf, chunksize);
+        if (rc < 0) {
+                fprintf(stderr, "\n%s: write %llu failed: %s\n",
+                        progname, (unsigned long long)offset, strerror(errno));
+                return 1;
+        }
+        if ((size_t)rc != chunksize) {
+                /* errno is not set for a short write, so report the counts */
+                fprintf(stderr, "\n%s: short write at %llu: %lld of %llu bytes\n",
+                        progname, (unsigned long long)offset, (long long)rc,
+                        (unsigned long long)chunksize);
+                return 1;
+        }
+        return 0;
+}
+
+/*
+ * write_chunks: write the test pattern to the device from offset up to
+ * write_end.
+ *
+ * In full mode every chunk is written.  Otherwise the loop advances by
+ * ONE_GB - chunksize and, past the first chunk, writes two consecutive
+ * chunks per step.  The final chunk is shortened so that nothing is
+ * written beyond write_end.  The data is flushed with fsync() at the end.
+ *
+ * Returns 0 on success and 1 on any seek, write or fsync failure.
+ */
+int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf,
+                 size_t chunksize, time_t time_st)
+{
+        unsigned long long stride, count = 0;
+
+        stride = full ? chunksize : (ONE_GB - chunksize);
+
+        /* build the alignment mask in loff_t so that offsets above 4GB are
+         * not truncated where size_t is only 32 bits wide */
+        for (offset = offset & ~((loff_t)chunksize - 1); offset < write_end;
+             offset += stride) {
+                if (lseek64(fd, offset, SEEK_SET) == -1) {
+                        fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n",
+                                progname, (unsigned long long)offset,
+                                strerror(errno));
+                        return 1;
+                }
+                if (offset + chunksize > write_end)
+                        chunksize = write_end - offset;
+
+                if (!full && offset > chunksize) {
+                        if (write_one_chunk(offset, chunk_buf, chunksize,
+                                            time_st))
+                                return 1;
+                        offset += chunksize;
+                        if (offset + chunksize > write_end)
+                                chunksize = write_end - offset;
+                }
+
+                if (write_one_chunk(offset, chunk_buf, chunksize, time_st))
+                        return 1;
+
+                count += chunksize;
+                if (verbose > 1)
+                        show_rate("write", offset, &count);
+        }
+        if (verbose > 1) {
+                show_rate("write", offset, &count);
+                printf("\nwrite complete\n");
+        }
+        if (fsync(fd) == -1) {
+                fprintf(stderr, "%s: fsync failed: %s\n", progname,
+                        strerror(errno));
+                return 1;
+        }
+        return 0;
+}
+
+/*
+ * read_one_chunk: read chunksize bytes at the current file position into
+ * chunk_buf and verify the pattern expected for 'offset'.  Returns 0 on
+ * success, 1 on a read error, a short read or a verification failure.
+ */
+static int read_one_chunk(loff_t offset, char *chunk_buf, size_t chunksize,
+                          time_t time_st)
+{
+        ssize_t rc;
+
+        rc = read(fd, chunk_buf, chunksize);
+        if (rc < 0) {
+                fprintf(stderr, "\n%s: read %llu failed: %s\n",
+                        progname, (unsigned long long)offset, strerror(errno));
+                return 1;
+        }
+        if ((size_t)rc != chunksize) {
+                /* verifying a partly filled buffer would compare stale data */
+                fprintf(stderr, "\n%s: short read at %llu: %lld of %llu bytes\n",
+                        progname, (unsigned long long)offset, (long long)rc,
+                        (unsigned long long)chunksize);
+                return 1;
+        }
+        return verify_chunk(chunk_buf, chunksize, offset, time_st) != 0;
+}
+
+/*
+ * read_chunks: read back and verify the test pattern from offset up to
+ * read_end, visiting the same locations that write_chunks() writes for
+ * the current mode.
+ *
+ * The BLKFLSBUF ioctl is issued first; its failure is only reported (when
+ * verbose) and otherwise ignored.
+ *
+ * Returns 0 when all data verifies and 1 on any seek, read or
+ * verification failure.
+ */
+int read_chunks(loff_t offset, loff_t read_end, char *chunk_buf,
+                size_t chunksize, time_t time_st)
+{
+        unsigned long long stride, count = 0;
+
+        stride = full ? chunksize : (ONE_GB - chunksize);
+
+        if (ioctl(fd, BLKFLSBUF, 0) < 0 && verbose)
+                fprintf(stderr, "%s: ioctl BLKFLSBUF failed: %s (ignoring)\n",
+                        progname, strerror(errno));
+
+        /* build the alignment mask in loff_t so that offsets above 4GB are
+         * not truncated where size_t is only 32 bits wide */
+        for (offset = offset & ~((loff_t)chunksize - 1); offset < read_end;
+             offset += stride) {
+                if (lseek64(fd, offset, SEEK_SET) == -1) {
+                        fprintf(stderr, "\n%s: lseek64(%llu) failed: %s\n",
+                                progname, (unsigned long long)offset,
+                                strerror(errno));
+                        return 1;
+                }
+                if (offset + chunksize > read_end)
+                        chunksize = read_end - offset;
+
+                if (!full && offset > chunksize) {
+                        if (read_one_chunk(offset, chunk_buf, chunksize,
+                                           time_st))
+                                return 1;
+                        offset += chunksize;
+                        if (offset + chunksize > read_end)
+                                chunksize = read_end - offset;
+                }
+
+                if (read_one_chunk(offset, chunk_buf, chunksize, time_st))
+                        return 1;
+
+                count += chunksize;
+                if (verbose > 1)
+                        show_rate("read", offset, &count);
+        }
+        if (verbose > 1) {
+                show_rate("read", offset, &count);
+                printf("\nread complete\n");
+        }
+        return 0;
+}
+
+/*
+ * main: parse the command line, ask for confirmation before overwriting
+ * the device, then write and/or verify the test pattern.
+ *
+ * Exit status: 0 on success or when the user declines to continue,
+ * 2 on a read/verify failure, 3 on a write failure, 4 when the chunk
+ * buffer cannot be allocated, 6 when the offset check against the device
+ * size fails, 7 when the device size is reported as 0, and -1 for a zero
+ * chunk size or a missing device name.  usage() and open_dev() may exit
+ * directly with their own codes.
+ */
+int main(int argc, char **argv)
+{
+        time_t time_st = 0;             /* Default timestamp */
+        loff_t offset = 0, offset_orig; /* start offset in bytes (-o is in kB) */
+        size_t chunksize = ONE_MB;      /* IO chunk size */
+        char *chunk_buf = NULL;
+        unsigned int force = 0;         /* run test without confirmation */
+        unsigned long long dev_size = 0;
+        char yesno[4] = "";
+        int mode = O_RDWR;              /* mode which device should be opened */
+        int error = 0, c;
+
+        progname = strrchr(argv[0], '/') == NULL ?
+                argv[0] : strrchr(argv[0], '/') + 1;
+        while ((c = getopt_long(argc, argv, "c:fhlo:pqrt:vw", longopts,
+                                NULL)) != -1) {
+                switch (c) {
+                case 'c':
+                        chunksize = (strtoul(optarg, NULL, 0) * ONE_MB);
+                        if (!chunksize) {
+                                fprintf(stderr, "%s: chunk size value should be "
+                                        "nonzero and multiple of 1MB\n",
+                                        progname);
+                                return -1;
+                        }
+                        break;
+                case 'f':
+                        force = 1;
+                        break;
+                case 'l':
+                        full = 1;
+                        break;
+                case 'o':
+                        offset = strtoull(optarg, NULL, 0) * ONE_KB;
+                        break;
+                case 'p':
+                        full = 0;
+                        break;
+                case 'q':
+                        verbose = 0;
+                        break;
+                case 'r':
+                        readoption = 1;
+                        mode = O_RDONLY;
+                        break;
+                case 't':
+                        time_st = (time_t)strtoul(optarg, NULL, 0);
+                        break;
+                case 'v':
+                        verbose++;
+                        break;
+                case 'w':
+                        writeoption = 1;
+                        mode = O_WRONLY;
+                        break;
+                case 'h':
+                default:
+                        usage (1);
+                        return 0;
+                }
+        }
+        offset_orig = offset;
+        devname = argv[optind];
+        if (!devname) {
+                fprintf(stderr, "%s: device name not given\n", progname);
+                usage (1);
+                return -1;
+        }
+
+        /* -r and -w together, or neither of them, mean write then verify */
+        if (readoption && writeoption)
+                mode = O_RDWR;
+        if (!readoption && !writeoption) {
+                readoption = 1;
+                writeoption = 1;
+        }
+
+        if (!force && writeoption) {
+                printf("%s: permanently overwrite all data on %s (yes/no)? ",
+                       progname, devname);
+                fflush(stdout);
+                /* an unreadable answer leaves yesno empty and so declines */
+                if (scanf("%3s", yesno) != 1)
+                        yesno[0] = '\0';
+                /* strcasecmp() returns 0 on a match: continue only when the
+                 * answer is "yes" or "y", stop on anything else */
+                if (strcasecmp("yes", yesno) && strcasecmp("y", yesno)) {
+                        printf("Not continuing due to '%s' response\n", yesno);
+                        return 0;
+                }
+        }
+
+        /* a verify-only run needs the timestamp of the run that wrote */
+        if (!writeoption && time_st == 0) {
+                fprintf(stderr, "%s: must give timestamp for read-only test\n",
+                        progname);
+                usage(1);
+        }
+
+        fd = open_dev(devname, mode);
+        dev_size = sizeof_dev(fd);
+        if (!dev_size) {
+                fprintf(stderr, "%s: cannot test on device size < 1MB\n",
+                        progname);
+                error = 7;
+                goto close_dev;
+        }
+
+        /* NOTE(review): the test uses offset * 2 while the message reports
+         * the plain offset -- confirm which is intended */
+        if (dev_size < (offset * 2)) {
+                fprintf(stderr, "%s: device size %llu < offset %llu\n",
+                        progname, dev_size, (unsigned long long)offset);
+                error = 6;
+                goto close_dev;
+        }
+        if (!time_st)
+                (void)time(&time_st);
+
+        isatty_flag = isatty(STDOUT_FILENO);
+
+        if (verbose)
+                printf("Timestamp: %lu\n", (unsigned long)time_st);
+
+        chunk_buf = calloc(chunksize, 1);
+        if (chunk_buf == NULL) {
+                fprintf(stderr, "%s: memory allocation failed for chunk_buf\n",
+                        progname);
+                error = 4;
+                goto close_dev;
+        }
+        if (writeoption) {
+                if (write_chunks(offset, dev_size, chunk_buf, chunksize,
+                                 time_st)) {
+                        error = 3;
+                        goto chunk_buf;
+                }
+                if (!full) { /* end of device aligned to a block */
+                        offset = ((dev_size - chunksize + BLOCKSIZE - 1) &
+                                  ~(BLOCKSIZE - 1));
+                        if (write_chunks(offset, dev_size, chunk_buf, chunksize,
+                                         time_st)) {
+                                error = 3;
+                                goto chunk_buf;
+                        }
+                }
+                /* the verify pass starts again from the requested offset */
+                offset = offset_orig;
+        }
+        if (readoption) {
+                if (read_chunks(offset, dev_size, chunk_buf, chunksize,
+                                time_st)) {
+                        error = 2;
+                        goto chunk_buf;
+                }
+                if (!full) { /* end of device aligned to a block */
+                        offset = ((dev_size - chunksize + BLOCKSIZE - 1) &
+                                  ~(BLOCKSIZE - 1));
+                        if (read_chunks(offset, dev_size, chunk_buf, chunksize,
+                                        time_st)) {
+                                error = 2;
+                                goto chunk_buf;
+                        }
+                }
+                if (verbose)
+                        printf("\n%s: data verified successfully\n", progname);
+        }
+        error = 0;
+chunk_buf:
+        free(chunk_buf);
+close_dev:
+        close(fd);
+        return error;
+}
--- /dev/null
+/*
+ * ext3 Filesystem Verification Tool.
+ * This program tests the correct operation of ext3 filesystem.
+ * This tool has two working modes:
+ * 1. full mode
+ * 2. fast mode
+ * The full mode is the basic mode, in which the program creates a
+ * subdirectory in the test filesystem and writes n (files_in_dir,
+ * default=32) large (4GB) files to the directory, with the test pattern
+ * at the start of each 4kB block.
+ * The test pattern contains a timestamp, the relative file offset and a
+ * per-file unique identifier (inode number). This continues until the whole
+ * filesystem is full, and then the tool verifies that the data in all of
+ * the test files is correct.
+ * In the fast mode the tool creates test directories with the
+ * EXT3_TOPDIR_FL flag set. The number of directories equals the number
+ * of block groups in the filesystem (e.g. 65536 directories for an 8TB
+ * filesystem); the tool then writes a single 1MB file in each directory
+ * and verifies that the data in each file is correct.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <time.h>
+#include <dirent.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/vfs.h>
+#include <gnu/stubs.h>
+#include <ext2fs/ext2fs.h>
+#include <gnu/stubs.h>
+#include <e2p/e2p.h>
+
+/* Size constants, in bytes. */
+#define ONE_MB (1024 * 1024)
+#define ONE_GB ((unsigned long long)(1024 * 1024 * 1024))
+/* distance between consecutive test-pattern records in a chunk buffer */
+#define BLOCKSIZE 4096
+
+/* Structure for writing test pattern */
+struct block_data {
+ loff_t bd_offset; /* offset passed to fill_chunk() for this block */
+ time_t bd_time; /* timestamp of the test run */
+ ino_t bd_inode; /* inode value passed to fill_chunk() */
+};
+static char *progname; /* name by which this program was run. */
+static unsigned verbose = 1; /* prints offset in kB, operation rate */
+static int readoption; /* run test in read-only (verify) mode */
+static int writeoption; /* run test in write_only mode */
+char *testdir; /* name of device to be tested. */
+static unsigned full = 1; /* flag to full check */
+static int errno_local; /* local copy of errno */
+static unsigned long num_files; /* Total number of files for read/write */
+static loff_t file_size; /* Size of each file */
+static unsigned files_in_dir = 32; /* number of files in each directory */
+static unsigned num_dirs = 30000; /* total number of directories */
+/* rwxr-xr-x; presumably the mode for created directories -- TODO confirm */
+const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
+static int fd = -1; /* descriptor of the current file; set by open_file() */
+static int isatty_flag;
+static int perms = S_IRWXU | S_IRGRP | S_IROTH; /* mode passed to open() */
+
+/*
+ * Long-option table for getopt_long().  Only --chunksize, --offset and
+ * --timestamp take a value.  --partial and --quiet are plain flags (the
+ * usage text shows -p without an argument); declaring them with
+ * required_argument made them consume the following word, typically the
+ * filesystem path.
+ */
+static struct option const longopts[] =
+{
+        { "chunksize", required_argument, 0, 'c' },
+        { "help", no_argument, 0, 'h' },
+        { "offset", required_argument, 0, 'o' },
+        { "long", no_argument, 0, 'l' },
+        { "partial", no_argument, 0, 'p' },
+        { "quiet", no_argument, 0, 'q' },
+        { "read", no_argument, 0, 'r' },
+        { "timestamp", required_argument, 0, 't' },
+        { "verbose", no_argument, 0, 'v' },
+        { "write", no_argument, 0, 'w' },
+        { 0, 0, 0, 0}
+};
+
+/*
+ * usage: print the command-line help and terminate the program.
+ *
+ * The help text is written to stdout only when status is nonzero; in
+ * every case the process then exits with status, so this function does
+ * not return.  It is meant for the --help option and for incorrect
+ * command lines.
+ */
+void usage(int status)
+{
+ if (status != 0)
+ {
+ printf("\nUsage: %s [OPTION]... <filesystem path> ...\n",
+ progname);
+ printf("ext3 filesystem verification tool.\n"
+ "\t-t {seconds} for --timestamp, set test time"
+ "(default=current time())\n"
+ "\t-o {offset} for --offset, directory starting offset"
+ " from which tests should start\n"
+ "\t-r run test in read (verify) mode\n"
+ "\t-w run test in write (test-pattern) mode (default=r&w)\n"
+ "\t-v for verbose\n"
+ "\t-p for --partial, for partial check (1MB files)\n"
+ "\t-l for --long, full check (4GB file with 4k blocks)\n"
+ "\t-c for --chunksize, IO chunk size (default=1048576)\n"
+ "\t-h display this help and exit\n"
+ "\t--help display this help and exit\n");
+ }
+ exit(status);
+}
+
+/*
+ * open_file: open 'file' with the given open(2) flags, using 'perms' as
+ * the creation mode.  The descriptor is stored in the global 'fd' and
+ * also returned.  On failure a message is printed to stderr and the
+ * program exits with status 3.
+ */
+static int open_file(const char *file, int flag)
+{
+        fd = open(file, flag, perms);
+        if (fd >= 0)
+                return (fd);
+
+        fprintf(stderr, "\n%s: Open '%s' failed:%s\n",
+                progname, file, strerror(errno));
+        exit(3);
+}
+
+/*
+ * verify_chunk: check the test pattern at the start of every BLOCKSIZE
+ * block of chunk_buf.
+ *
+ * chunk_buf - data read back from the file
+ * chunksize - number of valid bytes in chunk_buf
+ * chunk_off - offset the first block is expected to carry
+ * time_st   - timestamp every block is expected to carry
+ * inode_st  - inode value every block is expected to carry
+ * file      - file name, used only in the error message
+ *
+ * Returns 0 when every block matches; on the first mismatch prints the
+ * expected and found values to stderr and returns 1.
+ */
+int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+                 time_t time_st, ino_t inode_st, char *file)
+{
+        struct block_data *bd;
+        char *chunk_end;
+
+        for (chunk_end = chunk_buf + chunksize - sizeof(*bd);
+             chunk_buf < chunk_end;
+             chunk_buf += BLOCKSIZE, chunk_off += BLOCKSIZE) {
+                bd = (struct block_data *)chunk_buf;
+                if ((bd->bd_offset == chunk_off) && (bd->bd_time == time_st) &&
+                    (bd->bd_inode == inode_st))
+                        continue;
+                /* loff_t, time_t and ino_t widths vary by platform: cast to
+                 * the types the conversion specifiers require */
+                fprintf(stderr,"\n%s: verify %s failed offset/timestamp/inode "
+                        "%llu/%lu/%llu: found %llu/%lu/%llu instead\n",
+                        progname, file,
+                        (unsigned long long)chunk_off, (unsigned long)time_st,
+                        (unsigned long long)inode_st,
+                        (unsigned long long)bd->bd_offset,
+                        (unsigned long)bd->bd_time,
+                        (unsigned long long)bd->bd_inode);
+                return 1;
+        }
+        return 0;
+}
+
+/*
+ * fill_chunk: stamp the test pattern into chunk_buf.
+ *
+ * A header recording the file offset, the timestamp and the inode number is
+ * written at the beginning of each BLOCKSIZE-sized block of the chunk.  The
+ * recorded offset starts at chunk_off and grows by BLOCKSIZE per block.
+ */
+void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
+		time_t time_st, ino_t inode_st)
+{
+	/* last position at which a whole header still fits in the chunk */
+	char *limit = chunk_buf + chunksize - sizeof(struct block_data);
+	char *pos = chunk_buf;
+
+	while (pos < limit) {
+		struct block_data *hdr = (struct block_data *)pos;
+
+		hdr->bd_offset = chunk_off;
+		hdr->bd_time = time_st;
+		hdr->bd_inode = inode_st;
+
+		pos += BLOCKSIZE;
+		chunk_off += BLOCKSIZE;
+	}
+}
+
+/*
+ * write_chunks: write the test pattern to fd from offset up to write_end.
+ *
+ * The starting offset is rounded down to a multiple of chunksize.  In full
+ * mode consecutive chunks are written (stride == chunksize).  Otherwise the
+ * stride is ONE_GB - chunksize and, for offsets beyond chunksize, two
+ * back-to-back chunks are written at each step.  The chunk is shortened when
+ * it would run past write_end.
+ *
+ * Returns 0 on success, and also when a write fails with ENOSPC (the error is
+ * then recorded in errno_local for the caller).  Returns 1 on any other
+ * lseek64()/write() failure, after reporting it on stderr.
+ */
+int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf,
+		 size_t chunksize, time_t time_st,
+		 ino_t inode_st, const char *file)
+{
+	unsigned long long stride;
+
+	stride = full ? chunksize : (ONE_GB - chunksize);
+	/*
+	 * Build the alignment mask in loff_t: computed in a 32-bit size_t it
+	 * would be zero-extended and clear the upper half of the offset.
+	 */
+	for (offset = offset & ~((loff_t)chunksize - 1); offset < write_end;
+	     offset += stride) {
+		if (lseek64(fd, offset, SEEK_SET) == -1) {
+			fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
+				progname, file, (unsigned long long)offset,
+				strerror(errno));
+			return 1;
+		}
+		if (offset + chunksize > write_end)
+			chunksize = write_end - offset;
+		if (!full && offset > chunksize) {
+			fill_chunk(chunk_buf, chunksize, offset, time_st,
+				   inode_st);
+			if (write(fd, chunk_buf, chunksize) < 0) {
+				if (errno == ENOSPC) {
+					errno_local = errno;
+					return 0;
+				}
+				fprintf(stderr,
+					"\n%s: write %s+%llu failed: %s\n",
+					progname, file,
+					(unsigned long long)offset,
+					strerror(errno));
+				/*
+				 * errno may have been changed by fprintf();
+				 * return the same fixed code as the other
+				 * failure paths.
+				 */
+				return 1;
+			}
+			offset += chunksize;
+			if (offset + chunksize > write_end)
+				chunksize = write_end - offset;
+		}
+		fill_chunk(chunk_buf, chunksize, offset, time_st, inode_st);
+		if (write(fd, (char *) chunk_buf, chunksize) < 0) {
+			if (errno == ENOSPC) {
+				errno_local = errno;
+				return 0;
+			}
+			fprintf(stderr, "\n%s: write %s+%llu failed: %s\n",
+				progname, file, (unsigned long long)offset,
+				strerror(errno));
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * read_chunks: read fd from offset up to read_end and verify the test
+ * pattern of every chunk with verify_chunk().
+ *
+ * The offsets visited mirror write_chunks(): the start is rounded down to a
+ * multiple of chunksize, the stride is chunksize in full mode and
+ * ONE_GB - chunksize otherwise, and in the latter case two back-to-back
+ * chunks are read at each step for offsets beyond chunksize.
+ *
+ * Returns 0 if every chunk was read and verified, 1 on an lseek64()/read()
+ * failure or a verification mismatch (both reported on stderr).
+ *
+ * NOTE(review): only read() < 0 is treated as an error; after a short read
+ * or EOF the whole chunk buffer is still passed to verify_chunk().
+ */
+int read_chunks(int fd, loff_t offset, loff_t read_end, char *chunk_buf,
+		size_t chunksize, time_t time_st, ino_t inode_st, char *file)
+{
+	unsigned long long stride;
+
+	stride = full ? chunksize : (ONE_GB - chunksize);
+	/*
+	 * Build the alignment mask in loff_t: computed in a 32-bit size_t it
+	 * would be zero-extended and clear the upper half of the offset.
+	 */
+	for (offset = offset & ~((loff_t)chunksize - 1); offset < read_end;
+	     offset += stride) {
+		if (lseek64(fd, offset, SEEK_SET) == -1) {
+			fprintf(stderr, "\n%s: lseek64(%s+%llu) failed: %s\n",
+				progname, file, (unsigned long long)offset,
+				strerror(errno));
+			return 1;
+		}
+		if (offset + chunksize > read_end)
+			chunksize = read_end - offset;
+		if (!full && offset > chunksize) {
+			if (read(fd, chunk_buf, chunksize) < 0) {
+				fprintf(stderr,
+					"\n%s: read %s+%llu failed: %s\n",
+					progname, file,
+					(unsigned long long)offset,
+					strerror(errno));
+				return 1;
+			}
+			if (verify_chunk(chunk_buf, chunksize, offset,
+					 time_st, inode_st, file) != 0)
+				return 1;
+			offset += chunksize;
+			if (offset + chunksize >= read_end)
+				chunksize = read_end - offset;
+		}
+		if (read(fd, chunk_buf, chunksize) < 0) {
+			fprintf(stderr, "\n%s: read %s+%llu failed: %s\n",
+				progname, file, (unsigned long long)offset,
+				strerror(errno));
+			return 1;
+		}
+		if (verify_chunk(chunk_buf, chunksize, offset, time_st,
+				 inode_st, file) != 0)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * new_file: build the path "<cur_dir>/file<NNN>" in tempfile, where NNN is
+ * file_num zero-padded to at least three digits.
+ *
+ * tempfile must point to a buffer of at least PATH_MAX bytes (the callers
+ * visible in this file pass PATH_MAX-sized arrays); a longer result is
+ * truncated instead of overflowing the buffer.
+ *
+ * Returns tempfile.
+ */
+char *new_file(char *tempfile, char *cur_dir, int file_num)
+{
+	snprintf(tempfile, PATH_MAX, "%s/file%03d", cur_dir, file_num);
+	return tempfile;
+}
+
+/*
+ * new_dir: build the path "<testdir>/dir<NNNNN>" in tempdir, where NNNNN is
+ * dir_num zero-padded to at least five digits and testdir is the test
+ * directory variable defined outside this function.
+ *
+ * tempdir must point to a buffer of at least PATH_MAX bytes (the callers
+ * visible in this file pass PATH_MAX-sized arrays); a longer result is
+ * truncated instead of overflowing the buffer.
+ *
+ * Returns tempdir.
+ */
+char *new_dir(char *tempdir, int dir_num)
+{
+	snprintf(tempdir, PATH_MAX, "%s/dir%05d", testdir, dir_num);
+	return tempdir;
+}
+
+/*
+ * show_filename: progress display for the file currently read or written.
+ *
+ * Prints "<op> File name: <filename>" when more than 4 seconds have passed
+ * since the previous print, or when verbose is greater than 2.  When
+ * isatty_flag is set the line is preceded by a carriage return and stdout is
+ * flushed; otherwise a newline is printed after it.
+ */
+void show_filename(char *op, char *filename)
+{
+	static time_t last;	/* time of the previous print */
+	time_t now = time(NULL);
+	double elapsed = now - last;
+
+	/* rate-limit the output unless the verbosity level is high */
+	if (elapsed <= 4 && verbose <= 2)
+		return;
+
+	if (isatty_flag)
+		printf("\r");
+	printf("%s File name: %s ", op, filename);
+	if (isatty_flag)
+		fflush(stdout);
+	else
+		printf("\n");
+	last = now;
+}
+
+/*
+ * dir_write: create the test directories and files and fill them with the
+ * test pattern.  Used for both full and partial (fast) modes.
+ *
+ * chunk_buf  IO buffer, handed to write_chunks()
+ * chunksize  size of chunk_buf in bytes
+ * time_st    timestamp recorded in every block
+ * dir_num    number of the first directory to create
+ *
+ * Each directory "dirNNNNN" under testdir receives files_in_dir files of
+ * file_size bytes.  The num_files counter is incremented once per completed
+ * loop iteration.  Writing stops when dir_num reaches the directory limit
+ * derived from num_dirs or when the filesystem reports ENOSPC.
+ *
+ * Returns 0 on success (including a stop on ENOSPC), 1 on a mkdir or write
+ * error.  A failing fstat64() terminates the process with exit(1).
+ */
+static int dir_write(char *chunk_buf, size_t chunksize,
+		     time_t time_st, unsigned long dir_num)
+{
+	/*
+	 * Start with empty strings: the loop can end before any name has
+	 * been built, and tempfile is still printed in verbose mode below.
+	 */
+	char tempfile[PATH_MAX] = "";
+	char tempdir[PATH_MAX] = "";
+	struct stat64 file;
+	/* larger than files_in_dir, so the first pass creates a directory */
+	int file_num = 999999999;
+	ino_t inode_st = 0;
+
+	if (!full && fsetflags(testdir, EXT2_TOPDIR_FL))
+		fprintf(stderr,
+			"\n%s: can't set TOPDIR_FL on %s: %s (ignoring)",
+			progname, testdir, strerror(errno));
+
+	for (; dir_num < num_dirs; num_files++, file_num++) {
+		if (file_num >= files_in_dir) {
+			/* current directory is full: move on to a new one */
+			if (dir_num == num_dirs - 1)
+				break;
+
+			file_num = 0;
+			if (mkdir(new_dir(tempdir, dir_num), dirmode) < 0) {
+				if (errno == ENOSPC)
+					break;
+				/* an already existing directory is reused */
+				if (errno != EEXIST) {
+					fprintf(stderr, "\n%s: mkdir %s : %s\n",
+						progname, tempdir,
+						strerror(errno));
+					return 1;
+				}
+			}
+			dir_num++;
+		}
+		fd = open_file(new_file(tempfile, tempdir, file_num),
+			       O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE);
+
+		/* the inode number is part of the pattern written to disk */
+		if (fd >= 0 && fstat64(fd, &file) == 0) {
+			inode_st = file.st_ino;
+		} else {
+			fprintf(stderr, "\n%s: write stat64 to file %s: %s",
+				progname, tempfile, strerror(errno));
+			exit(1);
+		}
+
+		if (verbose > 1)
+			show_filename("write", tempfile);
+
+		if (write_chunks(fd, 0, file_size, chunk_buf, chunksize,
+				 time_st, inode_st, tempfile)) {
+			close(fd);
+			return 1;
+		}
+		close(fd);
+
+		/* write_chunks() reports a full filesystem via errno_local */
+		if (errno_local == ENOSPC)
+			break;
+	}
+
+	if (verbose) {
+		/* raise verbosity temporarily so show_filename() always prints */
+		verbose++;
+		show_filename("write", tempfile);
+		printf("\nwrite complete\n");
+		verbose--;
+	}
+
+	return 0;
+}
+
+/*
+ * dir_read: read back the test files and verify their pattern.  Used for
+ * both full and partial (fast) modes.
+ *
+ * chunk_buf  IO buffer, handed to read_chunks()
+ * chunksize  size of chunk_buf in bytes
+ * time_st    timestamp every block is expected to record
+ * dir_num    number of the first directory to read
+ *
+ * Up to num_files files are read, files_in_dir per directory, starting with
+ * directory dir_num.
+ *
+ * Returns 0 if all files verified, 1 on a stat, read or verification error.
+ */
+static int dir_read(char *chunk_buf, size_t chunksize,
+		    time_t time_st, unsigned long dir_num)
+{
+	/*
+	 * Start with empty strings: the loop body may never run, and
+	 * tempfile is still printed in verbose mode below.
+	 */
+	char tempfile[PATH_MAX] = "";
+	char tempdir[PATH_MAX] = "";
+	unsigned long count = 0;
+	struct stat64 file;
+	int file_num = 0;
+	ino_t inode_st = 0;
+
+	for (count = 0; count < num_files && dir_num < num_dirs; count++) {
+		if (file_num == 0) {
+			/* first file of a directory: switch to the next one */
+			if (dir_num == num_dirs - 1)
+				break;
+
+			new_dir(tempdir, dir_num);
+			dir_num++;
+		}
+
+		fd = open_file(new_file(tempfile, tempdir, file_num),
+			       O_RDONLY | O_LARGEFILE);
+		/* the inode number is part of the pattern to verify */
+		if (fd >= 0 && fstat64(fd, &file) == 0) {
+			inode_st = file.st_ino;
+		} else {
+			fprintf(stderr, "\n%s: read stat64 file '%s': %s\n",
+				progname, tempfile, strerror(errno));
+			/* do not leak the descriptor on the error path */
+			if (fd >= 0)
+				close(fd);
+			return 1;
+		}
+
+		if (verbose > 1)
+			show_filename("read", tempfile);
+
+		/*
+		 * NOTE(review): this condition cannot be true inside the
+		 * loop, which only runs while count < num_files -- confirm
+		 * what was intended before changing it.
+		 */
+		if (count == num_files)
+			file_size = file.st_size;
+		if (read_chunks(fd, 0, file_size, chunk_buf, chunksize,
+				time_st, inode_st, tempfile)) {
+			close(fd);
+			return 1;
+		}
+		close(fd);
+
+		if (++file_num >= files_in_dir)
+			file_num = 0;
+	}
+	if (verbose > 1){
+		/* raise verbosity temporarily so show_filename() always prints */
+		verbose++;
+		show_filename("read", tempfile);
+		printf("\nread complete\n");
+		verbose--;
+	}
+	return 0;
+}
+
+/*
+ * main: parse the command line, size the test, then run the write pass
+ * and/or the read (verify) pass on the directory given as first non-option
+ * argument.
+ *
+ * In partial (non-full) mode the number of test directories is taken from
+ * the ext3 block group count of the device mounted on the test directory,
+ * or estimated from statfs64() when that device cannot be found or opened.
+ * The number of files written is saved in "<testdir>/<progname>.filecount"
+ * so that a later read-only run knows how many files to verify.
+ *
+ * Returns 0 on success, 2 on a verification failure, 3 on a write failure,
+ * 4 when the chunk buffer cannot be allocated and -1 on a bad chunk size.
+ */
+int main(int argc, char **argv)
+{
+	time_t time_st = 0;		/* Default timestamp */
+	size_t chunksize = ONE_MB;	/* IO chunk size (default=1MB) */
+	char *chunk_buf;		/* chunk buffer */
+	int error = 0;
+	FILE *countfile = NULL;
+	char filecount[PATH_MAX];
+	unsigned long dir_num = 0, dir_num_orig = 0;/* starting directory */
+	/*
+	 * getopt_long() returns an int.  Stored in a char, the -1 end marker
+	 * never compares equal to -1 where char is unsigned, and the option
+	 * loop would not terminate.
+	 */
+	int c;
+
+	progname = strrchr(argv[0], '/') ? strrchr(argv[0], '/') + 1 : argv[0];
+	/* every option handled in the switch is listed, including c and q */
+	while ((c = getopt_long(argc, argv, "c:hlo:pqrt:vw",
+				longopts, NULL)) != -1) {
+		switch (c) {
+		case 'c':
+			/* the argument is a number of megabytes */
+			chunksize = (strtoul(optarg, NULL, 0) * ONE_MB);
+			if (!chunksize) {
+				fprintf(stderr, "%s: Chunk size value should be "
+					"a multiple of 1MB\n", progname);
+				return -1;
+			}
+			break;
+		case 'l':
+			full = 1;
+			break;
+		case 'o': /* offset */
+			dir_num = strtoul(optarg, NULL, 0);
+			break;
+		case 'p':
+			full = 0;
+			break;
+		case 'q':
+			verbose = 0;
+			break;
+		case 'r':
+			readoption = 1;
+			break;
+		case 't':
+			time_st = (time_t)strtoul(optarg, NULL, 0);
+			break;
+		case 'w':
+			writeoption = 1;
+			break;
+		case 'v':
+			verbose++;
+			break;
+
+		case 'h':
+		default:
+			usage(1);
+			return 0;
+		}
+	}
+	testdir = argv[optind];
+
+	if (!testdir) {
+		fprintf(stderr, "%s: pathname not given\n", progname);
+		usage(1);
+		return -1;
+	}
+	file_size = 4 * ONE_GB;
+	/* neither -r nor -w given: do both passes */
+	if (!readoption && !writeoption) {
+		readoption = 1;
+		writeoption = 1;
+	}
+	if (!time_st)
+		(void) time(&time_st);
+	printf("Timestamp: %lu\n", (unsigned long )time_st);
+	isatty_flag = isatty(STDOUT_FILENO);
+
+	if (!full) {
+		struct mntent *tempmnt;
+		FILE *fp = NULL;
+		ext2_filsys fs;
+
+		if ((fp = setmntent("/etc/mtab", "r")) == NULL){
+			fprintf(stderr, "%s: fail to open /etc/mtab in read"
+				"mode :%s\n", progname, strerror(errno));
+			goto guess;
+		}
+
+		/* find device name using filesystem */
+		while ((tempmnt = getmntent(fp)) != NULL) {
+			if (strcmp(tempmnt->mnt_dir, testdir) == 0)
+				break;
+		}
+
+		if (tempmnt == NULL) {
+			fprintf(stderr, "%s: no device found for '%s'\n",
+				progname, testdir);
+			endmntent(fp);
+			goto guess;
+		}
+
+		if (ext2fs_open(tempmnt->mnt_fsname, 0, 0, 0,
+				unix_io_manager, &fs)) {
+			fprintf(stderr, "%s: unable to open ext3 fs on '%s'\n",
+				progname, testdir);
+			endmntent(fp);
+			goto guess;
+		}
+		endmntent(fp);
+
+		/* one test directory per block group, rounding up */
+		num_dirs = (fs->super->s_blocks_count +
+			    fs->super->s_blocks_per_group - 1) /
+			   fs->super->s_blocks_per_group;
+		if (verbose)
+			printf("ext3 block groups: %u, fs blocks: %u "
+			       "blocks per group: %u\n",
+			       num_dirs, fs->super->s_blocks_count,
+			       fs->super->s_blocks_per_group);
+		ext2fs_close(fs);
+		/*
+		 * This block is entered only through the "guess" label, as
+		 * the fallback when the superblock could not be read.
+		 */
+		if (0) { /* ugh */
+			struct statfs64 statbuf;
+		guess:
+			if (statfs64(testdir, &statbuf) == 0) {
+				/* one directory per 128MB of filesystem */
+				num_dirs = (long long)statbuf.f_blocks *
+					statbuf.f_bsize / (128ULL << 20);
+				if (verbose)
+					printf("dirs: %u, fs blocks: %llu\n",
+					       num_dirs,
+					       (long long)statbuf.f_blocks);
+			} else {
+				fprintf(stderr, "%s: unable to stat '%s': %s\n",
+					progname, testdir, strerror(errno));
+				if (verbose)
+					printf("dirs: %u\n", num_dirs);
+			}
+		}
+
+		/* partial mode: a single 1MB file per directory */
+		file_size = ONE_MB;
+		chunksize = ONE_MB;
+		files_in_dir = 1;
+	}
+	chunk_buf = (char *)calloc(chunksize, 1);
+	if (chunk_buf == NULL) {
+		fprintf(stderr, "Memory allocation failed for chunk_buf\n");
+		return 4;
+	}
+	/* bounded: testdir comes from the command line and may be long */
+	snprintf(filecount, sizeof(filecount), "%s/%s.filecount",
+		 testdir, progname);
+	if (writeoption) {
+		(void)mkdir(testdir, dirmode);
+
+		unlink(filecount);
+		if (dir_num != 0) {
+			num_files = dir_num * files_in_dir;
+			if (verbose)
+				printf("\n%s: %lu files already written\n",
+				       progname, num_files);
+		}
+		if (dir_write(chunk_buf, chunksize, time_st, dir_num)) {
+			error = 3;
+			goto out;
+		}
+		/* record how many files exist for a later read-only run */
+		countfile = fopen(filecount, "w");
+		if (countfile != NULL) {
+			if (fprintf(countfile, "%lu", num_files) < 1 ||
+			    fflush(countfile) != 0) {
+				fprintf(stderr, "\n%s: writing %s failed :%s\n",
+					progname, filecount, strerror(errno));
+			}
+			fclose(countfile);
+		}
+		dir_num = dir_num_orig;
+	}
+	if (readoption) {
+		if (!writeoption) {
+			countfile = fopen(filecount, "r");
+			if (countfile == NULL ||
+			    fscanf(countfile, "%lu", &num_files) != 1) {
+				fprintf(stderr, "\n%s: reading %s failed :%s\n",
+					progname, filecount, strerror(errno));
+				/* no usable count: fall back to the maximum */
+				num_files = num_dirs * files_in_dir;
+			} else {
+				num_files -= (dir_num * files_in_dir);
+			}
+			if (countfile)
+				fclose(countfile);
+		}
+		if (dir_read(chunk_buf, chunksize, time_st, dir_num)) {
+			fprintf(stderr, "\n%s: Data verification failed\n",
+				progname) ;
+			error = 2;
+			goto out;
+		}
+	}
+	error = 0;
+out:
+	free(chunk_buf);
+	return error;
+}
gettimeofday(&now, NULL);
/* A positive verbosity means to print every X iterations */
- if (verbose > 0 &&
- (next_num == NULL || num >= *next_num || num >= num_total)) {
+ if (verbose > 0 && (num >= *next_num || num >= num_total)) {
*next_num += verbose;
if (next_time) {
next_time->tv_sec = now.tv_sec - verbose;
difftime(&now, next_time) >= 0.0){
next_time->tv_sec = now.tv_sec - verbose;
next_time->tv_usec = now.tv_usec;
- if (next_num)
- *next_num = num;
+ *next_num = num;
return 1;
}
int j;
int rc = 0;
- buffer = obdio_alloc_aligned_buffer (&space, size);
- if (buffer == NULL) {
+ space = obdio_alloc_aligned_buffer (&buffer, size);
+ if (space == NULL) {
fprintf (stderr, "Can't allocate buffer size %d\n", size);
return (-1);
}
rc = obd_ioctl_pack (&conn->oc_data, &buf, sizeof (conn->oc_buffer));
if (rc != 0) {
- fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n",
- rc, strerror (errno));
- abort ();
+ fprintf(stderr, "%s: obd_ioctl_pack: %d (%s)\n",
+ __FUNCTION__, rc, strerror(errno));
+ abort();
}
rc = ioctl (conn->oc_fd, cmd, buf);
rc2 = obd_ioctl_unpack (&conn->oc_data, buf, sizeof (conn->oc_buffer));
if (rc2 != 0) {
- fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n",
- rc2, strerror (errno));
+ fprintf(stderr, "%s: obd_ioctl_unpack: %d (%s)\n",
+ __FUNCTION__, rc2, strerror(errno));
abort ();
}
conn = malloc (sizeof (*conn));
if (conn == NULL) {
- fprintf (stderr, "obdio_connect: no memory\n");
+ fprintf (stderr, "%s: no memory\n", __FUNCTION__);
return (NULL);
}
memset (conn, 0, sizeof (*conn));
conn->oc_fd = open ("/dev/obd", O_RDWR);
if (conn->oc_fd < 0) {
- fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n",
- strerror (errno));
+ fprintf(stderr, "%s: Can't open /dev/obd: %s\n",
+ __FUNCTION__, strerror(errno));
goto failed;
}
int
obdio_pread (struct obdio_conn *conn, uint64_t oid,
- char *buffer, uint32_t count, uint64_t offset)
+ void *buffer, uint32_t count, uint64_t offset)
{
obdio_iocinit (conn);
conn->oc_data.ioc_obdo1.o_id = oid;
conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
- conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+ conn->oc_data.ioc_obdo1.o_valid =
+ OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
conn->oc_data.ioc_pbuf2 = buffer;
conn->oc_data.ioc_plen2 = count;
int
obdio_pwrite (struct obdio_conn *conn, uint64_t oid,
- char *buffer, uint32_t count, uint64_t offset)
+ void *buffer, uint32_t count, uint64_t offset)
{
obdio_iocinit (conn);
conn->oc_data.ioc_obdo1.o_id = oid;
conn->oc_data.ioc_obdo1.o_mode = S_IFREG;
- conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
+ conn->oc_data.ioc_obdo1.o_valid =
+ OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE;
conn->oc_data.ioc_pbuf2 = buffer;
conn->oc_data.ioc_plen2 = count;
void *
obdio_alloc_aligned_buffer (void **spacep, int size)
{
- int pagesize = getpagesize();
- void *space = malloc (size + pagesize - 1);
+ int pagemask = getpagesize() - 1;
+ void *space = malloc(size + pagemask);
- *spacep = space;
if (space == NULL)
return (NULL);
- return ((void *)(((unsigned long)space + pagesize - 1) & ~(pagesize - 1)));
+ *spacep = (void *)(((unsigned long)space + pagemask) & ~pagemask);
+ return space;
}
struct obdio_barrier *
{
struct obdio_barrier *b;
- b = (struct obdio_barrier *)malloc (sizeof (*b));
+ b = malloc(sizeof(*b));
if (b == NULL) {
- fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid);
- return (NULL);
+ fprintf(stderr, "%s "LPX64": Can't allocate\n",
+ __FUNCTION__, oid);
+ return(NULL);
}
b->ob_id = id;
struct lustre_handle lh;
int rc;
int rc2;
- void *space;
+ void *space, *fileptr;
struct obdio_barrier *fileb;
if (b->ob_ordinal != 0 ||
b->ob_count != 0) {
- fprintf (stderr, "obdio_setup_barrier: invalid parameter\n");
+ fprintf(stderr, "%s: invalid parameter\n", __FUNCTION__);
abort ();
}
- fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
- if (fileb == NULL) {
- fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n",
- b->ob_oid);
+ space = obdio_alloc_aligned_buffer(&fileptr, getpagesize());
+ if (space == NULL) {
+ fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n",
+ __FUNCTION__, b->ob_oid);
return (-1);
}
- memset (fileb, 0, getpagesize ());
+ fileb = fileptr;
+ memset(fileb, 0, getpagesize());
*fileb = *b;
- rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+ rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh);
if (rc != 0) {
- fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on enqueue: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out;
}
- rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0);
if (rc != 0)
- fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on write: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
rc2 = obdio_cancel (conn, &lh);
if (rc == 0 && rc2 != 0) {
- fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on cancel: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
rc = rc2;
}
out:
struct lustre_handle lh;
int rc;
int rc2;
- void *space;
+ void *space, *fileptr;
struct obdio_barrier *fileb;
char *mode;
- fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ());
- if (fileb == NULL) {
- fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n",
- b->ob_oid);
+ space = obdio_alloc_aligned_buffer(&fileptr, getpagesize());
+ if (space == NULL) {
+ fprintf(stderr, "%s "LPX64": Can't allocate page buffer\n",
+ __FUNCTION__, b->ob_oid);
return (-1);
}
- rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh);
+ rc = obdio_enqueue(conn, b->ob_oid, LCK_PW, 0, getpagesize(), &lh);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on PW enqueue: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out_1;
}
- memset (fileb, 0xeb, getpagesize ());
- rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ fileb = fileptr;
+ memset(fileb, 0xeb, getpagesize());
+ rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on initial read: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out_2;
}
fileb->ob_npeers != b->ob_npeers ||
fileb->ob_count >= b->ob_npeers ||
fileb->ob_ordinal != b->ob_ordinal) {
- fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id);
- fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
- fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
- fileb->ob_ordinal, fileb->ob_count);
- fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
- b->ob_id, b->ob_oid, b->ob_npeers,
- b->ob_ordinal, b->ob_count);
+ fprintf(stderr, "%s "LPX64": corrupt on initial read\n",
+ __FUNCTION__, b->ob_id);
+ fprintf(stderr,
+ " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+ fileb->ob_ordinal, fileb->ob_count);
+ fprintf(stderr,
+ " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
+ b->ob_id, b->ob_oid, b->ob_npeers,
+ b->ob_ordinal, b->ob_count);
rc = -1;
goto out_2;
}
fileb->ob_ordinal++; /* signal all joined */
}
- rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ rc = obdio_pwrite(conn, b->ob_oid, fileb, getpagesize(), 0);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf (stderr, "%s "LPX64": Error on initial write: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out_2;
}
mode = "PW";
b->ob_ordinal++; /* now I wait... */
while (fileb->ob_ordinal != b->ob_ordinal) {
-
rc = obdio_cancel (conn, &lh);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n",
- b->ob_oid, mode, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on %s cancel: %s\n",
+ __FUNCTION__, b->ob_oid, mode, strerror(errno));
goto out_1;
}
mode = "PR";
- rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh);
+ rc = obdio_enqueue(conn, b->ob_oid, LCK_PR,0,getpagesize(),&lh);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on PR enqueue: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out_1;
}
- memset (fileb, 0xeb, getpagesize ());
- rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0);
+ memset (fileb, 0xeb, getpagesize());
+ rc = obdio_pread(conn, b->ob_oid, fileb, getpagesize(), 0);
if (rc != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on read: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
goto out_2;
}
fileb->ob_count >= b->ob_npeers ||
(fileb->ob_ordinal != b->ob_ordinal - 1 &&
fileb->ob_ordinal != b->ob_ordinal)) {
- fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id);
- fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
- fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
- fileb->ob_ordinal, fileb->ob_count);
- fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n",
- b->ob_id, b->ob_oid, b->ob_npeers,
- b->ob_ordinal, b->ob_count);
+ fprintf(stderr, "%s "LPX64": corrupt\n",
+ __FUNCTION__, b->ob_id);
+ fprintf(stderr, " got ["LPX64","LPX64","LPX64","
+ LPX64","LPX64"]\n",
+ fileb->ob_id, fileb->ob_oid, fileb->ob_npeers,
+ fileb->ob_ordinal, fileb->ob_count);
+ fprintf(stderr, " expected ["LPX64","LPX64","LPX64
+ ","LPX64","LPX64"]\n",
+ b->ob_id, b->ob_oid, b->ob_npeers,
+ b->ob_ordinal, b->ob_count);
rc = -1;
goto out_2;
}
out_2:
rc2 = obdio_cancel (conn, &lh);
if (rc == 0 && rc2 != 0) {
- fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n",
- b->ob_oid, strerror (errno));
+ fprintf(stderr, "%s "LPX64": Error on cancel: %s\n",
+ __FUNCTION__, b->ob_oid, strerror(errno));
rc = rc2;
}
out_1:
free (space);
return (rc);
}
-
-
uint64_t ob_count;
};
-extern struct obdio_conn * obdio_connect (int device);
+extern struct obdio_conn *obdio_connect(int device);
extern void obdio_disconnect(struct obdio_conn *conn, int flags);
extern int obdio_open(struct obdio_conn *conn, uint64_t oid,
struct lustre_handle *fh);
extern int obdio_close(struct obdio_conn *conn, uint64_t oid,
struct lustre_handle *fh);
extern int obdio_pread(struct obdio_conn *conn, uint64_t oid,
- char *buffer, uint32_t count, uint64_t offset);
+ void *buffer, uint32_t count, uint64_t offset);
extern int obdio_pwrite(struct obdio_conn *conn, uint64_t oid,
- char *buffer, uint32_t count, uint64_t offset);
+ void *buffer, uint32_t count, uint64_t offset);
extern int obdio_enqueue(struct obdio_conn *conn, uint64_t oid,
int mode, uint64_t offset, uint32_t count,
struct lustre_handle *lh);
extern void *obdio_alloc_aligned_buffer(void **spacep, int size);
extern struct obdio_barrier *obdio_new_barrier(uint64_t oid, uint64_t id,
int npeers);
-extern int obdio_setup_barrier(struct obdio_conn *conn,
- struct obdio_barrier *b);
+extern int obdio_setup_barrier(struct obdio_conn *conn,struct obdio_barrier *b);
extern int obdio_barrier(struct obdio_conn *conn, struct obdio_barrier *b);
#endif
COMMENT("Sizes and Offsets");
BLANK_LINE();
+ CHECK_STRUCT(obd_uuid);
check_lustre_handle();
check_lustre_msg_v1();
check_lustre_msg_v2();