/* Link to the device, for debugging. */
struct lu_ref_link *ot_dev_link;
+#if OSD_THANDLE_STATS
+ /** time when this handle was allocated */
+ cfs_time_t oth_alloced;
+
+ /** time when this handle was started */
+ cfs_time_t oth_started;
+#endif
};
/*
osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save)
{
struct md_ucred *uc = md_ucred(env);
+ struct cred *tc;
LASSERT(uc != NULL);
- save->oc_uid = current->fsuid;
- save->oc_gid = current->fsgid;
- save->oc_cap = current->cap_effective;
- current->fsuid = uc->mu_fsuid;
- current->fsgid = uc->mu_fsgid;
- current->cap_effective = uc->mu_cap;
+ save->oc_uid = current_fsuid();
+ save->oc_gid = current_fsgid();
+ save->oc_cap = current_cap();
+ if ((tc = prepare_creds())) {
+ tc->fsuid = uc->mu_fsuid;
+ tc->fsgid = uc->mu_fsgid;
+ commit_creds(tc);
+ }
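+ /* XXX presumably suboptimal: capabilities are applied in a separate
+  * step after commit_creds() rather than in the new cred itself */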
+ cfs_curproc_cap_unpack(uc->mu_cap);
}
+/*
+ * Put back the fsuid, fsgid and effective capability set recorded in \a save.
+ *
+ * The values are written into the credential set returned by
+ * prepare_creds() and installed with commit_creds().
+ *
+ * NOTE(review): if prepare_creds() returns NULL nothing is restored and no
+ * error is reported to the caller -- confirm this best-effort behaviour is
+ * intended.
+ */
static inline void
osd_pop_ctxt(struct osd_ctxt *save)
{
- current->fsuid = save->oc_uid;
- current->fsgid = save->oc_gid;
- current->cap_effective = save->oc_cap;
+ struct cred *tc;
+
+ if ((tc = prepare_creds())) {
+ tc->fsuid = save->oc_uid;
+ tc->fsgid = save->oc_gid;
+ tc->cap_effective = save->oc_cap;
+ commit_creds(tc);
+ }
}
#endif
CERROR("bad inode %lx\n",inode->i_ino);
iput(inode);
inode = ERR_PTR(-ENOENT);
+ } else {
+ /* Do not update file c/mtime in ldiskfs.
+ * NB: we don't have any lock to protect this because we don't
+ * have reference on osd_object now, but contention with
+ * another lookup + attr_set can't happen in the tiny window
+ * between if (...) and set S_NOCMTIME. */
+ if (!(inode->i_flags & S_NOCMTIME))
+ inode->i_flags |= S_NOCMTIME;
}
return inode;
}
* Journal
*/
+#if OSD_THANDLE_STATS
+/**
+ * Set time when the handle is allocated.
+ *
+ * Stores the value of cfs_time_current() in \a oth->oth_alloced.
+ *
+ * \param oth  transaction handle to stamp
+ */
+static void osd_th_alloced(struct osd_thandle *oth)
+{
+ oth->oth_alloced = cfs_time_current();
+}
+
+/**
+ * Set time when the handle started.
+ *
+ * Stores the value of cfs_time_current() in \a oth->oth_started.
+ *
+ * \param oth  transaction handle to stamp
+ */
+static void osd_th_started(struct osd_thandle *oth)
+{
+ oth->oth_started = cfs_time_current();
+}
+
+/**
+ * Helper function to convert a time interval to microseconds packed in a
+ * long int (default time units for the counter in "stats" initialized
+ * by lu_time_init() ).
+ *
+ * \param start  beginning of the interval
+ * \param end    end of the interval
+ *
+ * \return tv_sec * 1000000 + tv_usec of the timeval that
+ *         cfs_duration_usec() fills in for cfs_time_sub(end, start)
+ *
+ * NOTE(review): the result is a long; where long is 32 bits the
+ * multiplication would overflow for intervals beyond roughly 2147
+ * seconds -- confirm whether that matters for the callers.
+ */
+static long interval_to_usec(cfs_time_t start, cfs_time_t end)
+{
+ struct timeval val;
+
+ cfs_duration_usec(cfs_time_sub(end, start), &val);
+ return val.tv_sec * 1000000 + val.tv_usec;
+}
+
+/**
+ * Check whether we have been dealing with this handle for too long.
+ *
+ * Adds three intervals, each converted by interval_to_usec(), to the
+ * counters in dev->od_stats: alloced..started, started..closed and
+ * closed..now, where "now" is sampled on entry to this function. If more
+ * than 30 seconds have passed since \a alloced, a warning carrying all four
+ * timestamps is logged and libcfs_debug_dumpstack(NULL) is called.
+ *
+ * \param oth      handle being checked; only printed with %p here
+ * \param dev      device whose od_stats receives the samples, asserted
+ *                 to be non-NULL
+ * \param alloced  time the handle was allocated
+ * \param started  time the handle was started
+ * \param closed   time sampled by the caller before closing the handle
+ */
+static void __osd_th_check_slow(void *oth, struct osd_device *dev,
+ cfs_time_t alloced, cfs_time_t started,
+ cfs_time_t closed)
+{
+ cfs_time_t now = cfs_time_current();
+
+ LASSERT(dev != NULL);
+
+ lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING,
+ interval_to_usec(alloced, started));
+ lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN,
+ interval_to_usec(started, closed));
+ lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING,
+ interval_to_usec(closed, now));
+
+ /* the 30 second threshold is measured from allocation, not from start */
+ if (cfs_time_before(cfs_time_add(alloced, cfs_time_seconds(30)), now)) {
+ CWARN("transaction handle %p was open for too long: "
+ "now "CFS_TIME_T" ,"
+ "alloced "CFS_TIME_T" ,"
+ "started "CFS_TIME_T" ,"
+ "closed "CFS_TIME_T"\n",
+ oth, now, alloced, started, closed);
+ libcfs_debug_dumpstack(NULL);
+ }
+}
+
+/**
+ * Evaluate \a expr and then feed the handle's timing into
+ * __osd_th_check_slow().
+ *
+ * The "closed" timestamp and the handle's oth_alloced / oth_started values
+ * are all sampled before \a expr is evaluated, so the handle's fields are
+ * not read again afterwards; \a oth itself is only passed on as a pointer.
+ *
+ * Wrapped in do { } while (0) so that the expansion is a single statement
+ * and stays safe inside unbraced if/else; the caller supplies the trailing
+ * semicolon. \a oth and \a dev are parenthesized so that arbitrary pointer
+ * expressions can be passed.
+ *
+ * \param oth   struct osd_thandle pointer being closed
+ * \param dev   struct osd_device pointer used for the statistics
+ * \param expr  statement (without trailing semicolon) that closes the handle
+ */
+#define OSD_CHECK_SLOW_TH(oth, dev, expr) \
+do { \
+        cfs_time_t __closed = cfs_time_current(); \
+        cfs_time_t __alloced = (oth)->oth_alloced; \
+        cfs_time_t __started = (oth)->oth_started; \
+ \
+        expr; \
+        __osd_th_check_slow((oth), (dev), __alloced, __started, __closed); \
+} while (0)
+
+#else /* OSD_THANDLE_STATS */
+
+/*
+ * Stand-ins for builds where OSD_THANDLE_STATS is false: the timestamp
+ * hooks expand to empty statements and OSD_CHECK_SLOW_TH expands to just
+ * its expr argument.
+ */
+#define osd_th_alloced(h) do {} while(0)
+#define osd_th_started(h) do {} while(0)
+#define OSD_CHECK_SLOW_TH(oth, dev, expr) expr
+
+#endif /* OSD_THANDLE_STATS */
+
/*
* Concurrency: doesn't access mutable data.
*/
* XXX temporary stuff. Some abstraction layer should
* be used.
*/
-
+ oti->oti_dev = dev;
+ osd_th_alloced(oh);
jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits);
+ osd_th_started(oh);
if (!IS_ERR(jh)) {
oh->ot_handle = jh;
th = &oh->ot_super;
if (result != 0)
CERROR("Failure in transaction hook: %d\n", result);
oh->ot_handle = NULL;
- result = ldiskfs_journal_stop(hdl);
+ OSD_CHECK_SLOW_TH(oh, oti->oti_dev,
+ result = ldiskfs_journal_stop(hdl));
if (result != 0)
CERROR("Failure to stop transaction: %d\n", result);
}
iattr.ia_uid = attr->la_uid;
iattr.ia_gid = attr->la_gid;
osd_push_ctxt(env, save);
- rc = DQUOT_TRANSFER(inode, &iattr) ? -EDQUOT : 0;
+ rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
osd_pop_ctxt(save);
if (rc != 0)
return rc;
if (bits & LA_RDEV)
inode->i_rdev = attr->la_rdev;
- if (bits & LA_FLAGS)
- inode->i_flags = ll_ext_to_inode_flags(attr->la_flags);
+ if (bits & LA_FLAGS) {
+ /* always keep S_NOCMTIME */
+ inode->i_flags = ll_ext_to_inode_flags(attr->la_flags) |
+ S_NOCMTIME;
+ }
return 0;
}
cfs_spin_unlock(&obj->oo_guard);
if (!rc)
- mark_inode_dirty(obj->oo_inode);
+ obj->oo_inode->i_sb->s_op->dirty_inode(obj->oo_inode);
return rc;
}
struct lu_attr *attr, struct thandle *th)
{
osd_object_init0(obj);
+ if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
+ unlock_new_inode(obj->oo_inode);
return 0;
}
osd_pop_ctxt(save);
#endif
if (!IS_ERR(inode)) {
+ /* Do not update file c/mtime in ldiskfs.
+ * NB: don't need any lock because no contention at this
+ * early stage */
+ inode->i_flags |= S_NOCMTIME;
obj->oo_inode = inode;
result = 0;
} else
struct inode *inode = obj->oo_inode;
struct osd_thread_info *info = osd_oti_get(env);
struct dentry *dentry = &info->oti_child_dentry;
- struct timespec *t = &info->oti_time;
int fs_flags = 0;
int rc;
fs_flags |= XATTR_CREATE;
dentry->d_inode = inode;
- *t = inode->i_ctime;
rc = inode->i_op->setxattr(dentry, name, buf->lb_buf,
buf->lb_len, fs_flags);
- /* ctime should not be updated with server-side time. */
- cfs_spin_lock(&obj->oo_guard);
- inode->i_ctime = *t;
- cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(inode);
return rc;
}
result = __osd_object_create(info, obj, attr, hint, dof, th);
/* objects under osd root shld have igif fid, so dont add fid EA */
- if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START)
+ if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL)
result = osd_ea_fid_set(env, dt, fid);
if (result == 0)
LASSERT(inode->i_nlink < LDISKFS_LINK_MAX);
inode->i_nlink++;
cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(inode);
+ inode->i_sb->s_op->dirty_inode(inode);
LINVRNT(osd_invariant(obj));
}
LASSERT(inode->i_nlink > 0);
inode->i_nlink--;
cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(inode);
+ inode->i_sb->s_op->dirty_inode(inode);
LINVRNT(osd_invariant(obj));
}
struct inode *inode = obj->oo_inode;
struct osd_thread_info *info = osd_oti_get(env);
struct dentry *dentry = &info->oti_obj_dentry;
- struct timespec *t = &info->oti_time;
int rc;
LASSERT(dt_object_exists(dt));
return -EACCES;
dentry->d_inode = inode;
- *t = inode->i_ctime;
rc = inode->i_op->removexattr(dentry, name);
- /* ctime should not be updated with server-side time. */
- cfs_spin_lock(&obj->oo_guard);
- inode->i_ctime = *t;
- cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(inode);
return rc;
}
__u32 d[4], s[4];
s[0] = obj->oo_inode->i_uid;
- ll_get_random_bytes(&(s[1]), sizeof(__u32));
+ cfs_get_random_bytes(&(s[1]), sizeof(__u32));
s[2] = obj->oo_inode->i_gid;
- ll_get_random_bytes(&(s[3]), sizeof(__u32));
+ cfs_get_random_bytes(&(s[3]), sizeof(__u32));
rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN);
if (unlikely(rc))
RETURN(ERR_PTR(rc));
struct osd_thandle *oh;
ssize_t result = 0;
#ifdef HAVE_QUOTA_SUPPORT
- cfs_cap_t save = current->cap_effective;
+ cfs_cap_t save = cfs_curproc_cap_pack();
#endif
LASSERT(handle != NULL);
LASSERT(oh->ot_handle->h_transaction != NULL);
#ifdef HAVE_QUOTA_SUPPORT
if (ignore_quota)
- current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
else
- current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
#endif
/* Write small symlink to inode body as we need to maintain correct
* on-disk symlinks for ldiskfs.
buf->lb_len, pos,
oh->ot_handle);
#ifdef HAVE_QUOTA_SUPPORT
- current->cap_effective = save;
+ cfs_curproc_cap_unpack(save);
#endif
if (result == 0)
result = buf->lb_len;
cfs_down_write(&obj->oo_ext_idx_sem);
bh = ll_ldiskfs_find_entry(dir, dentry, &de);
if (bh) {
- struct osd_thread_info *oti = osd_oti_get(env);
- struct timespec *ctime = &oti->oti_time;
- struct timespec *mtime = &oti->oti_time2;
-
- *ctime = dir->i_ctime;
- *mtime = dir->i_mtime;
rc = ldiskfs_delete_entry(oh->ot_handle,
dir, de, bh);
- /* xtime should not be updated with server-side time. */
- cfs_spin_lock(&obj->oo_guard);
- dir->i_ctime = *ctime;
- dir->i_mtime = *mtime;
- cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(dir);
brelse(bh);
} else
rc = -ENOENT;
struct osd_thandle *oh;
struct iam_container *bag = &obj->oo_dir->od_container;
#ifdef HAVE_QUOTA_SUPPORT
- cfs_cap_t save = current->cap_effective;
+ cfs_cap_t save = cfs_curproc_cap_pack();
#endif
struct osd_thread_info *oti = osd_oti_get(env);
struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
LASSERT(oh->ot_handle->h_transaction != NULL);
#ifdef HAVE_QUOTA_SUPPORT
if (ignore_quota)
- current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
else
- current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
#endif
if (S_ISDIR(obj->oo_inode->i_mode))
osd_fid_pack((struct osd_fid_pack *)iam_rec, rec, &oti->oti_fid);
rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
iam_rec, ipd);
#ifdef HAVE_QUOTA_SUPPORT
- current->cap_effective = save;
+ cfs_curproc_cap_unpack(save);
#endif
osd_ipd_put(env, bag, ipd);
LINVRNT(osd_invariant(obj));
child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
if (fid_is_igif((struct lu_fid *)fid) ||
- fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) {
+ fid_is_norm((struct lu_fid *)fid)) {
ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
osd_get_ldiskfs_dirent_param(ldp, fid);
child->d_fsdata = (void*) ldp;
if (!dir->oo_compat_dot_created)
return -EINVAL;
- if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) {
+ if (fid_seq((struct lu_fid *)dot_fid) >= FID_SEQ_NORMAL) {
osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
} else {
const char *name = (const char *)key;
struct osd_object *child;
#ifdef HAVE_QUOTA_SUPPORT
- cfs_cap_t save = current->cap_effective;
+ cfs_cap_t save = cfs_curproc_cap_pack();
#endif
int rc;
child = osd_object_find(env, dt, fid);
if (!IS_ERR(child)) {
- struct inode *inode = obj->oo_inode;
- struct osd_thread_info *oti = osd_oti_get(env);
- struct timespec *ctime = &oti->oti_time;
- struct timespec *mtime = &oti->oti_time2;
-
- *ctime = inode->i_ctime;
- *mtime = inode->i_mtime;
#ifdef HAVE_QUOTA_SUPPORT
if (ignore_quota)
- current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
else
- current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+ cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
#endif
cfs_down_write(&obj->oo_ext_idx_sem);
rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
cfs_up_write(&obj->oo_ext_idx_sem);
#ifdef HAVE_QUOTA_SUPPORT
- current->cap_effective = save;
+ cfs_curproc_cap_unpack(save);
#endif
osd_object_put(env, child);
- /* xtime should not be updated with server-side time. */
- cfs_spin_lock(&obj->oo_guard);
- inode->i_ctime = *ctime;
- inode->i_mtime = *mtime;
- cfs_spin_unlock(&obj->oo_guard);
- mark_inode_dirty(inode);
} else {
rc = PTR_ERR(child);
}