/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
if (!exp->exp_obd->obd_replayable || oti == NULL)
RETURN(rc);
- cfs_mutex_down(&ted->ted_lcd_lock);
+ cfs_mutex_lock(&ted->ted_lcd_lock);
lcd = ted->ted_lcd;
/* if the export has already been disconnected, we have no last_rcvd slot,
* update server data with latest transno then */
if (lcd == NULL) {
- cfs_mutex_up(&ted->ted_lcd_lock);
+ cfs_mutex_unlock(&ted->ted_lcd_lock);
CWARN("commit transaction for disconnected client %s: rc %d\n",
exp->exp_client_uuid.uuid, rc);
err = filter_update_server_data(exp->exp_obd);
exp->exp_vbr_failed = 1;
cfs_spin_unlock(&exp->exp_lock);
cfs_spin_unlock(&obt->obt_lut->lut_translock);
- cfs_mutex_up(&ted->ted_lcd_lock);
+ cfs_mutex_unlock(&ted->ted_lcd_lock);
RETURN(-EOVERFLOW);
}
}
CDEBUG(log_pri, "wrote trans "LPU64" for client %s at #%d: err = %d\n",
last_rcvd, lcd->lcd_uuid, ted->ted_lr_idx, err);
- cfs_mutex_up(&ted->ted_lcd_lock);
+ cfs_mutex_unlock(&ted->ted_lcd_lock);
RETURN(rc);
}
ted->ted_lr_idx = cl_idx;
ted->ted_lr_off = le32_to_cpu(lsd->lsd_client_start) +
cl_idx * le16_to_cpu(lsd->lsd_client_size);
- cfs_init_mutex(&ted->ted_lcd_lock);
+ cfs_mutex_init(&ted->ted_lcd_lock);
LASSERTF(ted->ted_lr_off > 0, "ted_lr_off = %llu\n", ted->ted_lr_off);
CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n",
* be in server data or in client data in case of failure */
filter_update_server_data(exp->exp_obd);
- cfs_mutex_down(&ted->ted_lcd_lock);
+ cfs_mutex_lock(&ted->ted_lcd_lock);
memset(ted->ted_lcd->lcd_uuid, 0, sizeof ted->ted_lcd->lcd_uuid);
rc = fsfilt_write_record(exp->exp_obd, obt->obt_rcvd_filp,
ted->ted_lcd,
sizeof(*ted->ted_lcd), &off, 0);
- cfs_mutex_up(&ted->ted_lcd_lock);
+ cfs_mutex_unlock(&ted->ted_lcd_lock);
pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
struct filter_obd *filter = &obd->u.filter;
int old_count, group, rc = 0;
- cfs_down(&filter->fo_init_lock);
+ cfs_mutex_lock(&filter->fo_init_lock);
old_count = filter->fo_group_count;
for (group = old_count; group <= last_group; group++) {
rc = filter_read_group_internal(obd, group, create);
if (rc != 0)
break;
}
- cfs_up(&filter->fo_init_lock);
+ cfs_mutex_unlock(&filter->fo_init_lock);
return rc;
}
IS_IMMUTABLE(dentry->d_inode))
GOTO(out, rc = -EPERM);
- /* NOTE: This might need to go outside i_mutex, though it isn't clear if
- * that was done because of journal_start (which is already done
- * here) or some other ordering issue. */
+ /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
ll_vfs_dq_init(dir);
rc = ll_security_inode_unlink(dir, dentry, mnt);
struct ldlm_lock **lockp, void *req_cookie,
ldlm_mode_t mode, int flags, void *data)
{
- CFS_LIST_HEAD(rpc_list);
struct ptlrpc_request *req = req_cookie;
struct ldlm_lock *lock = *lockp, *l = NULL;
struct ldlm_resource *res = lock->l_resource;
* lock, and should not be granted if the lock will be blocked.
*/
+ if (flags & LDLM_FL_BLOCK_NOWAIT) {
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_AGL_DELAY, 5);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_AGL_NOLOCK))
+ RETURN(ELDLM_LOCK_ABORTED);
+ }
+
LASSERT(ns == ldlm_res_to_ns(res));
lock_res(res);
- rc = policy(lock, &tmpflags, 0, &err, &rpc_list);
+ rc = policy(lock, &tmpflags, 0, &err, NULL);
check_res_locked(res);
- /* FIXME: we should change the policy function slightly, to not make
- * this list at all, since we just turn around and free it */
- while (!cfs_list_empty(&rpc_list)) {
- struct ldlm_lock *wlock =
- cfs_list_entry(rpc_list.next, struct ldlm_lock,
- l_cp_ast);
- LASSERT((lock->l_flags & LDLM_FL_AST_SENT) == 0);
- LASSERT(lock->l_flags & LDLM_FL_CP_REQD);
- lock->l_flags &= ~LDLM_FL_CP_REQD;
- cfs_list_del_init(&wlock->l_cp_ast);
- LDLM_LOCK_RELEASE(wlock);
- }
-
/* The lock met with no resistance; we're finished. */
if (rc == LDLM_ITER_CONTINUE) {
/* do not grant locks to the liblustre clients: they cannot
}
unlock_res(res);
RETURN(err);
+ } else if (flags & LDLM_FL_BLOCK_NOWAIT) {
+ /* LDLM_FL_BLOCK_NOWAIT means the request is for AGL. Do not send
+ * a glimpse callback just to obtain the size. Whoever really needs
+ * the size will trigger the glimpse callback when necessary. */
+ unlock_res(res);
+ RETURN(ELDLM_LOCK_ABORTED);
}
/* Do not grant any lock, but instead send GL callbacks. The extent
* therefore, that res->lr_lvb_data cannot increase beyond the
* end of already granted lock. As a result, it is safe to
* check against "stale" reply_lvb->lvb_size value without
- * res->lr_lvb_sem.
+ * res->lr_lvb_mutex.
*/
arg.size = reply_lvb->lvb_size;
arg.victim = &l;
{
struct filter_obd *filter = &obd->u.filter;
struct vfsmount *mnt;
+ struct file_system_type *type;
struct lustre_mount_info *lmi;
struct obd_uuid uuid;
__u8 *uuid_ptr;
} else {
/* old path - used by lctl */
CERROR("Using old MDS mount method\n");
- mnt = ll_kern_mount(lustre_cfg_string(lcfg, 2),
- MS_NOATIME|MS_NODIRATIME,
- lustre_cfg_string(lcfg, 1), option);
+ type = get_fs_type(lustre_cfg_string(lcfg, 2));
+ if (!type) {
+ CERROR("get_fs_type failed\n");
+ RETURN(-ENODEV);
+ }
+ mnt = vfs_kern_mount(type, MS_NOATIME|MS_NODIRATIME,
+ lustre_cfg_string(lcfg, 1), option);
+ cfs_module_put(type->owner);
if (IS_ERR(mnt)) {
rc = PTR_ERR(mnt);
LCONSOLE_ERROR_MSG(0x135, "Can't mount disk %s (%d)\n",
filter->fo_fstype = mnt->mnt_sb->s_type->name;
CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt);
- fsfilt_setup(obd, obd->u.obt.obt_sb);
+ rc = fsfilt_setup(obd, obd->u.obt.obt_sb);
+ if (rc)
+ GOTO(err_ops, rc);
OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
obd->obd_lvfs_ctxt.pwdmnt = mnt;
obd->obd_lvfs_ctxt.fs = get_ds();
obd->obd_lvfs_ctxt.cb_ops = filter_lvfs_ops;
- cfs_init_mutex(&filter->fo_init_lock);
+ cfs_mutex_init(&filter->fo_init_lock);
filter->fo_committed_group = 0;
filter->fo_destroys_in_progress = 0;
for (i = 0; i < 32; i++)
- cfs_sema_init(&filter->fo_create_locks[i], 1);
+ cfs_mutex_init(&filter->fo_create_locks[i]);
cfs_spin_lock_init(&filter->fo_objidlock);
CFS_INIT_LIST_HEAD(&filter->fo_export_list);
- cfs_sema_init(&filter->fo_alloc_lock, 1);
+ cfs_mutex_init(&filter->fo_alloc_lock);
init_brw_stats(&filter->fo_filter_stats);
cfs_spin_lock_init(&filter->fo_flags_lock);
filter->fo_read_cache = 1; /* enable read-only cache by default */
* This is safe to do, as llog is already synchronized
* and its import may go.
*/
- cfs_mutex_down(&ctxt->loc_sem);
+ cfs_mutex_lock(&ctxt->loc_mutex);
if (ctxt->loc_imp) {
class_import_put(ctxt->loc_imp);
ctxt->loc_imp = NULL;
}
- cfs_mutex_up(&ctxt->loc_sem);
+ cfs_mutex_unlock(&ctxt->loc_mutex);
llog_ctxt_put(ctxt);
}
if (filter->fo_lcm) {
- cfs_mutex_down(&ctxt->loc_sem);
+ cfs_mutex_lock(&ctxt->loc_mutex);
llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
filter->fo_lcm = NULL;
- cfs_mutex_up(&ctxt->loc_sem);
+ cfs_mutex_unlock(&ctxt->loc_mutex);
}
RETURN(filter_olg_fini(&obd->obd_olg));
}
RETURN(ERR_PTR(-ENOENT));
}
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+ /* Try to correct for a bug in 2.1.0 (LU-221) that caused negative
+ * timestamps to appear to be in the far future, due to the old
+ * timestamp being stored on disk as an unsigned value. This fixes
+ * up any bad values stored on disk before returning them to the
+ * client, and ensures any timestamp updates are correct. LU-1042 */
+ if (unlikely(LTIME_S(dchild->d_inode->i_atime) == LU221_BAD_TIME))
+ LTIME_S(dchild->d_inode->i_atime) = 0;
+ if (unlikely(LTIME_S(dchild->d_inode->i_mtime) == LU221_BAD_TIME))
+ LTIME_S(dchild->d_inode->i_mtime) = 0;
+ if (unlikely(LTIME_S(dchild->d_inode->i_ctime) == LU221_BAD_TIME))
+ LTIME_S(dchild->d_inode->i_ctime) = 0;
+#else
+#warning "remove old LU-221/LU-1042 workaround code"
+#endif
+
return dchild;
}
{
struct dentry *dentry = NULL;
struct obd_device *obd;
+ __u64 curr_version;
int rc = 0;
ENTRY;
/* Limit the valid bits in the return data to what we actually use */
oinfo->oi_oa->o_valid = OBD_MD_FLID;
- obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL, FILTER_VALID_FLAGS);
+ obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
+
+ /* Store inode version in reply */
+ curr_version = fsfilt_get_version(exp->exp_obd, dentry->d_inode);
+ if ((__s64)curr_version != -EOPNOTSUPP) {
+ oinfo->oi_oa->o_valid |= OBD_MD_FLDATAVERSION;
+ oinfo->oi_oa->o_data_version = curr_version;
+ }
f_dput(dentry);
RETURN(rc);
}
if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID)) {
unsigned long now = jiffies;
- ll_vfs_dq_init(inode);
/* Filter truncates and writes are serialized by
* i_alloc_sem, see the comment in
* filter_preprw_write.*/
if (IS_ERR(handle))
GOTO(out_unlock, rc = PTR_ERR(handle));
}
+
+ /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
+ if (ia_valid & (ATTR_SIZE | ATTR_UID | ATTR_GID))
+ ll_vfs_dq_init(inode);
+
if (oa->o_valid & OBD_MD_FLFLAGS) {
rc = fsfilt_iocontrol(exp->exp_obd, dentry,
FSFILT_IOC_SETFLAGS, (long)&oa->o_flags);
oa->o_valid = OBD_MD_FLID;
/* Quota release need uid/gid info */
- obdo_from_inode(oa, dentry->d_inode, NULL,
+ obdo_from_inode(oa, dentry->d_inode,
FILTER_VALID_FLAGS | OBD_MD_FLUID | OBD_MD_FLGID);
EXIT;
int skip_orphan;
ENTRY;
- LASSERT(down_trylock(&filter->fo_create_locks[oa->o_seq]) != 0);
+ LASSERT_MUTEX_LOCKED(&filter->fo_create_locks[oa->o_seq]);
memset(&doa, 0, sizeof(doa));
}
/* This causes inflight precreates to abort and drop lock */
cfs_set_bit(group, &filter->fo_destroys_in_progress);
- cfs_down(&filter->fo_create_locks[group]);
+ cfs_mutex_lock(&filter->fo_create_locks[group]);
if (!cfs_test_bit(group, &filter->fo_destroys_in_progress)) {
CERROR("%s:["LPU64"] destroys_in_progress already cleared\n",
exp->exp_obd->obd_name, group);
- cfs_up(&filter->fo_create_locks[group]);
+ cfs_mutex_unlock(&filter->fo_create_locks[group]);
RETURN(0);
}
diff = oa->o_id - last;
CDEBUG(D_HA, "filter_last_id() = "LPU64" -> diff = %d\n",
last, diff);
- if (-diff > OST_MAX_PRECREATE) {
+ if (-diff > (OST_MAX_PRECREATE * 3) / 2) {
CERROR("%s: ignoring bogus orphan destroy request: "
"obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
oa->o_id, last);
cfs_clear_bit(group, &filter->fo_destroys_in_progress);
}
} else {
- cfs_down(&filter->fo_create_locks[group]);
+ cfs_mutex_lock(&filter->fo_create_locks[group]);
if (oti->oti_conn_cnt < exp->exp_conn_cnt) {
CERROR("%s: dropping old precreate request\n",
obd->obd_name);
/* else diff == 0 */
GOTO(out, rc = 0);
out:
- cfs_up(&filter->fo_create_locks[group]);
+ cfs_mutex_unlock(&filter->fo_create_locks[group]);
return rc;
}
{
struct filter_obd *filter = &obd->u.filter;
int blockbits = obd->u.obt.obt_sb->s_blocksize_bits;
+ struct lr_server_data *lsd = class_server_data(obd);
int rc;
ENTRY;
((filter->fo_tot_dirty + filter->fo_tot_pending +
osfs->os_bsize - 1) >> blockbits));
- if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOSPC)) {
- struct lr_server_data *lsd = class_server_data(obd);
- int index = le32_to_cpu(lsd->lsd_ost_index);
+ if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOSPC,
+ le32_to_cpu(lsd->lsd_ost_index)))
+ osfs->os_bfree = osfs->os_bavail = 2;
- if (cfs_fail_val == -1 || index == cfs_fail_val)
- osfs->os_bfree = osfs->os_bavail = 2;
- else if (cfs_fail_loc & OBD_FAIL_ONCE)
- cfs_fail_loc &= ~OBD_FAILED; /* reset flag */
- }
+ if (OBD_FAIL_CHECK_VALUE(OBD_FAIL_OST_ENOINO,
+ le32_to_cpu(lsd->lsd_ost_index)))
+ osfs->os_ffree = 0;
/* set EROFS to state field if FS is mounted as RDONLY. The goal is to
* stop creating files on MDS if OST is not good shape to create
filter = &obd->u.filter;
- LASSERT(down_trylock(&filter->fo_create_locks[group]) != 0);
+ LASSERT_MUTEX_LOCKED(&filter->fo_create_locks[group]);
OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_PRECREATE, obd_timeout / 2);
} else {
/* Use these existing objects if they are
* zero length. */
- if (dchild->d_inode->i_size == 0) {
+ if (i_size_read(dchild->d_inode) == 0) {
rc = filter_use_existing_obj(obd,dchild,
&handle, &cleanup_phase);
if (rc == 0)
rc = ll_vfs_create(dparent->d_inode, dchild,
S_IFREG | S_ISUID | S_ISGID | 0666, NULL);
if (rc) {
- CERROR("create failed rc = %d\n", rc);
+ CWARN("%s: create failed: rc = %d\n", obd->obd_name,rc);
if (rc == -ENOSPC) {
os_ffree = filter_calc_free_inodes(obd);
- if (os_ffree == -1)
+ if (os_ffree == -1)
GOTO(cleanup, rc);
if (obd->obd_osfs.os_bavail <
(obd->obd_osfs.os_blocks >> 10)) {
- if (oa->o_valid & OBD_MD_FLFLAGS)
+ if (oa->o_valid & OBD_MD_FLFLAGS) {
oa->o_flags |= OBD_FL_NOSPC_BLK;
- else {
+ } else {
oa->o_valid |= OBD_MD_FLFLAGS;
oa->o_flags = OBD_FL_NOSPC_BLK;
}
- CERROR("%s: free inode "LPU64"\n",
- obd->obd_name, os_ffree);
+ CWARN("%s: free inode "LPU64"\n",
+ obd->obd_name, os_ffree);
}
}
GOTO(cleanup, rc);
dchild->d_inode->i_ino);
set_last_id:
- /* Set a/c/m time to a insane large negative value at creation
- * time so that any timestamp arriving from the client will
- * always be newer and update the inode.
- * See LU-221 for details */
+ /* Initialize a/c/m time so any client timestamp will always
+ * be newer and update the inode. ctime = 0 is also handled
+ * specially in fsfilt_ext3_setattr(). See LU-221, LU-1042 */
iattr.ia_valid = ATTR_ATIME | ATTR_MTIME | ATTR_CTIME;
- LTIME_S(iattr.ia_atime) = INT_MIN + 24 * 3600;
- LTIME_S(iattr.ia_mtime) = INT_MIN + 24 * 3600;
- LTIME_S(iattr.ia_ctime) = INT_MIN + 24 * 3600;
+ LTIME_S(iattr.ia_atime) = 0;
+ LTIME_S(iattr.ia_mtime) = 0;
+ LTIME_S(iattr.ia_ctime) = 0;
err = fsfilt_setattr(obd, dchild, handle, &iattr, 0);
- if (err)
- CERROR("unable to initialize a/c/m time of newly"
- "created inode\n");
+ if (err)
+ CWARN("%s: unable to initialize a/c/m time of newly "
+ "created object %.*s: rc = %d\n",
+ obd->obd_name, dchild->d_name.len,
+ dchild->d_name.name, err);
if (!recreate_obj) {
filter_set_last_id(filter, next_id, group);
err = filter_update_last_objid(obd, group, 0);
if (err)
- CERROR("unable to write lastobjid "
- "but file created\n");
+ CERROR("%s: unable to write lastobjid "
+ "but file created: rc = %d\n",
+ obd->obd_name, err);
}
cleanup:
rc = -EINVAL;
} else {
diff = 1;
- cfs_down(&filter->fo_create_locks[oa->o_seq]);
+ cfs_mutex_lock(&filter->fo_create_locks[oa->o_seq]);
rc = filter_precreate(obd, oa, oa->o_seq, &diff);
- cfs_up(&filter->fo_create_locks[oa->o_seq]);
+ cfs_mutex_unlock(&filter->fo_create_locks[oa->o_seq]);
}
} else {
rc = filter_handle_precreate(exp, oa, oa->o_seq, oti);
if (fcc != NULL)
*fcc = oa->o_lcookie;
}
- ll_vfs_dq_init(dchild->d_inode);
/* we're gonna truncate it first in order to avoid possible deadlock:
* P1 P2
GOTO(cleanup, rc = PTR_ERR(handle));
}
+ /* Locking order: i_mutex -> journal_lock -> dqptr_sem. LU-952 */
+ ll_vfs_dq_init(dchild->d_inode);
+
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = 0;
rc = fsfilt_setattr(obd, dchild, handle, &iattr, 1);
cleanup_phase = 4; /* fsfilt_commit */
/* Quota release need uid/gid of inode */
- obdo_from_inode(oa, dchild->d_inode, NULL, OBD_MD_FLUID|OBD_MD_FLGID);
+ obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID | OBD_MD_FLGID);
filter_fmd_drop(exp, oa->o_id, oa->o_seq);
UNLOCK_INODE_MUTEX(dentry->d_inode);
oinfo->oi_oa->o_valid = OBD_MD_FLID;
- obdo_from_inode(oinfo->oi_oa, dentry->d_inode, NULL,
- FILTER_VALID_FLAGS);
+ obdo_from_inode(oinfo->oi_oa, dentry->d_inode, FILTER_VALID_FLAGS);
pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
CDEBUG(D_HA, "syncing ost %s\n", obd->obd_name);
rc = fsfilt_sync(obd, obd->u.obt.obt_sb);
- lvfs_set_rdonly(obd, obd->u.obt.obt_sb);
- RETURN(0);
+ rc = lvfs_set_rdonly(obd, obd->u.obt.obt_sb);
+ RETURN(rc);
}
case OBD_IOC_CATLOGLIST: {
.o_create = filter_create,
.o_setattr = filter_setattr,
.o_destroy = filter_destroy,
- .o_brw = filter_brw,
.o_punch = filter_truncate,
.o_sync = filter_sync,
.o_preprw = filter_preprw,