* can make sure the client can be mounted as long as MDT0 is
* avaible */
err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
- ktime_get_seconds() -OBD_STATFS_CACHE_SECONDS,
+ ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
OBD_STATFS_FOR_MDT0);
if (err)
GOTO(out_md_fid, err);
/* OBD_CONNECT_CKSUM should always be set, even if checksums are
* disabled by default, because it can still be enabled on the
- * fly via /proc. As a consequence, we still need to come to an
- * agreement on the supported algorithms at connect time */
+ * fly via /sys. As a consequence, we still need to come to an
+ * agreement on the supported algorithms at connect time
+ */
data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
lli->lli_opendir_pid = 0;
lli->lli_sa_enabled = 0;
lli->lli_def_stripe_offset = -1;
+ init_rwsem(&lli->lli_lsm_sem);
} else {
mutex_init(&lli->lli_size_mutex);
lli->lli_symlink_name = NULL;
OBD_ALLOC_PTR(cfg);
if (cfg == NULL)
- GOTO(out_free, err = -ENOMEM);
+ GOTO(out_free_cfg, err = -ENOMEM);
/* client additional sb info */
lsi->lsi_llsbi = sbi = ll_init_sbi();
if (!sbi)
- GOTO(out_free, err = -ENOMEM);
+ GOTO(out_free_cfg, err = -ENOMEM);
err = ll_options(lsi->lsi_lmd->lmd_opts, sbi);
if (err)
- GOTO(out_free, err);
+ GOTO(out_free_cfg, err);
err = super_setup_bdi_name(sb, "lustre-%p", sb);
if (err)
- GOTO(out_free, err);
+ GOTO(out_free_cfg, err);
#ifndef HAVE_DCACHE_LOCK
/* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
}
/* Generate a string unique to this super, in case some joker tries
- to mount the same fs at two mount points.
- Use the address of the super itself.*/
+ * to mount the same fs at two mount points.
+ * Use the address of the super itself.
+ */
cfg->cfg_instance = sb;
cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
cfg->cfg_callback = class_config_llog_handler;
/* set up client obds */
err = lustre_process_log(sb, profilenm, cfg);
if (err < 0)
- GOTO(out_proc, err);
+ GOTO(out_debugfs, err);
/* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
lprof = class_get_profile(profilenm);
LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
" read from the MGS. Does that filesystem "
"exist?\n", profilenm);
- GOTO(out_proc, err = -EINVAL);
+ GOTO(out_debugfs, err = -EINVAL);
}
CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
lprof->lp_md, lprof->lp_dt);
dt_len = strlen(lprof->lp_dt) + instlen + 2;
OBD_ALLOC(dt, dt_len);
if (!dt)
- GOTO(out_proc, err = -ENOMEM);
+ GOTO(out_profile, err = -ENOMEM);
snprintf(dt, dt_len - 1, "%s-%p", lprof->lp_dt, cfg->cfg_instance);
md_len = strlen(lprof->lp_md) + instlen + 2;
OBD_ALLOC(md, md_len);
if (!md)
- GOTO(out_proc, err = -ENOMEM);
+ GOTO(out_free_dt, err = -ENOMEM);
snprintf(md, md_len - 1, "%s-%p", lprof->lp_md, cfg->cfg_instance);
/* connections, registrations, sb setup */
err = client_common_fill_super(sb, md, dt, mnt);
if (err < 0)
- GOTO(out_proc, err);
+ GOTO(out_free_md, err);
sbi->ll_client_common_fill_super_succeeded = 1;
-out_proc:
- if (err < 0)
- ll_debugfs_unregister_super(sb);
-out_free:
+out_free_md:
if (md)
OBD_FREE(md, md_len);
+out_free_dt:
if (dt)
OBD_FREE(dt, dt_len);
- if (lprof != NULL)
+out_profile:
+ if (lprof)
class_put_profile(lprof);
+out_debugfs:
+ if (err < 0)
+ ll_debugfs_unregister_super(sb);
+out_free_cfg:
if (cfg)
OBD_FREE_PTR(cfg);
+
if (err)
ll_put_super(sb);
else if (sbi->ll_flags & LL_SBI_VERBOSE)
{
struct lu_fid *fid;
struct lmv_stripe_md *lsm = md->lmv;
+ struct ll_inode_info *lli = ll_i2info(inode);
int i;
LASSERT(lsm != NULL);
+
+ CDEBUG(D_INODE, "%s: "DFID" set dir layout:\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid));
+ lsm_md_dump(D_INODE, lsm);
+
/* XXX sigh, this lsm_root initialization should be in
* LMV layer, but it needs ll_iget right now, so we
* put this here right now. */
int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
lsm->lsm_md_oinfo[i].lmo_root = NULL;
+ while (i-- > 0) {
+ iput(lsm->lsm_md_oinfo[i].lmo_root);
+ lsm->lsm_md_oinfo[i].lmo_root = NULL;
+ }
return rc;
}
}
+ lli->lli_lsm_md = lsm;
+
return 0;
}
{
struct ll_inode_info *lli = ll_i2info(inode);
struct lmv_stripe_md *lsm = md->lmv;
- int rc;
+ int rc = 0;
+
ENTRY;
LASSERT(S_ISDIR(inode->i_mode));
if (!lsm)
RETURN(0);
- /* Compare the old and new stripe information */
+ /*
+ * Normally the dir layout doesn't change, so take only the read lock
+ * to check it and avoid blocking other MD operations; take the write
+ * lock when no layout has been set yet.
+ */
+ if (lli->lli_lsm_md)
+ down_read(&lli->lli_lsm_sem);
+ else
+ down_write(&lli->lli_lsm_sem);
+
+ /*
+ * If the dir layout mismatches, check whether the version has increased,
+ * which means the layout has changed; this happens in dir migration and
+ * lfsck.
+ */
if (lli->lli_lsm_md && !lsm_md_eq(lli->lli_lsm_md, lsm)) {
- struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
- int idx;
- bool layout_changed = lsm->lsm_md_layout_version >
- old_lsm->lsm_md_layout_version;
-
- int mask = layout_changed ? D_INODE : D_ERROR;
-
- CDEBUG(mask,
- "%s: inode@%p "DFID" lmv layout %s magic %#x/%#x "
- "stripe count %d/%d master_mdt %d/%d "
- "hash_type %#x/%#x version %d/%d migrate offset %d/%d "
- "migrate hash %#x/%#x pool %s/%s\n",
- ll_get_fsname(inode->i_sb, NULL, 0), inode,
- PFID(&lli->lli_fid),
- layout_changed ? "changed" : "mismatch",
- lsm->lsm_md_magic, old_lsm->lsm_md_magic,
- lsm->lsm_md_stripe_count,
- old_lsm->lsm_md_stripe_count,
- lsm->lsm_md_master_mdt_index,
- old_lsm->lsm_md_master_mdt_index,
- lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version,
- old_lsm->lsm_md_layout_version,
- lsm->lsm_md_migrate_offset,
- old_lsm->lsm_md_migrate_offset,
- lsm->lsm_md_migrate_hash,
- old_lsm->lsm_md_migrate_hash,
- lsm->lsm_md_pool_name,
- old_lsm->lsm_md_pool_name);
-
- for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++)
- CDEBUG(mask, "old stripe[%d] "DFID"\n",
- idx, PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
-
- for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++)
- CDEBUG(mask, "new stripe[%d] "DFID"\n",
- idx, PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
-
- if (!layout_changed)
- RETURN(-EINVAL);
+ if (lsm->lsm_md_layout_version <=
+ lli->lli_lsm_md->lsm_md_layout_version) {
+ CERROR("%s: "DFID" dir layout mismatch:\n",
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(&lli->lli_fid));
+ lsm_md_dump(D_ERROR, lli->lli_lsm_md);
+ lsm_md_dump(D_ERROR, lsm);
+ GOTO(unlock, rc = -EINVAL);
+ }
+ /* layout changed, switch to write lock */
+ up_read(&lli->lli_lsm_sem);
+ down_write(&lli->lli_lsm_sem);
ll_dir_clear_lsm_md(inode);
}
- /* set the directory layout */
+ /* set directory layout */
if (!lli->lli_lsm_md) {
struct cl_attr *attr;
rc = ll_init_lsm_md(inode, md);
+ up_write(&lli->lli_lsm_sem);
if (rc != 0)
RETURN(rc);
/* set md->lmv to NULL, so the following free lustre_md
* will not free this lsm */
md->lmv = NULL;
- lli->lli_lsm_md = lsm;
+
+ /*
+ * md_merge_attr() may take a long time; since lsm is already set,
+ * switch to the read lock.
+ */
+ down_read(&lli->lli_lsm_sem);
OBD_ALLOC_PTR(attr);
if (attr == NULL)
- RETURN(-ENOMEM);
+ GOTO(unlock, rc = -ENOMEM);
/* validate the lsm */
rc = md_merge_attr(ll_i2mdexp(inode), lsm, attr,
ll_md_blocking_ast);
if (rc != 0) {
OBD_FREE_PTR(attr);
- RETURN(rc);
+ GOTO(unlock, rc);
}
if (md->body->mbo_valid & OBD_MD_FLNLINK)
md->body->mbo_mtime = attr->cat_mtime;
OBD_FREE_PTR(attr);
-
- CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
- lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
- RETURN(0);
}
+unlock:
+ up_read(&lli->lli_lsm_sem);
- /* Compare the old and new stripe information */
- if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
- struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
- int idx;
-
- CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p)"
- "magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d"
- "hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
- ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
- inode, lsm, old_lsm,
- lsm->lsm_md_magic, old_lsm->lsm_md_magic,
- lsm->lsm_md_stripe_count,
- old_lsm->lsm_md_stripe_count,
- lsm->lsm_md_master_mdt_index,
- old_lsm->lsm_md_master_mdt_index,
- lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version,
- old_lsm->lsm_md_layout_version,
- lsm->lsm_md_pool_name,
- old_lsm->lsm_md_pool_name);
-
- for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
- CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n",
- ll_get_fsname(inode->i_sb, NULL, 0), idx,
- PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
- }
-
- RETURN(-EIO);
- }
-
- RETURN(0);
+ RETURN(rc);
}
void ll_clear_inode(struct inode *inode)
void ll_delete_inode(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
+ struct address_space *mapping = &inode->i_data;
+ unsigned long nrpages;
ENTRY;
if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL)
* otherwise we may lose data while umount */
cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1);
- truncate_inode_pages_final(&inode->i_data);
+ truncate_inode_pages_final(mapping);
- LASSERTF(inode->i_data.nrpages == 0, "inode="DFID"(%p) nrpages=%lu, "
+ /* Workaround for LU-118: Note nrpages may not be totally updated when
+ * truncate_inode_pages() returns, as there can be a page in the process
+ * of deletion (inside __delete_from_page_cache()) in the specified
+ * range. Thus mapping->nrpages can be non-zero when this function
+ * returns even after truncation of the whole mapping. Only re-read it
+ * under tree_lock if nrpages isn't already zero.
+ */
+ nrpages = mapping->nrpages;
+ if (nrpages) {
+ spin_lock_irq(&mapping->tree_lock);
+ nrpages = mapping->nrpages;
+ spin_unlock_irq(&mapping->tree_lock);
+ } /* Workaround end */
+
+ LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
"see https://jira.whamcloud.com/browse/LU-118\n",
- PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
+ ll_get_fsname(inode->i_sb, NULL, 0),
+ PFID(ll_inode2fid(inode)), inode, nrpages);
#ifdef HAVE_SBOPS_EVICT_INODE
ll_clear_inode(inode);
struct iattr *attr;
struct md_op_data *op_data;
struct cl_object *obj;
+ struct fsxattr fa = { 0 };
if (get_user(flags, (int __user *)arg))
RETURN(-EFAULT);
+ fa.fsx_projid = ll_i2info(inode)->lli_projid;
+ if (flags & LUSTRE_PROJINHERIT_FL)
+ fa.fsx_xflags = FS_XFLAG_PROJINHERIT;
+
+ rc = ll_ioctl_check_project(inode, &fa);
+ if (rc)
+ RETURN(rc);
+
op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
LUSTRE_OPC_ANY, NULL);
if (IS_ERR(op_data))
return rc;
}
+/*
+ * Release the read-held semaphores recorded in @op_data: op_mea2_sem first,
+ * then op_mea1_sem. Each pointer is reset to NULL once its semaphore has been
+ * released, so calling this function again on the same @op_data finds nothing
+ * left to release.
+ *
+ * This is normally called in ll_finish_md_op_data(), but sometimes it needs
+ * to be called early to avoid deadlock.
+ */
+void ll_unlock_md_op_lsm(struct md_op_data *op_data)
+{
+ if (op_data->op_mea2_sem) {
+ up_read(op_data->op_mea2_sem);
+ op_data->op_mea2_sem = NULL;
+ }
+
+ if (op_data->op_mea1_sem) {
+ up_read(op_data->op_mea1_sem);
+ op_data->op_mea1_sem = NULL;
+ }
+}
+
/* this function prepares md_op_data hint for passing it down to MD stack. */
struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
struct inode *i1, struct inode *i2,
ll_i2gids(op_data->op_suppgids, i1, i2);
op_data->op_fid1 = *ll_inode2fid(i1);
op_data->op_default_stripe_offset = -1;
+
if (S_ISDIR(i1->i_mode)) {
+ down_read(&ll_i2info(i1)->lli_lsm_sem);
+ op_data->op_mea1_sem = &ll_i2info(i1)->lli_lsm_sem;
op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
if (opc == LUSTRE_OPC_MKDIR)
op_data->op_default_stripe_offset =
if (i2) {
op_data->op_fid2 = *ll_inode2fid(i2);
- if (S_ISDIR(i2->i_mode))
+ if (S_ISDIR(i2->i_mode)) {
+ if (i2 != i1) {
+ down_read(&ll_i2info(i2)->lli_lsm_sem);
+ op_data->op_mea2_sem =
+ &ll_i2info(i2)->lli_lsm_sem;
+ }
op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
+ }
} else {
fid_zero(&op_data->op_fid2);
}
void ll_finish_md_op_data(struct md_op_data *op_data)
{
+ ll_unlock_md_op_lsm(op_data);
security_release_secctx(op_data->op_file_secctx,
op_data->op_file_secctx_size);
OBD_FREE_PTR(op_data);