rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
if (rc == sizeof(*ff)) {
/* 2) delete the old XATTR_NAME_FID */
- ll_vfs_dq_init(inode);
- rc = osd_removexattr(dentry, inode, XATTR_NAME_FID);
+ dquot_initialize(inode);
+ rc = ll_vfs_removexattr(dentry, inode, XATTR_NAME_FID);
if (rc)
GOTO(stop, rc);
GOTO(out, rc = 0);
}
- if (!scrub->os_partial_scan)
+ if (!scrub->os_partial_scan) {
+ spin_lock(&scrub->os_lock);
scrub->os_full_speed = 1;
+ spin_unlock(&scrub->os_lock);
+ }
switch (val) {
case SCRUB_NEXT_NOLMA:
GOTO(out, rc = 0);
} else {
- if (!scrub->os_partial_scan)
+ if (!scrub->os_partial_scan) {
+ spin_lock(&scrub->os_lock);
scrub->os_full_speed = 1;
-
+ spin_unlock(&scrub->os_lock);
+ }
sf->sf_flags |= SF_INCONSISTENT;
/* XXX: If the device is restored from file-level backup, then
if (flags & SS_RESET)
scrub_file_reset(scrub, dev->od_uuid, 0);
+ spin_lock(&scrub->os_lock);
if (flags & SS_AUTO_FULL) {
scrub->os_full_speed = 1;
scrub->os_partial_scan = 0;
}
- spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
scrub->os_waiting = 0;
scrub->os_paused = 0;
struct osd_inode_id *lid;
int rc;
- if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
- struct l_wait_info lwi;
-
- lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
- if (likely(lwi.lwi_timeout > 0))
- l_wait_event(thread->t_ctl_waitq,
- !list_empty(&scrub->os_inconsistent_items) ||
- !thread_is_running(thread),
- &lwi);
- }
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0)
+ wait_event_idle_timeout(
+ thread->t_ctl_waitq,
+ !list_empty(&scrub->os_inconsistent_items) ||
+ !thread_is_running(thread),
+ cfs_time_seconds(cfs_fail_val));
if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
spin_lock(&scrub->os_lock);
oii = list_entry(scrub->os_inconsistent_items.next,
struct osd_inconsistent_item, oii_list);
- spin_unlock(&scrub->os_lock);
*oic = &oii->oii_cache;
scrub->os_in_prior = 1;
+ spin_unlock(&scrub->os_lock);
return 0;
}
struct osd_iit_param *param,
struct osd_idmap_cache *oic, bool *noslot, int rc)
{
- struct l_wait_info lwi = { 0 };
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
struct scrub_file *sf = &scrub->os_file;
struct ptlrpc_thread *thread = &scrub->os_thread;
rc = osd_scrub_check_update(info, dev, oic, rc);
if (rc != 0) {
+ spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
+ spin_unlock(&scrub->os_lock);
return rc;
}
}
if (scrub->os_in_prior) {
+ spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
+ spin_unlock(&scrub->os_lock);
return 0;
}
}
if (it != NULL)
- l_wait_event(thread->t_ctl_waitq, osd_scrub_wakeup(scrub, it),
- &lwi);
+ wait_event_idle(thread->t_ctl_waitq,
+ osd_scrub_wakeup(scrub, it));
if (!ooc || osd_scrub_has_window(scrub, ooc))
*noslot = false;
LASSERT(!(flags & SS_AUTO_PARTIAL));
down_write(&scrub->os_rwsem);
+ spin_lock(&scrub->os_lock);
scrub->os_in_join = 1;
if (flags & SS_SET_FAILOUT)
sf->sf_param |= SP_FAILOUT;
sf->sf_flags |= SF_AUTO;
scrub->os_full_speed = 1;
}
+ spin_unlock(&scrub->os_lock);
scrub->os_new_checked = 0;
if (sf->sf_pos_last_checkpoint != 0)
__u64 *pos;
__u64 *count;
struct osd_iit_param *param;
- struct l_wait_info lwi = { 0 };
__u32 limit;
int rc;
bool noslot = true;
if (rc == 0) {
__u64 used = ksfs->f_files - ksfs->f_ffree;
- do_div(used, sf->sf_items_updated_prior);
+ used = div64_u64(used, sf->sf_items_updated_prior);
/* If we hit too much inconsistent OI
* mappings during the partial scan,
* then scan the device completely. */
sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
SF_UPGRADE | SF_AUTO);
sf->sf_status = SS_COMPLETED;
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread) ||
- !scrub->os_partial_scan ||
- scrub->os_in_join ||
- !list_empty(&scrub->os_inconsistent_items),
- &lwi);
+ wait_event_idle(
+ thread->t_ctl_waitq,
+ !thread_is_running(thread) ||
+ !scrub->os_partial_scan ||
+ scrub->os_in_join ||
+ !list_empty(&scrub->os_inconsistent_items));
sf->sf_flags = saved_flags;
sf->sf_status = SS_SCANNING;
full:
if (!preload) {
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread) || !scrub->os_in_join,
- &lwi);
+ wait_event_idle(thread->t_ctl_waitq,
+ !thread_is_running(thread) ||
+ !scrub->os_in_join);
if (unlikely(!thread_is_running(thread)))
RETURN(0);
}
if (!scrub->os_full_speed && !scrub->os_partial_scan) {
- struct l_wait_info lwi = { 0 };
struct osd_otable_it *it = dev->od_otable_it;
struct osd_otable_cache *ooc = &it->ooi_cache;
- l_wait_event(thread->t_ctl_waitq,
- it->ooi_user_ready || !thread_is_running(thread),
- &lwi);
+ wait_event_idle(thread->t_ctl_waitq,
+ it->ooi_user_ready ||
+ !thread_is_running(thread));
if (unlikely(!thread_is_running(thread)))
GOTO(post, rc = 0);
};
struct osd_ios_filldir_buf {
-#ifdef HAVE_DIR_CONTEXT
/* please keep it as first member */
struct dir_context ctx;
-#endif
struct osd_thread_info *oifb_info;
struct osd_device *oifb_dev;
struct dentry *oifb_dentry;
int oifb_items;
};
-static inline struct dentry *
-osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen)
-{
- struct dentry *dentry;
-
- dentry = ll_lookup_one_len(name, parent, namelen);
- if (IS_ERR(dentry)) {
- int rc = PTR_ERR(dentry);
-
- if (rc != -ENOENT)
- CERROR("Fail to find %.*s in %.*s (%lu/%u): rc = %d\n",
- namelen, name, parent->d_name.len,
- parent->d_name.name, parent->d_inode->i_ino,
- parent->d_inode->i_generation, rc);
-
- return dentry;
- }
-
- if (dentry->d_inode == NULL) {
- dput(dentry);
- return ERR_PTR(-ENOENT);
- }
-
- return dentry;
-}
-
static int
osd_ios_new_item(struct osd_device *dev, struct dentry *dentry,
scandir_t scandir, filldir_t filldir)
int rc;
ENTRY;
+ if (!inode) {
+ CDEBUG(D_INODE, "%s: child '%.*s' lacks inode: rc = -2\n",
+ osd_name(dev), namelen, name);
+ RETURN(-ENOENT);
+ }
+
rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
&info->oti_ost_attrs);
if (rc != 0 && rc != -ENODATA) {
inode);
}
- rc = osd_oi_lookup(info, dev, &tfid, id2, 0);
+ /* Since this is called from iterate_dir() the inode lock will be taken */
+ rc = osd_oi_lookup(info, dev, &tfid, id2, OI_LOCKED);
if (rc != 0) {
if (rc != -ENOENT)
RETURN(rc);
RETURN(0);
scrub->os_lf_scanned++;
- child = osd_ios_lookup_one_len(name, parent, namelen);
+ child = osd_lookup_one_len(dev, name, parent, namelen);
if (IS_ERR(child)) {
+ rc = PTR_ERR(child);
CDEBUG(D_LFSCK, "%s: cannot lookup child '%.*s': rc = %d\n",
- osd_name(dev), namelen, name, (int)PTR_ERR(child));
- RETURN(0);
+ osd_name(dev), namelen, name, rc);
+ RETURN(rc);
+ } else if (!child->d_inode) {
+ dput(child);
+ CDEBUG(D_INODE, "%s: child '%.*s' lacks inode\n",
+ osd_name(dev), namelen, name);
+ RETURN(-ENOENT);
}
inode = child->d_inode;
if (name[0] == '.')
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
if (map->olm_name == NULL)
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
{
struct osd_ios_filldir_buf *fill_buf =
(struct osd_ios_filldir_buf *)buf;
+ struct osd_device *dev = fill_buf->oifb_dev;
struct dentry *child;
struct lu_fid tfid;
int rc = 0;
if (name[0] != '[')
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
if (map->olm_name == NULL)
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
+ else if (!child->d_inode)
+ GOTO(out_put, rc = -ENOENT);
if (!(map->olm_flags & OLF_NO_OI))
rc = osd_ios_scan_one(fill_buf->oifb_info, dev,
if (rc == 0 && map->olm_flags & OLF_SCAN_SUBITEMS)
rc = osd_ios_new_item(dev, child, map->olm_scandir,
map->olm_filldir);
+out_put:
dput(child);
RETURN(rc);
struct dentry *dentry, filldir_t filldir)
{
struct osd_ios_filldir_buf buf = {
-#ifdef HAVE_DIR_CONTEXT
.ctx.actor = filldir,
-#endif
.oifb_info = info,
.oifb_dev = dev,
.oifb_dentry = dentry };
filp->f_pos = 0;
filp->f_path.dentry = dentry;
- filp->f_mode = FMODE_64BITHASH;
+ filp->f_flags |= O_NOATIME;
+ filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY;
filp->f_mapping = inode->i_mapping;
filp->f_op = fops;
filp->private_data = NULL;
- set_file_inode(filp, inode);
+ filp->f_cred = current_cred();
+ filp->f_inode = inode;
+ rc = osd_security_file_alloc(filp);
+ if (rc)
+ RETURN(rc);
do {
buf.oifb_items = 0;
-#ifdef HAVE_DIR_CONTEXT
- buf.ctx.pos = filp->f_pos;
-#ifdef HAVE_ITERATE_SHARED
- rc = fops->iterate_shared(filp, &buf.ctx);
-#else
- rc = fops->iterate(filp, &buf.ctx);
-#endif
- filp->f_pos = buf.ctx.pos;
-#else
- rc = fops->readdir(filp, &buf, filldir);
-#endif
+ rc = iterate_dir(filp, &buf.ctx);
} while (rc >= 0 && buf.oifb_items > 0 &&
filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
fops->release(inode, filp);
* FID directly, instead, the OI scrub will scan the OI structure
* and try to re-generate the LMA from the OI mapping. But if the
* OI mapping crashed or lost also, then we have to give up under
- * double failure cases. */
+ * double failure cases.
+ */
+ spin_lock(&scrub->os_lock);
scrub->os_convert_igif = 1;
- child = osd_ios_lookup_one_len(dot_lustre_name, dentry,
- strlen(dot_lustre_name));
+ spin_unlock(&scrub->os_lock);
+ child = osd_lookup_one_len_unlocked(dev, dot_lustre_name, dentry,
+ strlen(dot_lustre_name));
if (IS_ERR(child)) {
- rc = PTR_ERR(child);
- if (rc == -ENOENT) {
- /* It is 1.8 MDT device. */
- if (!(sf->sf_flags & SF_UPGRADE)) {
- scrub_file_reset(scrub, dev->od_uuid,
- SF_UPGRADE);
- sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
- rc = scrub_file_store(info->oti_env, scrub);
- } else {
- rc = 0;
- }
+ if (PTR_ERR(child) != -ENOENT)
+ RETURN(PTR_ERR(child));
+ goto out_scrub;
+ }
+
+ /* For lustre-2.x (x <= 3), the ".lustre" has NO FID-in-LMA,
+ * so the client will get an IGIF for the ".lustre" object
+ * when the MDT restarts.
+ *
+ * From the OI scrub view, when the MDT upgrades to Lustre-2.4,
+ * it does not know whether some old clients cached the
+ * ".lustre" IGIF during the upgrade. Two choices:
+ *
+ * 1) Generate IGIF-in-LMA and IGIF-in-OI for the ".lustre".
+ * It will allow the old connected clients to access the
+ * ".lustre" with the cached IGIF, but it will cause other
+ * code on the MDT to fail the "fid_is_dot_lustre()" check.
+ *
+ * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
+ * for ".lustre" regardless of whether some clients cached
+ * the ".lustre" IGIF or not. It enables the
+ * "fid_is_dot_lustre()" check on the MDT, although old
+ * connected clients will no longer be able to access the
+ * ".lustre" with the cached IGIF.
+ *
+ * Usually, it is a rare case for old connected clients to
+ * access the ".lustre" with a cached IGIF, so we prefer
+ * solution 2).
+ */
+ inode_lock(dentry->d_inode);
+ rc = osd_ios_scan_one(info, dev, dentry->d_inode,
+ child->d_inode, &LU_DOT_LUSTRE_FID,
+ dot_lustre_name,
+ strlen(dot_lustre_name), 0);
+ inode_unlock(dentry->d_inode);
+ if (rc == -ENOENT) {
+out_scrub:
+ /* It is 1.8 MDT device. */
+ if (!(sf->sf_flags & SF_UPGRADE)) {
+ scrub_file_reset(scrub, dev->od_uuid,
+ SF_UPGRADE);
+ sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
+ rc = scrub_file_store(info->oti_env, scrub);
+ } else {
+ rc = 0;
}
- } else {
- /* For lustre-2.x (x <= 3), the ".lustre" has NO FID-in-LMA,
- * so the client will get IGIF for the ".lustre" object when
- * the MDT restart.
- *
- * From the OI scrub view, when the MDT upgrade to Lustre-2.4,
- * it does not know whether there are some old clients cached
- * the ".lustre" IGIF during the upgrading. Two choices:
- *
- * 1) Generate IGIF-in-LMA and IGIF-in-OI for the ".lustre".
- * It will allow the old connected clients to access the
- * ".lustre" with cached IGIF. But it will cause others
- * on the MDT failed to check "fid_is_dot_lustre()".
- *
- * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
- * for ".lustre" in spite of whether there are some clients
- * cached the ".lustre" IGIF or not. It enables the check
- * "fid_is_dot_lustre()" on the MDT, although it will cause
- * that the old connected clients cannot access the ".lustre"
- * with the cached IGIF.
- *
- * Usually, it is rare case for the old connected clients
- * to access the ".lustre" with cached IGIF. So we prefer
- * to the solution 2). */
- rc = osd_ios_scan_one(info, dev, dentry->d_inode,
- child->d_inode, &LU_DOT_LUSTRE_FID,
- dot_lustre_name,
- strlen(dot_lustre_name), 0);
- if (rc == 0)
- rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
- osd_ios_dl_fill);
- dput(child);
+ } else if (rc == 0) {
+ rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
+ osd_ios_dl_fill);
}
+ dput(child);
RETURN(rc);
}
RETURN(rc);
}
- child = osd_ios_lookup_one_len(ADMIN_USR, dentry, strlen(ADMIN_USR));
- if (!IS_ERR(child)) {
+ child = osd_lookup_one_len_unlocked(dev, ADMIN_USR, dentry,
+ strlen(ADMIN_USR));
+ if (IS_ERR(child)) {
+ rc = PTR_ERR(child);
+ } else {
+ inode_lock(dentry->d_inode);
rc = osd_ios_scan_one(info, dev, dentry->d_inode,
child->d_inode, NULL, ADMIN_USR,
strlen(ADMIN_USR), 0);
+ inode_unlock(dentry->d_inode);
dput(child);
- } else {
- rc = PTR_ERR(child);
}
if (rc != 0 && rc != -ENOENT)
- RETURN(rc);
-
- child = osd_ios_lookup_one_len(ADMIN_GRP, dentry, strlen(ADMIN_GRP));
- if (!IS_ERR(child)) {
- rc = osd_ios_scan_one(info, dev, dentry->d_inode,
- child->d_inode, NULL, ADMIN_GRP,
- strlen(ADMIN_GRP), 0);
- dput(child);
- } else {
- rc = PTR_ERR(child);
- }
+ GOTO(out, rc);
- if (rc == -ENOENT)
- rc = 0;
+ child = osd_lookup_one_len_unlocked(dev, ADMIN_GRP, dentry,
+ strlen(ADMIN_GRP));
+ if (IS_ERR(child))
+ GOTO(out, rc = PTR_ERR(child));
- RETURN(rc);
+ inode_lock(dentry->d_inode);
+ rc = osd_ios_scan_one(info, dev, dentry->d_inode,
+ child->d_inode, NULL, ADMIN_GRP,
+ strlen(ADMIN_GRP), 0);
+ inode_unlock(dentry->d_inode);
+ dput(child);
+out:
+ RETURN(rc == -ENOENT ? 0 : rc);
}
static void osd_initial_OI_scrub(struct osd_thread_info *info,
dev->od_igif_inoi = 1;
while (1) {
+ /* Don't take inode_lock here since scandir() callbacks
+ * can call VFS functions which may manually take the
+ * inode lock themselves, like iterate_dir(). Since this
+ * is the case it is best to let the scandir()
+ * callbacks manage the inode lock.
+ */
scandir(info, dev, dentry, filldir);
if (item != NULL) {
dput(item->oii_dentry);
continue;
}
- child = osd_ios_lookup_one_len(map->olm_name,
- osd_sb(dev)->s_root,
- map->olm_namelen);
- if (!IS_ERR(child))
- dput(child);
- else if (PTR_ERR(child) == -ENOENT)
+ child = osd_lookup_one_len_unlocked(dev, map->olm_name,
+ osd_sb(dev)->s_root,
+ map->olm_namelen);
+ if (PTR_ERR(child) == -ENOENT ||
+ (!IS_ERR(child) && !child->d_inode))
osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
NULL, DTO_INDEX_DELETE,
true, 0, NULL);
+ if (!IS_ERR(child))
+ dput(child);
map++;
}
/* od_otable_mutex: prevent curcurrent start/stop */
mutex_lock(&dev->od_otable_mutex);
+ spin_lock(&scrub->os_lock);
scrub->os_paused = 1;
+ spin_unlock(&scrub->os_lock);
scrub_stop(scrub);
mutex_unlock(&dev->od_otable_mutex);
}
OBD_SET_CTXT_MAGIC(ctxt);
ctxt->pwdmnt = dev->od_mnt;
ctxt->pwd = dev->od_mnt->mnt_root;
- ctxt->fs = get_ds();
+ ctxt->fs = KERNEL_DS;
init_waitqueue_head(&scrub->os_thread.t_ctl_waitq);
init_rwsem(&scrub->os_rwsem);
}
inode = file_inode(filp);
+ ldiskfs_set_inode_flag(inode, LDISKFS_INODE_JOURNAL_DATA);
if (!dev->od_dt_dev.dd_rdonly) {
/* 'What the @fid is' is not imporatant, because the object
* has no OI mapping, and only is visible inside the OSD.*/
RETURN(obj ? PTR_ERR(obj) : -ENOENT);
#ifndef HAVE_S_UUID_AS_UUID_T
- memcpy(dev->od_uuid.b, sb->s_uuid, UUID_SIZE);
+ memcpy(dev->od_uuid.b, sb->s_uuid, sizeof(dev->od_uuid));
#else
uuid_copy(&dev->od_uuid, &sb->s_uuid);
#endif
dt_object_put_nocache(env, scrub->os_obj);
scrub->os_obj = NULL;
}
- if (dev->od_oi_table != NULL)
- osd_oi_fini(osd_oti_get(env), dev);
}
/* object table based iteration APIs */
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
struct osd_otable_cache *ooc = &it->ooi_cache;
struct ptlrpc_thread *thread = &scrub->os_thread;
- struct l_wait_info lwi = { 0 };
int rc;
ENTRY;
}
if (it->ooi_all_cached) {
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread),
- &lwi);
+ wait_event_idle(thread->t_ctl_waitq,
+ !thread_is_running(thread));
RETURN(1);
}
}
if (it->ooi_cache.ooc_pos_preload >= scrub->os_pos_current)
- l_wait_event(thread->t_ctl_waitq,
- osd_otable_it_wakeup(scrub, it),
- &lwi);
+ wait_event_idle(thread->t_ctl_waitq,
+ osd_otable_it_wakeup(scrub, it));
if (!thread_is_running(thread) && !it->ooi_used_outside)
RETURN(1);
oii->oii_cache = *oic;
oii->oii_insert = insert;
+ spin_lock(&lscrub->os_lock);
if (lscrub->os_partial_scan) {
__u64 now = ktime_get_real_seconds();
lscrub->os_full_scrub = 1;
}
- spin_lock(&lscrub->os_lock);
if (unlikely(!thread_is_running(thread))) {
spin_unlock(&lscrub->os_lock);
OBD_FREE_PTR(oii);