* \retval 0, changed successfully
* \retval -ve, on error
*/
-static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
- struct osd_device *dev,
- const struct lu_fid *fid,
- const struct osd_inode_id *id,
- int ops, bool force,
- enum oi_check_flags flags, bool *exist)
+int osd_scrub_refresh_mapping(struct osd_thread_info *info,
+ struct osd_device *dev,
+ const struct lu_fid *fid,
+ const struct osd_inode_id *id,
+ int ops, bool force,
+ enum oi_check_flags flags, bool *exist)
{
handle_t *th;
int rc;
rc = __osd_xattr_get(inode, dentry, XATTR_NAME_FID, ff, sizeof(*ff));
if (rc == sizeof(*ff)) {
/* 2) delete the old XATTR_NAME_FID */
- ll_vfs_dq_init(inode);
- rc = osd_removexattr(dentry, inode, XATTR_NAME_FID);
+ dquot_initialize(inode);
+ rc = ll_vfs_removexattr(dentry, inode, XATTR_NAME_FID);
if (rc)
GOTO(stop, rc);
GOTO(out, rc = 0);
}
- if (!scrub->os_partial_scan)
- scrub->os_full_speed = 1;
-
switch (val) {
case SCRUB_NEXT_NOLMA:
sf->sf_flags |= SF_UPGRADE;
GOTO(out, rc = 0);
} else {
- if (!scrub->os_partial_scan)
+ if (!scrub->os_partial_scan) {
+ spin_lock(&scrub->os_lock);
scrub->os_full_speed = 1;
-
+ spin_unlock(&scrub->os_lock);
+ }
sf->sf_flags |= SF_INCONSISTENT;
/* XXX: If the device is restored from file-level backup, then
OI_KNOWN_ON_OST : 0, NULL);
up_write(&scrub->os_rwsem);
- if (inode != NULL && !IS_ERR(inode))
+ if (!IS_ERR(inode))
iput(inode);
if (oii != NULL) {
static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
{
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct scrub_file *sf = &scrub->os_file;
__u32 flags = scrub->os_start_flags;
int rc;
if (flags & SS_RESET)
scrub_file_reset(scrub, dev->od_uuid, 0);
+ spin_lock(&scrub->os_lock);
if (flags & SS_AUTO_FULL) {
scrub->os_full_speed = 1;
scrub->os_partial_scan = 0;
scrub->os_partial_scan = 0;
}
- spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
scrub->os_waiting = 0;
scrub->os_paused = 0;
rc = scrub_file_store(env, scrub);
if (rc == 0) {
spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_RUNNING);
+ scrub->os_running = 1;
spin_unlock(&scrub->os_lock);
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
}
up_write(&scrub->os_rwsem);
down_write(&scrub->os_rwsem);
spin_lock(&scrub->os_lock);
- thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
if (scrub->os_new_checked > 0) {
sf->sf_items_checked += scrub->os_new_checked;
/* We should NOT find the same object more than once. */
CERROR("%s: scan the same object multiple times at the pos: "
"group = %u, base = %u, offset = %u, start = %u\n",
- param->sb->s_id, (__u32)param->bg, param->gbase,
+ osd_sb2name(param->sb), (__u32)param->bg, param->gbase,
offset, param->start);
goto again;
}
RETURN(rc);
}
+ if (dev->od_is_ost && S_ISREG(inode->i_mode) && inode->i_nlink > 1)
+ dev->od_scrub.os_scrub.os_has_ml_file = 1;
+
/* It is an EA inode, no OI mapping for it, skip it. */
if (osd_is_ea_inode(inode))
GOTO(put, rc = SCRUB_NEXT_CONTINUE);
struct osd_idmap_cache **oic, const bool noslot)
{
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct lu_fid *fid;
struct osd_inode_id *lid;
int rc;
- if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
- struct l_wait_info lwi;
-
- lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
- if (likely(lwi.lwi_timeout > 0))
- l_wait_event(thread->t_ctl_waitq,
- !list_empty(&scrub->os_inconsistent_items) ||
- !thread_is_running(thread),
- &lwi);
- }
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0)
+ wait_var_event_timeout(
+ scrub,
+ !list_empty(&scrub->os_inconsistent_items) ||
+ kthread_should_stop(),
+ cfs_time_seconds(cfs_fail_val));
if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_STOPPING);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
return SCRUB_NEXT_CRASH;
}
if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL))
return SCRUB_NEXT_FATAL;
- if (unlikely(!thread_is_running(thread)))
+ if (kthread_should_stop())
return SCRUB_NEXT_EXIT;
if (!list_empty(&scrub->os_inconsistent_items)) {
oii = list_entry(scrub->os_inconsistent_items.next,
struct osd_inconsistent_item, oii_list);
- spin_unlock(&scrub->os_lock);
*oic = &oii->oii_cache;
scrub->os_in_prior = 1;
+ spin_unlock(&scrub->os_lock);
return 0;
}
{
struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache;
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
int rc;
- if (thread_is_running(thread) &&
+ if (scrub->os_running &&
ooc->ooc_pos_preload >= scrub->os_pos_current)
return SCRUB_NEXT_EXIT;
spin_lock(&scrub->os_lock);
if (osd_scrub_has_window(scrub, &it->ooi_cache) ||
!list_empty(&scrub->os_inconsistent_items) ||
- it->ooi_waiting || !thread_is_running(&scrub->os_thread))
+ it->ooi_waiting || kthread_should_stop())
scrub->os_waiting = 0;
else
scrub->os_waiting = 1;
struct osd_iit_param *param,
struct osd_idmap_cache *oic, bool *noslot, int rc)
{
- struct l_wait_info lwi = { 0 };
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
struct scrub_file *sf = &scrub->os_file;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct osd_otable_it *it = dev->od_otable_it;
struct osd_otable_cache *ooc = it ? &it->ooi_cache : NULL;
rc = osd_scrub_check_update(info, dev, oic, rc);
if (rc != 0) {
+ spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
+ spin_unlock(&scrub->os_lock);
return rc;
}
}
if (scrub->os_in_prior) {
+ spin_lock(&scrub->os_lock);
scrub->os_in_prior = 0;
+ spin_unlock(&scrub->os_lock);
return 0;
}
ooc->ooc_pos_preload < scrub->os_pos_current) {
spin_lock(&scrub->os_lock);
it->ooi_waiting = 0;
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
spin_unlock(&scrub->os_lock);
}
return 0;
}
- if (it != NULL)
- l_wait_event(thread->t_ctl_waitq, osd_scrub_wakeup(scrub, it),
- &lwi);
+ if (it)
+ wait_var_event(scrub, osd_scrub_wakeup(scrub, it));
if (!ooc || osd_scrub_has_window(scrub, ooc))
*noslot = false;
__u32 flags, bool inconsistent)
{
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct scrub_file *sf = &scrub->os_file;
int rc;
ENTRY;
LASSERT(!(flags & SS_AUTO_PARTIAL));
down_write(&scrub->os_rwsem);
+ spin_lock(&scrub->os_lock);
scrub->os_in_join = 1;
if (flags & SS_SET_FAILOUT)
sf->sf_param |= SP_FAILOUT;
sf->sf_flags |= SF_AUTO;
scrub->os_full_speed = 1;
}
+ spin_unlock(&scrub->os_lock);
scrub->os_new_checked = 0;
if (sf->sf_pos_last_checkpoint != 0)
scrub->os_in_join = 0;
scrub->os_full_scrub = 0;
spin_unlock(&scrub->os_lock);
- wake_up_all(&thread->t_ctl_waitq);
+ wake_up_var(scrub);
up_write(&scrub->os_rwsem);
CDEBUG(D_LFSCK, "%s: joined in the OI scrub with flag %u: rc = %d\n",
struct osd_device *dev, __u32 max, bool preload)
{
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
struct scrub_file *sf = &scrub->os_file;
osd_iit_next_policy next;
osd_iit_exec_policy exec;
__u64 *pos;
__u64 *count;
struct osd_iit_param *param;
- struct l_wait_info lwi = { 0 };
__u32 limit;
int rc;
bool noslot = true;
if (rc == 0) {
__u64 used = ksfs->f_files - ksfs->f_ffree;
- do_div(used, sf->sf_items_updated_prior);
+ used = div64_u64(used, sf->sf_items_updated_prior);
/* If we hit too much inconsistent OI
* mappings during the partial scan,
* then scan the device completely. */
sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
SF_UPGRADE | SF_AUTO);
sf->sf_status = SS_COMPLETED;
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread) ||
- !scrub->os_partial_scan ||
- scrub->os_in_join ||
- !list_empty(&scrub->os_inconsistent_items),
- &lwi);
+ wait_var_event(
+ scrub,
+ kthread_should_stop() ||
+ !scrub->os_partial_scan ||
+ scrub->os_in_join ||
+ !list_empty(&scrub->os_inconsistent_items));
sf->sf_flags = saved_flags;
sf->sf_status = SS_SCANNING;
- if (unlikely(!thread_is_running(thread)))
+ if (kthread_should_stop())
RETURN(0);
if (!scrub->os_partial_scan || scrub->os_in_join)
full:
if (!preload) {
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread) || !scrub->os_in_join,
- &lwi);
+ wait_var_event(scrub,
+ kthread_should_stop() ||
+ !scrub->os_in_join);
- if (unlikely(!thread_is_running(thread)))
+ if (kthread_should_stop())
RETURN(0);
}
if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) {
spin_lock(&scrub->os_lock);
scrub->os_waiting = 0;
- wake_up_all(&scrub->os_thread.t_ctl_waitq);
+ wake_up_var(scrub);
spin_unlock(&scrub->os_lock);
}
RETURN(rc < 0 ? rc : ooc->ooc_cached_items);
}
+static int osd_scan_ml_file_main(const struct lu_env *env,
+ struct osd_device *dev);
+
static int osd_scrub_main(void *args)
{
struct lu_env env;
struct osd_device *dev = (struct osd_device *)args;
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct ptlrpc_thread *thread = &scrub->os_thread;
- int rc;
+ int rc, ret;
ENTRY;
rc = lu_env_init(&env, LCT_LOCAL | LCT_DT_THREAD);
}
if (!scrub->os_full_speed && !scrub->os_partial_scan) {
- struct l_wait_info lwi = { 0 };
struct osd_otable_it *it = dev->od_otable_it;
struct osd_otable_cache *ooc = &it->ooi_cache;
- l_wait_event(thread->t_ctl_waitq,
- it->ooi_user_ready || !thread_is_running(thread),
- &lwi);
- if (unlikely(!thread_is_running(thread)))
+ wait_var_event(scrub,
+ it->ooi_user_ready || kthread_should_stop());
+ if (kthread_should_stop())
GOTO(post, rc = 0);
scrub->os_pos_current = ooc->ooc_pos_preload;
rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, false);
if (unlikely(rc == SCRUB_IT_CRASH)) {
spin_lock(&scrub->os_lock);
- thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
GOTO(out, rc = -EINVAL);
}
+ if (scrub->os_has_ml_file) {
+ ret = osd_scan_ml_file_main(&env, dev);
+ if (ret != 0)
+ rc = ret;
+ }
+
GOTO(post, rc);
post:
list_del_init(&oii->oii_list);
OBD_FREE_PTR(oii);
}
+
lu_env_fini(&env);
noenv:
spin_lock(&scrub->os_lock);
- thread_set_flags(thread, SVC_STOPPED);
- wake_up_all(&thread->t_ctl_waitq);
+ scrub->os_running = 0;
spin_unlock(&scrub->os_lock);
+ if (xchg(&scrub->os_task, NULL) == NULL)
+ /* scrub_stop() is waiting, we need to synchronize */
+ wait_var_event(scrub, kthread_should_stop());
+ wake_up_var(scrub);
return rc;
}
};
struct osd_ios_filldir_buf {
-#ifdef HAVE_DIR_CONTEXT
/* please keep it as first member */
struct dir_context ctx;
-#endif
struct osd_thread_info *oifb_info;
struct osd_device *oifb_dev;
struct dentry *oifb_dentry;
int oifb_items;
};
-static inline struct dentry *
-osd_ios_lookup_one_len(const char *name, struct dentry *parent, int namelen)
-{
- struct dentry *dentry;
-
- dentry = ll_lookup_one_len(name, parent, namelen);
- if (IS_ERR(dentry)) {
- int rc = PTR_ERR(dentry);
-
- if (rc != -ENOENT)
- CERROR("Fail to find %.*s in %.*s (%lu/%u): rc = %d\n",
- namelen, name, parent->d_name.len,
- parent->d_name.name, parent->d_inode->i_ino,
- parent->d_inode->i_generation, rc);
-
- return dentry;
- }
-
- if (dentry->d_inode == NULL) {
- dput(dentry);
- return ERR_PTR(-ENOENT);
- }
-
- return dentry;
-}
-
static int
osd_ios_new_item(struct osd_device *dev, struct dentry *dentry,
scandir_t scandir, filldir_t filldir)
int rc;
ENTRY;
+ if (!inode) {
+ CDEBUG(D_INODE, "%s: child '%.*s' lacks inode: rc = -2\n",
+ osd_name(dev), namelen, name);
+ RETURN(-ENOENT);
+ }
+
rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
&info->oti_ost_attrs);
if (rc != 0 && rc != -ENODATA) {
osd_index_need_recreate(info->oti_env, dev, inode)) {
struct lu_fid *pfid = &info->oti_fid3;
- if (parent == osd_sb(dev)->s_root->d_inode) {
+ if (is_root_inode(parent)) {
lu_local_obj_fid(pfid, OSD_FS_ROOT_OID);
} else {
rc = osd_scrub_get_fid(info, dev, parent, pfid,
inode);
}
- rc = osd_oi_lookup(info, dev, &tfid, id2, 0);
+	/* Since this is called from iterate_dir(), the inode lock is
+	 * already held.
+	 */
+ rc = osd_oi_lookup(info, dev, &tfid, id2, OI_LOCKED);
if (rc != 0) {
if (rc != -ENOENT)
RETURN(rc);
RETURN(0);
scrub->os_lf_scanned++;
- child = osd_ios_lookup_one_len(name, parent, namelen);
+ child = osd_lookup_one_len(dev, name, parent, namelen);
if (IS_ERR(child)) {
+ rc = PTR_ERR(child);
CDEBUG(D_LFSCK, "%s: cannot lookup child '%.*s': rc = %d\n",
- osd_name(dev), namelen, name, (int)PTR_ERR(child));
- RETURN(0);
+ osd_name(dev), namelen, name, rc);
+ RETURN(rc);
+ } else if (!child->d_inode) {
+ dput(child);
+ CDEBUG(D_INODE, "%s: child '%.*s' lacks inode\n",
+ osd_name(dev), namelen, name);
+ RETURN(-ENOENT);
}
inode = child->d_inode;
if (name[0] == '.')
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
if (map->olm_name == NULL)
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
{
struct osd_ios_filldir_buf *fill_buf =
(struct osd_ios_filldir_buf *)buf;
+ struct osd_device *dev = fill_buf->oifb_dev;
struct dentry *child;
struct lu_fid tfid;
int rc = 0;
if (name[0] != '[')
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
if (map->olm_name == NULL)
RETURN(0);
- child = osd_ios_lookup_one_len(name, fill_buf->oifb_dentry, namelen);
+ child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
if (IS_ERR(child))
RETURN(PTR_ERR(child));
+ else if (!child->d_inode)
+ GOTO(out_put, rc = -ENOENT);
if (!(map->olm_flags & OLF_NO_OI))
rc = osd_ios_scan_one(fill_buf->oifb_info, dev,
if (rc == 0 && map->olm_flags & OLF_SCAN_SUBITEMS)
rc = osd_ios_new_item(dev, child, map->olm_scandir,
map->olm_filldir);
+out_put:
dput(child);
RETURN(rc);
osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
struct dentry *dentry, filldir_t filldir)
{
- struct osd_ios_filldir_buf buf = {
-#ifdef HAVE_DIR_CONTEXT
- .ctx.actor = filldir,
-#endif
- .oifb_info = info,
- .oifb_dev = dev,
- .oifb_dentry = dentry };
- struct file *filp = &info->oti_file;
- struct inode *inode = dentry->d_inode;
- const struct file_operations *fops = inode->i_fop;
- int rc;
+ struct osd_ios_filldir_buf buf = {
+ .ctx.actor = filldir,
+ .oifb_info = info,
+ .oifb_dev = dev,
+ .oifb_dentry = dentry
+ };
+ struct file *filp;
+ struct inode *inode = dentry->d_inode;
+ int rc;
+
ENTRY;
LASSERT(filldir != NULL);
- filp->f_pos = 0;
- filp->f_path.dentry = dentry;
- filp->f_mode = FMODE_64BITHASH;
- filp->f_mapping = inode->i_mapping;
- filp->f_op = fops;
- filp->private_data = NULL;
- set_file_inode(filp, inode);
+ filp = osd_quasi_file_by_dentry(info->oti_env, dentry);
+ rc = osd_security_file_alloc(filp);
+ if (rc)
+ RETURN(rc);
do {
buf.oifb_items = 0;
-#ifdef HAVE_DIR_CONTEXT
- buf.ctx.pos = filp->f_pos;
-#ifdef HAVE_ITERATE_SHARED
- rc = fops->iterate_shared(filp, &buf.ctx);
-#else
- rc = fops->iterate(filp, &buf.ctx);
-#endif
- filp->f_pos = buf.ctx.pos;
-#else
- rc = fops->readdir(filp, &buf, filldir);
-#endif
+ rc = iterate_dir(filp, &buf.ctx);
} while (rc >= 0 && buf.oifb_items > 0 &&
filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
- fops->release(inode, filp);
+ inode->i_fop->release(inode, filp);
RETURN(rc);
}
* FID directly, instead, the OI scrub will scan the OI structure
* and try to re-generate the LMA from the OI mapping. But if the
* OI mapping crashed or lost also, then we have to give up under
- * double failure cases. */
+ * double failure cases.
+ */
+ spin_lock(&scrub->os_lock);
scrub->os_convert_igif = 1;
- child = osd_ios_lookup_one_len(dot_lustre_name, dentry,
- strlen(dot_lustre_name));
+ spin_unlock(&scrub->os_lock);
+ child = osd_lookup_one_len_unlocked(dev, dot_lustre_name, dentry,
+ strlen(dot_lustre_name));
if (IS_ERR(child)) {
- rc = PTR_ERR(child);
- if (rc == -ENOENT) {
- /* It is 1.8 MDT device. */
- if (!(sf->sf_flags & SF_UPGRADE)) {
- scrub_file_reset(scrub, dev->od_uuid,
- SF_UPGRADE);
- sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
- rc = scrub_file_store(info->oti_env, scrub);
- } else {
- rc = 0;
- }
+ if (PTR_ERR(child) != -ENOENT)
+ RETURN(PTR_ERR(child));
+ goto out_scrub;
+ }
+
+ /* For lustre-2.x (x <= 3), the ".lustre" has NO FID-in-LMA,
+ * so the client will get IGIF for the ".lustre" object when
+ * the MDT restart.
+ *
+ * From the OI scrub view, when the MDT upgrade to Lustre-2.4,
+ * it does not know whether there are some old clients cached
+ * the ".lustre" IGIF during the upgrading. Two choices:
+ *
+ * 1) Generate IGIF-in-LMA and IGIF-in-OI for the ".lustre".
+ * It will allow the old connected clients to access the
+ * ".lustre" with cached IGIF. But it will cause others
+ * on the MDT failed to check "fid_is_dot_lustre()".
+ *
+ * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
+ * for ".lustre" in spite of whether there are some clients
+ * cached the ".lustre" IGIF or not. It enables the check
+ * "fid_is_dot_lustre()" on the MDT, although it will cause
+ * that the old connected clients cannot access the ".lustre"
+ * with the cached IGIF.
+ *
+ * Usually, it is rare case for the old connected clients
+ * to access the ".lustre" with cached IGIF. So we prefer
+ * to the solution 2).
+ */
+ inode_lock(dentry->d_inode);
+ rc = osd_ios_scan_one(info, dev, dentry->d_inode,
+ child->d_inode, &LU_DOT_LUSTRE_FID,
+ dot_lustre_name,
+ strlen(dot_lustre_name), 0);
+ inode_unlock(dentry->d_inode);
+ if (rc == -ENOENT) {
+out_scrub:
+ /* It is 1.8 MDT device. */
+ if (!(sf->sf_flags & SF_UPGRADE)) {
+ scrub_file_reset(scrub, dev->od_uuid,
+ SF_UPGRADE);
+ sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
+ rc = scrub_file_store(info->oti_env, scrub);
+ } else {
+ rc = 0;
}
- } else {
- /* For lustre-2.x (x <= 3), the ".lustre" has NO FID-in-LMA,
- * so the client will get IGIF for the ".lustre" object when
- * the MDT restart.
- *
- * From the OI scrub view, when the MDT upgrade to Lustre-2.4,
- * it does not know whether there are some old clients cached
- * the ".lustre" IGIF during the upgrading. Two choices:
- *
- * 1) Generate IGIF-in-LMA and IGIF-in-OI for the ".lustre".
- * It will allow the old connected clients to access the
- * ".lustre" with cached IGIF. But it will cause others
- * on the MDT failed to check "fid_is_dot_lustre()".
- *
- * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
- * for ".lustre" in spite of whether there are some clients
- * cached the ".lustre" IGIF or not. It enables the check
- * "fid_is_dot_lustre()" on the MDT, although it will cause
- * that the old connected clients cannot access the ".lustre"
- * with the cached IGIF.
- *
- * Usually, it is rare case for the old connected clients
- * to access the ".lustre" with cached IGIF. So we prefer
- * to the solution 2). */
- rc = osd_ios_scan_one(info, dev, dentry->d_inode,
- child->d_inode, &LU_DOT_LUSTRE_FID,
- dot_lustre_name,
- strlen(dot_lustre_name), 0);
- if (rc == 0)
- rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
- osd_ios_dl_fill);
- dput(child);
+ } else if (rc == 0) {
+ rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
+ osd_ios_dl_fill);
}
+ dput(child);
RETURN(rc);
}
RETURN(rc);
}
- child = osd_ios_lookup_one_len(ADMIN_USR, dentry, strlen(ADMIN_USR));
- if (!IS_ERR(child)) {
+ child = osd_lookup_one_len_unlocked(dev, ADMIN_USR, dentry,
+ strlen(ADMIN_USR));
+ if (IS_ERR(child)) {
+ rc = PTR_ERR(child);
+ } else {
+ inode_lock(dentry->d_inode);
rc = osd_ios_scan_one(info, dev, dentry->d_inode,
child->d_inode, NULL, ADMIN_USR,
strlen(ADMIN_USR), 0);
+ inode_unlock(dentry->d_inode);
dput(child);
- } else {
- rc = PTR_ERR(child);
}
if (rc != 0 && rc != -ENOENT)
- RETURN(rc);
-
- child = osd_ios_lookup_one_len(ADMIN_GRP, dentry, strlen(ADMIN_GRP));
- if (!IS_ERR(child)) {
- rc = osd_ios_scan_one(info, dev, dentry->d_inode,
- child->d_inode, NULL, ADMIN_GRP,
- strlen(ADMIN_GRP), 0);
- dput(child);
- } else {
- rc = PTR_ERR(child);
- }
+ GOTO(out, rc);
- if (rc == -ENOENT)
- rc = 0;
+ child = osd_lookup_one_len_unlocked(dev, ADMIN_GRP, dentry,
+ strlen(ADMIN_GRP));
+ if (IS_ERR(child))
+ GOTO(out, rc = PTR_ERR(child));
- RETURN(rc);
+ inode_lock(dentry->d_inode);
+ rc = osd_ios_scan_one(info, dev, dentry->d_inode,
+ child->d_inode, NULL, ADMIN_GRP,
+ strlen(ADMIN_GRP), 0);
+ inode_unlock(dentry->d_inode);
+ dput(child);
+out:
+ RETURN(rc == -ENOENT ? 0 : rc);
}
static void osd_initial_OI_scrub(struct osd_thread_info *info,
dev->od_igif_inoi = 1;
while (1) {
+		/* Don't take inode_lock here since scandir() callbacks
+		 * can call VFS functions which may manually take the
+		 * inode lock themselves, like iterate_dir(). Since this
+		 * is the case, it is best to leave it to the scandir()
+		 * callbacks to manage the inode lock.
+		 */
scandir(info, dev, dentry, filldir);
if (item != NULL) {
dput(item->oii_dentry);
continue;
}
- child = osd_ios_lookup_one_len(map->olm_name,
- osd_sb(dev)->s_root,
- map->olm_namelen);
- if (!IS_ERR(child))
- dput(child);
- else if (PTR_ERR(child) == -ENOENT)
+ child = osd_lookup_one_len_unlocked(dev, map->olm_name,
+ osd_sb(dev)->s_root,
+ map->olm_namelen);
+ if (PTR_ERR(child) == -ENOENT ||
+ (!IS_ERR(child) && !child->d_inode))
osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
NULL, DTO_INDEX_DELETE,
true, 0, NULL);
+ if (!IS_ERR(child))
+ dput(child);
map++;
}
/* od_otable_mutex: prevent curcurrent start/stop */
mutex_lock(&dev->od_otable_mutex);
+ spin_lock(&scrub->os_lock);
scrub->os_paused = 1;
+ spin_unlock(&scrub->os_lock);
scrub_stop(scrub);
mutex_unlock(&dev->od_otable_mutex);
}
static const char osd_scrub_name[] = "OI_scrub";
-int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
+int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
+ bool restored)
{
struct osd_thread_info *info = osd_oti_get(env);
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
struct osd_inode_id *id = &info->oti_id;
struct dt_object *obj;
bool dirty = false;
- bool restored = false;
int rc = 0;
ENTRY;
OBD_SET_CTXT_MAGIC(ctxt);
ctxt->pwdmnt = dev->od_mnt;
ctxt->pwd = dev->od_mnt->mnt_root;
- ctxt->fs = KERNEL_DS;
- init_waitqueue_head(&scrub->os_thread.t_ctl_waitq);
init_rwsem(&scrub->os_rwsem);
spin_lock_init(&scrub->os_lock);
INIT_LIST_HEAD(&scrub->os_inconsistent_items);
}
inode = file_inode(filp);
+ ldiskfs_set_inode_flag(inode, LDISKFS_INODE_JOURNAL_DATA);
if (!dev->od_dt_dev.dd_rdonly) {
/* 'What the @fid is' is not imporatant, because the object
* has no OI mapping, and only is visible inside the OSD.*/
RETURN(obj ? PTR_ERR(obj) : -ENOENT);
#ifndef HAVE_S_UUID_AS_UUID_T
- memcpy(dev->od_uuid.b, sb->s_uuid, UUID_SIZE);
+ memcpy(dev->od_uuid.b, sb->s_uuid, sizeof(dev->od_uuid));
#else
uuid_copy(&dev->od_uuid, &sb->s_uuid);
#endif
dt_object_put_nocache(env, scrub->os_obj);
scrub->os_obj = NULL;
}
- if (dev->od_oi_table != NULL)
- osd_oi_fini(osd_oti_get(env), dev);
}
/* object table based iteration APIs */
{
spin_lock(&scrub->os_lock);
if (it->ooi_cache.ooc_pos_preload < scrub->os_pos_current ||
- scrub->os_waiting ||
- !thread_is_running(&scrub->os_thread))
+ scrub->os_waiting || !scrub->os_running)
it->ooi_waiting = 0;
else
it->ooi_waiting = 1;
static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di)
{
- struct osd_otable_it *it = (struct osd_otable_it *)di;
- struct osd_device *dev = it->ooi_dev;
+ struct osd_otable_it *it = (struct osd_otable_it *)di;
+ struct osd_device *dev = it->ooi_dev;
struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct osd_otable_cache *ooc = &it->ooi_cache;
- struct ptlrpc_thread *thread = &scrub->os_thread;
- struct l_wait_info lwi = { 0 };
- int rc;
+ struct osd_otable_cache *ooc = &it->ooi_cache;
+ int rc;
ENTRY;
LASSERT(it->ooi_user_ready);
again:
- if (!thread_is_running(thread) && !it->ooi_used_outside)
+ if (!scrub->os_running && !it->ooi_used_outside)
RETURN(1);
if (ooc->ooc_cached_items > 0) {
}
if (it->ooi_all_cached) {
- l_wait_event(thread->t_ctl_waitq,
- !thread_is_running(thread),
- &lwi);
+ wait_var_event(scrub, !scrub->os_running);
RETURN(1);
}
if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) {
spin_lock(&scrub->os_lock);
scrub->os_waiting = 0;
- wake_up_all(&scrub->os_thread.t_ctl_waitq);
+ wake_up_var(scrub);
spin_unlock(&scrub->os_lock);
}
if (it->ooi_cache.ooc_pos_preload >= scrub->os_pos_current)
- l_wait_event(thread->t_ctl_waitq,
- osd_otable_it_wakeup(scrub, it),
- &lwi);
+ wait_var_event(scrub, osd_otable_it_wakeup(scrub, it));
- if (!thread_is_running(thread) && !it->ooi_used_outside)
+ if (!scrub->os_running && !it->ooi_used_outside)
RETURN(1);
rc = osd_otable_it_preload(env, it);
it->ooi_user_ready = 1;
if (!scrub->os_full_speed)
- wake_up_all(&scrub->os_thread.t_ctl_waitq);
+ wake_up_var(scrub);
memset(param, 0, sizeof(*param));
param->sb = osd_sb(dev);
#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60
-int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
- int insert)
+int osd_oii_insert(struct osd_device *dev, const struct lu_fid *fid,
+ struct osd_inode_id *id, int insert)
{
struct osd_inconsistent_item *oii;
struct osd_scrub *oscrub = &dev->od_scrub;
struct lustre_scrub *lscrub = &oscrub->os_scrub;
- struct ptlrpc_thread *thread = &lscrub->os_thread;
int wakeup = 0;
ENTRY;
RETURN(-ENOMEM);
INIT_LIST_HEAD(&oii->oii_list);
- oii->oii_cache = *oic;
+ oii->oii_cache.oic_fid = *fid;
+ oii->oii_cache.oic_lid = *id;
+ oii->oii_cache.oic_dev = dev;
oii->oii_insert = insert;
+ spin_lock(&lscrub->os_lock);
if (lscrub->os_partial_scan) {
__u64 now = ktime_get_real_seconds();
lscrub->os_full_scrub = 1;
}
- spin_lock(&lscrub->os_lock);
- if (unlikely(!thread_is_running(thread))) {
+ if (!lscrub->os_running) {
spin_unlock(&lscrub->os_lock);
OBD_FREE_PTR(oii);
RETURN(-EAGAIN);
list_add_tail(&oii->oii_list, &lscrub->os_inconsistent_items);
spin_unlock(&lscrub->os_lock);
- if (wakeup != 0)
- wake_up_all(&thread->t_ctl_waitq);
+ if (wakeup)
+ wake_up_var(lscrub);
RETURN(0);
}
scrub->os_lf_repaired,
scrub->os_lf_failed);
}
+
+/**
+ * Per-entry callback type used by osd_scan_dir().
+ *
+ * \param[in] env	execution environment of the caller
+ * \param[in] dev	OSD device being scanned
+ * \param[in] dir	inode of the directory that is being iterated
+ * \param[in] oie	directory iterator; oie->oie_dirent is the current entry
+ *
+ * \retval		integer status; osd_scan_dir() does not inspect it
+ */
+typedef int (*scan_dir_helper_t)(const struct lu_env *env,
+				 struct osd_device *dev,
+				 struct inode *dir,
+				 struct osd_it_ea *oie);
+
+/**
+ * Walk the entries of a directory and hand each of them to a callback.
+ *
+ * The "." and ".." entries are skipped. The value returned by \a cb is
+ * ignored, so a failure on one entry does not stop the walk.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	OSD device the directory belongs to
+ * \param[in] inode	directory to iterate
+ * \param[in] cb	callback run for every remaining entry
+ *
+ * \retval 0		the walk reached its end
+ * \retval -ENODATA	the first osd_ldiskfs_it_fill() returned a positive
+ *			value (presumably "nothing to read" - confirm)
+ * \retval -ve		error from osd_it_dir_init() or osd_ldiskfs_it_fill()
+ */
+static int osd_scan_dir(const struct lu_env *env, struct osd_device *dev,
+			struct inode *inode, scan_dir_helper_t cb)
+{
+	struct osd_it_ea *it;
+	int rc;
+
+	ENTRY;
+
+	it = osd_it_dir_init(env, inode, LUDA_TYPE);
+	if (IS_ERR(it))
+		RETURN(PTR_ERR(it));
+
+	/* Load the first batch of entries, starting from position 0. */
+	it->oie_file.f_pos = 0;
+	rc = osd_ldiskfs_it_fill(env, (struct dt_it *)it);
+	if (rc > 0)
+		rc = -ENODATA;
+	if (rc != 0)
+		GOTO(out, rc);
+
+	while (it->oie_it_dirent <= it->oie_rd_dirent) {
+		if (!name_is_dot_or_dotdot(it->oie_dirent->oied_name,
+					   it->oie_dirent->oied_namelen))
+			cb(env, dev, inode, it);
+
+		/* Step over the current record: header plus name, rounded. */
+		it->oie_dirent = (void *)it->oie_dirent +
+			cfs_size_round(sizeof(struct osd_it_ea_dirent) +
+				       it->oie_dirent->oied_namelen);
+
+		/* More entries are left in the batch already loaded. */
+		if (++it->oie_it_dirent <= it->oie_rd_dirent)
+			continue;
+
+		/* The batch is used up: stop at EOF, otherwise refill. */
+		if (it->oie_file.f_pos == ldiskfs_get_htree_eof(&it->oie_file))
+			break;
+
+		rc = osd_ldiskfs_it_fill(env, (struct dt_it *)it);
+		if (rc == 0)
+			continue;
+
+		/* A positive value from a refill is not reported as error. */
+		if (rc > 0)
+			rc = 0;
+		break;
+	}
+
+out:
+	osd_it_dir_fini(env, it, inode);
+	RETURN(rc);
+}
+
+/**
+ * Remove the directory entry currently referenced by \a oie from \a dir and
+ * drop the corresponding link of \a inode.
+ *
+ * Nothing is changed when the scrub runs with SP_DRYRUN. The link count of
+ * \a inode is only decremented after the name has really been removed, so
+ * that a failed removal does not leave the inode with fewer links than it
+ * has names.
+ *
+ * \param[in] info	per-thread OSD context
+ * \param[in] dev	OSD device
+ * \param[in] dir	directory holding the entry
+ * \param[in] inode	inode the entry refers to
+ * \param[in] oie	iterator positioned at the entry to be removed
+ *
+ * \retval 0		the entry was removed, or dry-run mode is active
+ * \retval -ve		failed to start the journal handle or to remove
+ *			the entry
+ */
+static int osd_remove_ml_file(struct osd_thread_info *info,
+			      struct osd_device *dev, struct inode *dir,
+			      struct inode *inode, struct osd_it_ea *oie)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
+	struct dentry *dentry;
+	handle_t *th;
+	int rc;
+
+	ENTRY;
+
+	if (scrub->os_file.sf_param & SP_DRYRUN)
+		RETURN(0);
+
+	th = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
+				  osd_dto_credits_noquota[DTO_INDEX_DELETE] +
+				  osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
+	if (IS_ERR(th))
+		RETURN(PTR_ERR(th));
+
+	/* The iterator's embedded dentry stands in for the parent. */
+	dentry = &oie->oie_dentry;
+	dentry->d_inode = dir;
+	dentry->d_sb = dir->i_sb;
+	rc = osd_obj_del_entry(info, dev, dentry, oie->oie_dirent->oied_name,
+			       oie->oie_dirent->oied_namelen, th);
+	/* Keep nlink in sync with the names that actually exist: if the
+	 * entry could not be deleted, the link is still there.
+	 */
+	if (rc == 0) {
+		drop_nlink(inode);
+		mark_inode_dirty(inode);
+	}
+	ldiskfs_journal_stop(th);
+	RETURN(rc);
+}
+
+/* Return true unless \a name is exactly the \a len bytes found at \a ent_name. */
+static bool osd_ml_name_differs(const char *name, const char *ent_name,
+				size_t len)
+{
+	return strlen(name) != len || strncmp(name, ent_name, len) != 0;
+}
+
+/**
+ * osd_scan_dir() callback for one entry of an object sub-directory.
+ *
+ * Computes where the object named by the entry's FID is expected to live
+ * (sequence directory, sub-directory and object name) and compares that
+ * with the path the entry was actually found under, i.e. the names saved
+ * in info->oti_seq_dirent and info->oti_dir_dirent plus the entry's own
+ * name. An entry found under a different path is removed through
+ * osd_remove_ml_file().
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	OSD device
+ * \param[in] dir	directory that contains the entry
+ * \param[in] oie	iterator positioned at the entry to be checked
+ *
+ * \retval 0		the entry is at the expected place or was removed
+ * \retval -ve		failed to load the inode or the sequence, or to
+ *			remove the entry
+ */
+static int osd_scan_ml_file(const struct lu_env *env, struct osd_device *dev,
+			    struct inode *dir, struct osd_it_ea *oie)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct osd_inode_id id;
+	struct inode *inode;
+	struct osd_obj_seq *oseq;
+	struct ost_id *ostid = &info->oti_ostid;
+	struct lu_fid *fid = &oie->oie_dirent->oied_fid;
+	char name[32];
+	int dirn, rc = 0;
+
+	ENTRY;
+
+	osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+
+	/* NOTE(review): when the FID kept in the entry is not sane,
+	 * osd_iget_fid() is presumably expected to fill it in from the
+	 * inode - confirm against osd_iget_fid().
+	 */
+	if (!fid_is_sane(fid))
+		inode = osd_iget_fid(info, dev, &id, fid);
+	else
+		inode = osd_iget(info, dev, &id);
+
+	if (IS_ERR(inode))
+		RETURN(PTR_ERR(inode));
+
+	fid_to_ostid(fid, ostid);
+	oseq = osd_seq_load(info, dev, ostid_seq(ostid));
+	if (IS_ERR(oseq))
+		/* Do not leak the inode reference taken above. */
+		GOTO(out, rc = PTR_ERR(oseq));
+
+	dirn = ostid_id(ostid) & (oseq->oos_subdir_count - 1);
+	LASSERT(oseq->oos_dirs[dirn] != NULL);
+
+	osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+	if (osd_ml_name_differs(oseq->oos_root->d_name.name,
+				info->oti_seq_dirent->oied_name,
+				info->oti_seq_dirent->oied_namelen) ||
+	    osd_ml_name_differs(oseq->oos_dirs[dirn]->d_name.name,
+				info->oti_dir_dirent->oied_name,
+				info->oti_dir_dirent->oied_namelen) ||
+	    osd_ml_name_differs(name, oie->oie_dirent->oied_name,
+				oie->oie_dirent->oied_namelen)) {
+		CDEBUG(D_LFSCK, "%s: the file O/%s/%s/%s is corrupted\n",
+		       osd_name(dev), info->oti_seq_dirent->oied_name,
+		       info->oti_dir_dirent->oied_name,
+		       oie->oie_dirent->oied_name);
+
+		rc = osd_remove_ml_file(info, dev, dir, inode, oie);
+	}
+
+out:
+	iput(inode);
+	RETURN(rc);
+}
+
+/**
+ * osd_scan_dir() callback for one entry of a sequence directory.
+ *
+ * If the entry is a directory, its own entries are scanned with
+ * osd_scan_ml_file(). While that scan runs, the entry is published in
+ * oti_dir_dirent of the thread info so the nested callback can read the
+ * name of this level. Anything that is not a directory is skipped.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	OSD device
+ * \param[in] dir	directory that contains the entry (not used here)
+ * \param[in] oie	iterator positioned at the entry
+ *
+ * \retval 0		the entry is not a directory, or its scan succeeded
+ * \retval -ve		failed to load the inode, or error from osd_scan_dir()
+ */
+static int osd_scan_ml_file_dir(const struct lu_env *env,
+				struct osd_device *dev, struct inode *dir,
+				struct osd_it_ea *oie)
+{
+	struct osd_thread_info *oti = osd_oti_get(env);
+	struct osd_inode_id ino_id;
+	struct inode *subdir;
+	int rc;
+
+	ENTRY;
+
+	osd_id_gen(&ino_id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+	subdir = osd_iget(oti, dev, &ino_id);
+	if (IS_ERR(subdir))
+		RETURN(PTR_ERR(subdir));
+
+	if (!S_ISDIR(subdir->i_mode))
+		GOTO(out, rc = 0);
+
+	/* Expose this level's entry only for the duration of the scan. */
+	oti->oti_dir_dirent = oie->oie_dirent;
+	rc = osd_scan_dir(env, dev, subdir, osd_scan_ml_file);
+	oti->oti_dir_dirent = NULL;
+
+out:
+	iput(subdir);
+	RETURN(rc);
+}
+
+/**
+ * osd_scan_dir() callback for one entry of the OST object root directory.
+ *
+ * If the entry is a directory, its own entries are scanned with
+ * osd_scan_ml_file_dir(). While that scan runs, the entry is published in
+ * oti_seq_dirent of the thread info so the nested callbacks can read the
+ * name of this level. Anything that is not a directory is skipped.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	OSD device
+ * \param[in] dir	directory that contains the entry (not used here)
+ * \param[in] oie	iterator positioned at the entry
+ *
+ * \retval 0		the entry is not a directory, or its scan succeeded
+ * \retval -ve		failed to load the inode, or error from osd_scan_dir()
+ */
+static int osd_scan_ml_file_seq(const struct lu_env *env,
+				struct osd_device *dev, struct inode *dir,
+				struct osd_it_ea *oie)
+{
+	struct osd_thread_info *oti = osd_oti_get(env);
+	struct osd_inode_id ino_id;
+	struct inode *seq_dir;
+	int rc;
+
+	ENTRY;
+
+	osd_id_gen(&ino_id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+	seq_dir = osd_iget(oti, dev, &ino_id);
+	if (IS_ERR(seq_dir))
+		RETURN(PTR_ERR(seq_dir));
+
+	if (!S_ISDIR(seq_dir->i_mode))
+		GOTO(out, rc = 0);
+
+	/* Expose this level's entry only for the duration of the scan. */
+	oti->oti_seq_dirent = oie->oie_dirent;
+	rc = osd_scan_dir(env, dev, seq_dir, osd_scan_ml_file_dir);
+	oti->oti_seq_dirent = NULL;
+
+out:
+	iput(seq_dir);
+	RETURN(rc);
+}
+
+/**
+ * Entry point of the multi-link file scan: iterate the root directory of
+ * the OST object map and run osd_scan_ml_file_seq() on each of its entries.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	OSD device to scan
+ *
+ * \retval		whatever osd_scan_dir() returns for the root directory
+ */
+static int osd_scan_ml_file_main(const struct lu_env *env,
+				 struct osd_device *dev)
+{
+	struct inode *root = dev->od_ost_map->om_root->d_inode;
+
+	return osd_scan_dir(env, dev, root, osd_scan_ml_file_seq);
+}