*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
return inode;
}
-int osd_ldiskfs_add_entry(struct osd_thread_info *info,
+int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
handle_t *handle, struct dentry *child,
struct inode *inode, struct htree_lock *hlock)
{
rc = __ldiskfs_add_entry(handle, child, inode, hlock);
if (rc == -ENOBUFS || rc == -ENOSPC) {
- char fidbuf[FID_LEN + 1];
- struct lustre_mdt_attrs lma;
- struct lu_fid fid = { };
- char *errstr;
- struct dentry *p_dentry = child->d_parent;
-
- rc2 = osd_get_lma(info, p_dentry->d_inode, p_dentry,
- &lma);
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ struct inode *parent = child->d_parent->d_inode;
+ struct lu_fid *fid = NULL;
+
+ rc2 = osd_get_lma(info, parent, child->d_parent, lma);
if (rc2 == 0) {
- fid = lma.lma_self_fid;
- snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
+ fid = &lma->lma_self_fid;
} else if (rc2 == -ENODATA) {
- if (unlikely(p_dentry->d_inode ==
- inode->i_sb->s_root->d_inode))
- lu_local_obj_fid(&fid, OSD_FS_ROOT_OID);
- else if (info->oti_dev && !info->oti_dev->od_is_ost &&
- fid_seq_is_mdt0(fid_seq(&fid)))
- lu_igif_build(&fid, p_dentry->d_inode->i_ino,
- p_dentry->d_inode->i_generation);
- snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
- } else {
- snprintf(fidbuf, FID_LEN, "%s", "unknown");
+ if (unlikely(parent == inode->i_sb->s_root->d_inode)) {
+ fid = &info->oti_fid3;
+ lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
+ } else if (!osd->od_is_ost && osd->od_index == 0) {
+ fid = &info->oti_fid3;
+ lu_igif_build(fid, parent->i_ino,
+ parent->i_generation);
+ }
}
- if (rc == -ENOSPC)
- errstr = "has reached";
+ if (fid != NULL)
+ CWARN("%s: directory (inode: %lu, FID: "DFID") %s "
+ "maximum entry limit\n",
+ osd_name(osd), parent->i_ino, PFID(fid),
+ rc == -ENOSPC ? "has reached" : "is approaching");
else
- errstr = "is approaching";
- CWARN("%.16s: directory (inode: %lu FID: %s) %s maximum entry limit\n",
- LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
- p_dentry->d_inode->i_ino, fidbuf, errstr);
+ CWARN("%s: directory (inode: %lu, FID: unknown) %s "
+ "maximum entry limit\n",
+ osd_name(osd), parent->i_ino,
+ rc == -ENOSPC ? "has reached" : "is approaching");
+
/* ignore such error now */
if (rc == -ENOBUFS)
rc = 0;
}
+
return rc;
}
* Generally, when the device is mounted, it will
* auto check whether the system is restored from
* file-level backup or not. We trust such detect
- * to distinguish the 1st case from the 2nd case. */
- if (rc == 0) {
- if (!IS_ERR(inode) && inode->i_generation != 0 &&
- inode->i_generation == id->oii_gen)
- /* "id->oii_gen != OSD_OII_NOGEN" is for
- * "@cached == false" case. */
- rc = -ENOENT;
- else
- rc = -EREMCHG;
- } else {
+ * to distinguish the 1st case from the 2nd case:
+ * if the OI files are consistent but may contain
+ * stale OI mappings because of case 2, if iget()
+ * returns -ENOENT or -ESTALE, then it should be
+ * the case 2. */
+ if (rc != 0)
/* If the OI mapping was in OI file before the
* osd_iget_check(), but now, it is disappear,
* then it must be removed by race. That is a
* normal race case. */
- }
+ GOTO(put, rc);
+
+ if ((!IS_ERR(inode) && inode->i_generation != 0 &&
+ inode->i_generation == id->oii_gen) ||
+ (IS_ERR(inode) && !(dev->od_scrub.os_file.sf_flags &
+ SF_INCONSISTENT)))
+ rc = -ENOENT;
+ else
+ rc = -EREMCHG;
} else {
if (id->oii_gen == OSD_OII_NOGEN)
osd_id_gen(id, inode->i_ino, inode->i_generation);
RETURN(rc);
}
+struct osd_check_lmv_buf { /* per-scan state shared by osd_check_lmv() and osd_stripe_dir_filldir() */
+#ifdef HAVE_DIR_CONTEXT
+ /* please keep it as first member: osd_stripe_dir_filldir() casts the
+  * callback argument it receives straight back to this structure */
+ struct dir_context ctx;
+#endif
+ struct osd_thread_info *oclb_info; /* thread info; supplies the scratch FID/id buffers */
+ struct osd_device *oclb_dev; /* device passed to osd_iget() and the OI helpers */
+ struct osd_idmap_cache *oclb_oic; /* receives the FID => inode id mapping that was found */
+};
+
+/**
+ * It is called internally by ->readdir() to filter out the
+ * local slave object's FID of the striped directory.
+ *
+ * Entries whose name starts with '.' are skipped, as are entries whose
+ * name does not parse into a sane FID and entries for which
+ * osd_remote_fid() returns non-zero. For the first remaining entry the
+ * inode is loaded once via osd_iget() (and released again), then the
+ * FID => (ino, OSD_OII_NOGEN) mapping is added to the OI cache, copied
+ * into the caller's osd_idmap_cache and handed to osd_oii_insert().
+ *
+ * \param[in] buf the struct osd_check_lmv_buf of the current scan
+ * \param[in] name entry name, parsed as a FID from its second character
+ * \param[in] namelen not used by this callback
+ * \param[in] offset not used by this callback
+ * \param[in] ino inode number of the entry
+ * \param[in] d_type not used by this callback
+ *
+ * \retval 1 found the local slave's FID
+ * \retval 0 continue to check next item
+ * \retval -ve for failure
+ */
+#ifdef HAVE_FILLDIR_USE_CTX
+static int osd_stripe_dir_filldir(struct dir_context *buf,
+#else
+static int osd_stripe_dir_filldir(void *buf,
+#endif
+ const char *name, int namelen,
+ loff_t offset, __u64 ino, unsigned d_type)
+{
+ struct osd_check_lmv_buf *oclb = (struct osd_check_lmv_buf *)buf;
+ struct osd_thread_info *oti = oclb->oclb_info;
+ struct lu_fid *fid = &oti->oti_fid3; /* scratch FID owned by the thread info */
+ struct osd_inode_id *id = &oti->oti_id3; /* scratch inode id owned by the thread info */
+ struct osd_device *dev = oclb->oclb_dev;
+ struct osd_idmap_cache *oic = oclb->oclb_oic;
+ struct inode *inode;
+ int rc;
+
+ if (name[0] == '.') /* skips every dot-prefixed name, including "." and ".." */
+ return 0;
+
+ fid_zero(fid); /* sscanf() result is not checked; start from a zeroed FID */
+ sscanf(name + 1, SFID, RFID(fid)); /* skip the first character (presumably the '[' of the FID string) */
+ if (!fid_is_sane(fid))
+ return 0;
+
+ if (osd_remote_fid(oti->oti_env, dev, fid)) /* any non-zero result (presumably remote or error) => next entry */
+ return 0;
+
+ osd_id_gen(id, ino, OSD_OII_NOGEN); /* generation is not known from the dirent */
+ inode = osd_iget(oti, dev, id);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ iput(inode); /* the inode was only loaded as a check; it is not kept */
+ osd_add_oi_cache(oti, dev, id, fid);
+ oic->oic_fid = *fid;
+ oic->oic_lid = *id;
+ oic->oic_dev = dev;
+ rc = osd_oii_insert(dev, oic, true);
+
+ return rc == 0 ? 1 : rc; /* 1 on success, per the retval contract above */
+}
+
+/* When lookup item under striped directory, we need to locate the master
+ * MDT-object of the striped directory firstly, then the client will send
+ * lookup (getattr_by_name) RPC to the MDT with some slave MDT-object's FID
+ * and the item's name. If the system is restored from MDT file level backup,
+ * then before the OI scrub completely built the OI files, the OI mappings of
+ * the master MDT-object and slave MDT-object may be invalid. Usually, it is
+ * not a problem for the master MDT-object. Because when locate the master
+ * MDT-object, we will do name based lookup (for the striped directory itself)
+ * firstly, during such process we can setup the correct OI mapping for the
+ * master MDT-object. But it will be trouble for the slave MDT-object. Because
+ * the client will not trigger name based lookup on the MDT to locate the slave
+ * MDT-object before locating item under the striped directory, then when
+ * osd_fid_lookup(), it will find that the OI mapping for the slave MDT-object
+ * is invalid and does not know what the right OI mapping is, then the MDT has
+ * to return -EINPROGRESS to the client to notify that the OI scrub is rebuilding
+ * the OI file, related OI mapping is unknown yet, please try again later. And
+ * then client will re-try the RPC again and again until related OI mapping has
+ * been updated. That is quite inefficient.
+ *
+ * To resolve above trouble, we will handle it as the following two cases:
+ *
+ * 1) The slave MDT-object and the master MDT-object are on different MDTs.
+ * It is relative easy. Be as one of remote MDT-objects, the slave MDT-object
+ * is linked under /REMOTE_PARENT_DIR with the name of its FID string.
+ * We can locate the slave MDT-object via lookup the /REMOTE_PARENT_DIR
+ * directly. Please check osd_fid_lookup().
+ *
+ * 2) The slave MDT-object and the master MDT-object reside on the same MDT.
+ * Under such case, during lookup the master MDT-object, we will lookup the
+ * slave MDT-object via readdir against the master MDT-object, because the
+ * slave MDT-objects information are stored as sub-directories with the name
+ * "${FID}:${index}". Then when find the local slave MDT-object, its OI
+ * mapping will be recorded. Then subsequent osd_fid_lookup() will know
+ * the correct OI mapping for the slave MDT-object.
+ *
+ * This function implements case 2): it reads the XATTR_NAME_LMV xattr of
+ * \a inode into the per-thread big buffer (growing the buffer if needed)
+ * and, only if the xattr carries LMV_MAGIC_V1, iterates the directory with
+ * osd_stripe_dir_filldir() as the callback, using the per-thread dentry and
+ * file objects instead of a real open.
+ *
+ * Returns 0 when there is no LMV xattr, when the magic is not LMV_MAGIC_V1,
+ * or when the scan did not fail; returns a negative errno otherwise
+ * (-ENOMEM if the buffer cannot be grown). Positive results of the scan
+ * are folded into 0. */
+static int osd_check_lmv(struct osd_thread_info *oti, struct osd_device *dev,
+ struct inode *inode, struct osd_idmap_cache *oic)
+{
+ struct lu_buf *buf = &oti->oti_big_buf; /* per-thread buffer, reused across calls */
+ struct dentry *dentry = &oti->oti_obj_dentry; /* per-thread scratch dentry */
+ struct file *filp = &oti->oti_file; /* per-thread scratch file */
+ const struct file_operations *fops;
+ struct lmv_mds_md_v1 *lmv1;
+ struct osd_check_lmv_buf oclb = {
+#ifdef HAVE_DIR_CONTEXT
+ .ctx.actor = osd_stripe_dir_filldir,
+#endif
+ .oclb_info = oti,
+ .oclb_dev = dev,
+ .oclb_oic = oic
+ };
+ int rc = 0;
+ ENTRY;
+
+again: /* retried after every buffer (re)allocation below */
+ rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMV, buf->lb_buf,
+ buf->lb_len);
+ if (rc == -ERANGE) { /* buffer too small: ask for the size, then grow and retry */
+ rc = __osd_xattr_get(inode, dentry, XATTR_NAME_LMV, NULL, 0);
+ if (rc > 0) {
+ lu_buf_realloc(buf, rc);
+ if (buf->lb_buf == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ goto again;
+ }
+ }
+
+ if (unlikely(rc == 0 || rc == -ENODATA)) /* no LMV xattr data: nothing to scan */
+ GOTO(out, rc = 0);
+
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (unlikely(buf->lb_buf == NULL)) { /* positive rc without a buffer: allocate it and read again */
+ lu_buf_realloc(buf, rc);
+ if (buf->lb_buf == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ goto again;
+ }
+
+ lmv1 = buf->lb_buf;
+ if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1) /* only LMV_MAGIC_V1 directories are scanned */
+ GOTO(out, rc = 0);
+
+ fops = inode->i_fop;
+ dentry->d_inode = inode; /* hand-build the dentry/file pair the directory iterator needs */
+ dentry->d_sb = inode->i_sb;
+ filp->f_pos = 0;
+ filp->f_path.dentry = dentry;
+ filp->f_mode = FMODE_64BITHASH;
+ filp->f_mapping = inode->i_mapping;
+ filp->f_op = fops;
+ filp->private_data = NULL;
+ set_file_inode(filp, inode);
+
+#ifdef HAVE_DIR_CONTEXT
+ oclb.ctx.pos = filp->f_pos;
+ rc = fops->iterate(filp, &oclb.ctx);
+ filp->f_pos = oclb.ctx.pos;
+#else
+ rc = fops->readdir(filp, &oclb, osd_stripe_dir_filldir);
+#endif
+ fops->release(inode, filp); /* called directly; filp never went through a real open */
+
+out:
+ if (rc < 0)
+ CDEBUG(D_LFSCK, "%.16s: fail to check LMV EA, inode = %lu/%u,"
+ DFID": rc = %d\n",
+ LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
+ inode->i_ino, inode->i_generation,
+ PFID(&oic->oic_fid), rc);
+ else
+ rc = 0; /* positive values (xattr size, iterator result) are not reported */
+
+ RETURN(rc);
+}
+
static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
const struct lu_fid *fid,
const struct lu_object_conf *conf)
struct osd_device *dev;
struct osd_idmap_cache *oic;
struct osd_inode_id *id;
+ struct osd_inode_id *tid;
struct inode *inode = NULL;
struct osd_scrub *scrub;
struct scrub_file *sf;
__u32 flags = SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT |
SS_AUTO_FULL;
+ __u32 saved_ino;
+ __u32 saved_gen;
int result = 0;
int rc1 = 0;
bool cached = true;
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
if (result == -ENOENT || result == -ESTALE)
- GOTO(out, result = -ENOENT);
+ GOTO(out, result = 0);
if (result == -EREMCHG) {
LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
result = osd_check_lma(env, obj);
- if (result != 0) {
- if (result == -ENODATA) {
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- if (result != 0) {
- /* result == -ENOENT means that the OI
- * mapping has been removed by race,
- * the target inode belongs to other
- * object.
- *
- * Others error also can be returned
- * directly. */
- iput(inode);
- obj->oo_inode = NULL;
- GOTO(out, result);
- } else {
- /* result == 0 means the cached OI
- * mapping is still in the OI file,
- * the target the inode is valid. */
- }
- } else {
- /* The current OI mapping is from the OI file,
- * since the inode has been found via
- * osd_iget_check(), no need recheck OI. */
- }
-
- goto found;
- }
-
- iput(inode);
- inode = NULL;
- obj->oo_inode = NULL;
- if (result != -EREMCHG)
- GOTO(out, result);
+ if (result == 0)
+ goto found;
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- /* result == -ENOENT means the cached OI mapping
- * has been removed from the OI file by race,
- * above target inode belongs to other object.
- *
- * Others error also can be returned directly. */
- if (result != 0)
- GOTO(out, result);
+ tid = &info->oti_id3;
+ LASSERT(tid != id);
- /* result == 0, goto trigger */
- } else {
+ if (result == -ENODATA) {
+ if (!cached)
/* The current OI mapping is from the OI file,
* since the inode has been found via
* osd_iget_check(), no need recheck OI. */
+ goto found;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ if (result == 0) {
+ LASSERTF(tid->oii_ino == id->oii_ino &&
+ tid->oii_gen == id->oii_gen,
+ "OI mapping changed(1): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen,
+ id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == inode->i_ino &&
+ tid->oii_gen == inode->i_generation,
+ "locate wrong inode(1): %u/%u => %ld/%u",
+ tid->oii_ino, tid->oii_gen,
+ inode->i_ino, inode->i_generation);
+
+ /* "result == 0" means the cached OI mapping is still in
+ * the OI file, so the target inode is valid. */
+ goto found;
}
- goto trigger;
+ /* "result == -ENOENT" means that the OI mapping has been removed
+ * by race, the target inode belongs to other object.
+ *
+ * Others error can be returned directly. */
+ if (result == -ENOENT)
+ result = 0;
}
+ saved_ino = inode->i_ino;
+ saved_gen = inode->i_generation;
+ iput(inode);
+ inode = NULL;
+ obj->oo_inode = NULL;
+
+ if (result != -EREMCHG)
+ GOTO(out, result);
+
+ if (!cached)
+ /* The current OI mapping is from the OI file,
+ * since the inode has been found via
+ * osd_iget_check(), no need recheck OI. */
+ goto trigger;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ /* "result == -ENOENT" means the cached OI mapping has been removed from
+ * the OI file by race, above target inode belongs to other object.
+ *
+ * Others error can be returned directly. */
+ if (result != 0)
+ GOTO(out, result = (result == -ENOENT ? 0 : result));
+
+ LASSERTF(tid->oii_ino == id->oii_ino && tid->oii_gen == id->oii_gen,
+ "OI mapping changed(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == saved_ino && tid->oii_gen == saved_gen,
+ "locate wrong inode(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, saved_ino, saved_gen);
+
+ goto trigger;
+
found:
obj->oo_compat_dot_created = 1;
obj->oo_compat_dotdot_created = 1;
- if (!S_ISDIR(inode->i_mode) || !ldiskfs_pdo) /* done */
+ if (S_ISDIR(inode->i_mode) &&
+ (flags & SS_AUTO_PARTIAL || sf->sf_status == SS_SCANNING))
+ osd_check_lmv(info, dev, inode, oic);
+
+ if (!ldiskfs_pdo)
GOTO(out, result = 0);
LASSERT(obj->oo_hl_head == NULL);
char oxe_buf[0];
};
-static struct osd_xattr_entry *osd_oxc_lookup(struct osd_object *obj,
- const char *name,
- size_t namelen)
-{
- struct osd_xattr_entry *oxe;
-
- list_for_each_entry(oxe, &obj->oo_xattr_list, oxe_list) {
- if (namelen == oxe->oxe_namelen &&
- strncmp(name, oxe->oxe_buf, namelen) == 0)
- return oxe;
- }
-
- return NULL;
-}
-
static int osd_oxc_get(struct osd_object *obj, const char *name,
struct lu_buf *buf)
{
- struct osd_xattr_entry *oxe;
- size_t vallen;
+ struct osd_xattr_entry *tmp; /* list cursor */
+ struct osd_xattr_entry *oxe = NULL; /* matching cache entry; stays NULL when none is found */
+ size_t namelen = strlen(name);
+ int rc; /* value length on success, negative errno otherwise */
ENTRY;
rcu_read_lock();
- oxe = osd_oxc_lookup(obj, name, strlen(name));
- if (oxe == NULL) {
- rcu_read_unlock();
- RETURN(-ENOENT);
+ list_for_each_entry_rcu(tmp, &obj->oo_xattr_list, oxe_list) { /* lockless walk under rcu_read_lock() */
+ if (namelen == tmp->oxe_namelen &&
+ strncmp(name, tmp->oxe_buf, namelen) == 0) {
+ oxe = tmp;
+ break;
+ }
}
- if (!oxe->oxe_exist) {
- rcu_read_unlock();
- RETURN(-ENODATA);
- }
+ if (oxe == NULL) /* name is not in the cache */
+ GOTO(out, rc = -ENOENT);
- vallen = oxe->oxe_len - sizeof(*oxe) - oxe->oxe_namelen - 1;
- LASSERT(vallen > 0);
+ if (!oxe->oxe_exist) /* entry is cached with its oxe_exist flag cleared */
+ GOTO(out, rc = -ENODATA);
- if (buf->lb_buf == NULL) {
- rcu_read_unlock();
- RETURN(vallen);
- }
+ /* vallen: the entry length minus the header, the name and the one
+  * byte that follows the name */
+ rc = oxe->oxe_len - sizeof(*oxe) - oxe->oxe_namelen - 1;
+ LASSERT(rc > 0);
- if (buf->lb_len < vallen) {
- rcu_read_unlock();
- RETURN(-ERANGE);
- }
+ if (buf->lb_buf == NULL) /* no buffer supplied: only the value length is returned */
+ GOTO(out, rc);
- memcpy(buf->lb_buf, oxe->oxe_buf + oxe->oxe_namelen + 1, vallen);
+ if (buf->lb_len < rc) /* caller's buffer cannot hold the value */
+ GOTO(out, rc = -ERANGE);
+
+ memcpy(buf->lb_buf, &oxe->oxe_buf[namelen + 1], rc); /* the value is stored right after the name and one byte */
+ EXIT;
+out: /* single exit so rcu_read_unlock() runs on every path */
rcu_read_unlock();
- RETURN(vallen);
+ return rc;
}
static void osd_oxc_free(struct rcu_head *head)
OBD_FREE(oxe, oxe->oxe_len);
}
-static inline void __osd_oxc_del(struct osd_object *obj, const char *name)
-{
- struct osd_xattr_entry *oxe;
-
- oxe = osd_oxc_lookup(obj, name, strlen(name));
- if (oxe != NULL) {
- list_del(&oxe->oxe_list);
- call_rcu(&oxe->oxe_rcu, osd_oxc_free);
- }
-}
-
static void osd_oxc_add(struct osd_object *obj, const char *name,
const char *buf, int buflen)
{
struct osd_xattr_entry *oxe;
+ struct osd_xattr_entry *old = NULL;
+ struct osd_xattr_entry *tmp;
size_t namelen = strlen(name);
size_t len = sizeof(*oxe) + namelen + 1 + buflen;
/* this should be rarely called, just remove old and add new */
spin_lock(&obj->oo_guard);
- __osd_oxc_del(obj, name);
- list_add_tail(&oxe->oxe_list, &obj->oo_xattr_list);
+ list_for_each_entry(tmp, &obj->oo_xattr_list, oxe_list) {
+ if (namelen == tmp->oxe_namelen &&
+ strncmp(name, tmp->oxe_buf, namelen) == 0) {
+ old = tmp;
+ break;
+ }
+ }
+ if (old != NULL) {
+ list_replace_rcu(&old->oxe_list, &oxe->oxe_list);
+ call_rcu(&old->oxe_rcu, osd_oxc_free);
+ } else {
+ list_add_tail_rcu(&oxe->oxe_list, &obj->oo_xattr_list);
+ }
spin_unlock(&obj->oo_guard);
}
static void osd_oxc_del(struct osd_object *obj, const char *name)
{
+ struct osd_xattr_entry *oxe; /* list cursor */
+ size_t namelen = strlen(name);
+
spin_lock(&obj->oo_guard);
- __osd_oxc_del(obj, name);
+ list_for_each_entry(oxe, &obj->oo_xattr_list, oxe_list) { /* plain walk: oo_guard is held here */
+ if (namelen == oxe->oxe_namelen &&
+ strncmp(name, oxe->oxe_buf, namelen) == 0) {
+ list_del_rcu(&oxe->oxe_list); /* unlink; osd_oxc_get() walks this list under rcu_read_lock() */
+ call_rcu(&oxe->oxe_rcu, osd_oxc_free); /* free is deferred to the osd_oxc_free() RCU callback */
+ break; /* only the first matching entry is removed */
+ }
+ }
spin_unlock(&obj->oo_guard);
}
time_after(jiffies, last_printed +
msecs_to_jiffies(60 * MSEC_PER_SEC)) &&
osd_transaction_size(dev) > 512) {
+ CWARN("%s: credits %u > trans_max %u\n", osd_name(dev),
+ oh->ot_credits, osd_transaction_size(dev));
osd_trans_dump_creds(env, th);
libcfs_debug_dumpstack(NULL);
last_credits = oh->ot_credits;
rc = osd_fld_lookup(env, osd, seq, range);
if (rc != 0) {
if (rc != -ENOENT)
- CERROR("%s: can't lookup FLD sequence "LPX64
- ": rc = %d\n", osd_name(osd), seq, rc);
+ CERROR("%s: can't lookup FLD sequence %#llx: rc = %d\n",
+ osd_name(osd), seq, rc);
RETURN(0);
}
statfs_pack(sfs, ksfs);
if (unlikely(sb->s_flags & MS_RDONLY))
- sfs->os_state = OS_STATE_READONLY;
+ sfs->os_state |= OS_STATE_READONLY;
if (LDISKFS_HAS_INCOMPAT_FEATURE(sb,
LDISKFS_FEATURE_INCOMPAT_EXTENTS))
sfs->os_maxbytes = sb->s_maxbytes;
if (bits & LA_MTIME)
inode->i_mtime = *osd_inode_time(env, inode, attr->la_mtime);
if (bits & LA_SIZE) {
+ spin_lock(&inode->i_lock);
LDISKFS_I(inode)->i_disksize = attr->la_size;
i_size_write(inode, attr->la_size);
+ spin_unlock(&inode->i_lock);
}
/* OSD should not change "i_blocks" which is used by quota.
osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
/* Recycle idle OI leaf may cause additional three OI blocks
* to be changed. */
- osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ osd_trans_declare_op(env, oh, OSD_OT_DELETE,
osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3);
/* one less inode */
rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
/* it will check/delete the inode from remote parent,
* how to optimize it? unlink performance impaction XXX */
result = osd_delete_from_remote_parent(env, osd, obj, oh);
- if (result != 0 && result != -ENOENT) {
+ if (result != 0)
CERROR("%s: delete inode "DFID": rc = %d\n",
osd_name(osd), PFID(fid), result);
- }
+
spin_lock(&obj->oo_guard);
clear_nlink(inode);
spin_unlock(&obj->oo_guard);
osd_trans_exec_op(env, th, OSD_OT_DESTROY);
ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_DESTROY);
- result = osd_oi_delete(osd_oti_get(env), osd, fid, oh->ot_handle,
- OI_CHECK_FLD);
+
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ result = osd_oi_delete(osd_oti_get(env), osd, fid,
+ oh->ot_handle, OI_CHECK_FLD);
osd_trans_exec_check(env, th, OSD_OT_DESTROY);
/* XXX: add to ext3 orphan list */
{
struct inode *inode = osd_dt_obj(dt)->oo_inode;
- CDEBUG(D_INODE, "Get version "LPX64" for inode %lu\n",
+ CDEBUG(D_INODE, "Get version %#llx for inode %lu\n",
LDISKFS_I(inode)->i_fs_version, inode->i_ino);
*ver = LDISKFS_I(inode)->i_fs_version;
return 0;
{
struct inode *inode = osd_dt_obj(dt)->oo_inode;
- CDEBUG(D_INODE, "Set version "LPX64" (old "LPX64") for inode %lu\n",
+ CDEBUG(D_INODE, "Set version %#llx (old %#llx) for inode %lu\n",
*new_version, LDISKFS_I(inode)->i_fs_version, inode->i_ino);
LDISKFS_I(inode)->i_fs_version = *new_version;
.do_xattr_del = osd_xattr_del,
.do_xattr_list = osd_xattr_list,
.do_object_sync = osd_object_sync,
+ .do_invalidate = osd_invalidate,
};
static const struct dt_object_operations osd_obj_otable_it_ops = {
* /Agent directory, Check whether it needs to delete
* from agent directory */
if (unlikely(strcmp((char *)key, dotdot) == 0)) {
- rc = osd_delete_from_remote_parent(env, osd_obj2dev(obj), obj,
- oh);
- if (rc != 0 && rc != -ENOENT) {
- CERROR("%s: delete agent inode "DFID": rc = %d\n",
- osd_name(osd), PFID(fid), rc);
- }
-
- if (rc == -ENOENT)
- rc = 0;
-
- GOTO(out, rc);
+ int ret;
+
+ ret = osd_delete_from_remote_parent(env, osd_obj2dev(obj),
+ obj, oh);
+ if (ret != 0)
+ /* Sigh, the entry has been deleted, and
+ * it is not easy to revert it back, so
+ * let's keep this error private, and let
+ * LFSCK fix it. XXX */
+ CERROR("%s: delete remote parent "DFID": rc = %d\n",
+ osd_name(osd), PFID(fid), ret);
}
out:
-
LASSERT(osd_invariant(obj));
osd_trans_exec_check(env, handle, OSD_OT_DELETE);
RETURN(rc);
child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
child->d_fsdata = (void *)ldp;
ll_vfs_dq_init(pobj->oo_inode);
- rc = osd_ldiskfs_add_entry(info, oth->ot_handle, child,
- cinode, hlock);
+ rc = osd_ldiskfs_add_entry(info, osd_obj2dev(pobj), oth->ot_handle,
+ child, cinode, hlock);
if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_TYPE)) {
struct ldiskfs_dir_entry_2 *de;
struct buffer_head *bh;
osd_consistency_check(struct osd_thread_info *oti, struct osd_device *dev,
struct osd_idmap_cache *oic)
{
- struct osd_scrub *scrub = &dev->od_scrub;
- struct lu_fid *fid = &oic->oic_fid;
- struct osd_inode_id *id = &oti->oti_id;
- int once = 0;
- int rc;
+ struct osd_scrub *scrub = &dev->od_scrub;
+ struct lu_fid *fid = &oic->oic_fid;
+ struct osd_inode_id *id = &oic->oic_lid;
+ struct inode *inode = NULL;
+ int once = 0;
+ bool insert;
+ int rc;
ENTRY;
if (!fid_is_norm(fid) && !fid_is_igif(fid))
RETURN(0);
again:
- rc = osd_oi_lookup(oti, dev, fid, id, 0);
+ rc = osd_oi_lookup(oti, dev, fid, &oti->oti_id, 0);
if (rc == -ENOENT) {
- struct inode *inode;
+ __u32 gen = id->oii_gen;
- *id = oic->oic_lid;
- inode = osd_iget(oti, dev, &oic->oic_lid);
+ insert = true;
+ if (inode != NULL)
+ goto trigger;
+ inode = osd_iget(oti, dev, id);
/* The inode has been removed (by race maybe). */
if (IS_ERR(inode)) {
rc = PTR_ERR(inode);
RETURN(rc == -ESTALE ? -ENOENT : rc);
}
- iput(inode);
/* The OI mapping is lost. */
- if (id->oii_gen != OSD_OII_NOGEN)
+ if (gen != OSD_OII_NOGEN)
goto trigger;
+ iput(inode);
/* The inode may has been reused by others, we do not know,
* leave it to be handled by subsequent osd_fid_lookup(). */
RETURN(0);
- } else if (rc != 0 || osd_id_eq(id, &oic->oic_lid)) {
+ } else if (rc != 0 || osd_id_eq(id, &oti->oti_id)) {
RETURN(rc);
+ } else {
+ insert = false;
}
trigger:
if (thread_is_running(&scrub->os_thread)) {
- rc = osd_oii_insert(dev, oic, rc == -ENOENT);
+ if (inode == NULL) {
+ inode = osd_iget(oti, dev, id);
+ /* The inode has been removed (by race maybe). */
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+
+ RETURN(rc == -ESTALE ? -ENOENT : rc);
+ }
+ }
+
+ rc = osd_oii_insert(dev, oic, insert);
/* There is race condition between osd_oi_lookup and OI scrub.
* The OI scrub finished just after osd_oi_lookup() failure.
* Under such case, it is unnecessary to trigger OI scrub again,
if (unlikely(rc == -EAGAIN))
goto again;
- RETURN(0);
+ if (!S_ISDIR(inode->i_mode))
+ rc = 0;
+ else
+ rc = osd_check_lmv(oti, dev, inode, oic);
+
+ iput(inode);
+ RETURN(rc);
}
if (!dev->od_noscrub && ++once == 1) {
rc = osd_scrub_start(dev, SS_AUTO_PARTIAL | SS_CLEAR_DRYRUN |
SS_CLEAR_FAILOUT);
- CDEBUG(D_LFSCK | D_CONSOLE, "%.16s: trigger OI scrub by RPC "
- "for "DFID", rc = %d [2]\n",
+ CDEBUG(D_LFSCK | D_CONSOLE | D_WARNING,
+ "%.16s: trigger partial OI scrub for RPC inconsistency "
+ "checking FID "DFID": rc = %d\n",
LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
PFID(fid), rc);
if (rc == 0 || rc == -EALREADY)
goto again;
}
- RETURN(0);
+ if (inode != NULL)
+ iput(inode);
+
+ RETURN(rc);
}
static int osd_fail_fid_lookup(struct osd_thread_info *oti,
* \retval 1 on buffer full
*/
#ifdef HAVE_FILLDIR_USE_CTX
-static int osd_ldiskfs_filldir(struct dir_context *buf,
- const char *name, int namelen,
+static int osd_ldiskfs_filldir(struct dir_context *buf,
#else
-static int osd_ldiskfs_filldir(void *buf, const char *name, int namelen,
+static int osd_ldiskfs_filldir(void *buf,
#endif
- loff_t offset, __u64 ino,
- unsigned d_type)
+ const char *name, int namelen,
+ loff_t offset, __u64 ino, unsigned d_type)
{
struct osd_it_ea *it =
((struct osd_filldir_cbs *)buf)->it;
}
static int
-osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
- struct dentry *dentry, const struct lu_fid *fid,
- struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de,
- struct htree_lock *hlock, int dot_dotdot)
+osd_dirent_reinsert(const struct lu_env *env, struct osd_device *dev,
+ handle_t *jh, struct dentry *dentry,
+ const struct lu_fid *fid, struct buffer_head *bh,
+ struct ldiskfs_dir_entry_2 *de, struct htree_lock *hlock,
+ int dot_dotdot)
{
struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode = dentry->d_inode;
osd_get_ldiskfs_dirent_param(ldp, fid);
dentry->d_fsdata = (void *)ldp;
ll_vfs_dq_init(dir);
- rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, hlock);
+ rc = osd_ldiskfs_add_entry(info, dev, jh, dentry, inode, hlock);
/* It is too bad, we cannot reinsert the name entry back.
* That means we lose it! */
if (rc != 0)
*fid = lma->lma_self_fid;
dirty = true;
/* Update the FID-in-dirent. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_REPAIR;
else
*fid = lma->lma_self_fid;
dirty = true;
/* Append the FID-in-dirent. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_REPAIR;
else
lu_igif_build(fid, inode->i_ino, inode->i_generation);
/* It is probably IGIF object. Only aappend the
* FID-in-dirent. OI scrub will process FID-in-LMA. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_UPGRADE;
else
/* Glom up mount options */
if (*options != '\0')
strcat(options, ",");
- strlcat(options, "no_mbcache", PAGE_SIZE);
+ strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
type = get_fs_type("ldiskfs");
if (!type) {