*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
return inode;
}
-int osd_ldiskfs_add_entry(struct osd_thread_info *info,
+int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
handle_t *handle, struct dentry *child,
struct inode *inode, struct htree_lock *hlock)
{
rc = __ldiskfs_add_entry(handle, child, inode, hlock);
if (rc == -ENOBUFS || rc == -ENOSPC) {
- char fidbuf[FID_LEN + 1];
- struct lustre_mdt_attrs lma;
- struct lu_fid fid = { };
- char *errstr;
- struct dentry *p_dentry = child->d_parent;
-
- rc2 = osd_get_lma(info, p_dentry->d_inode, p_dentry,
- &lma);
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ struct inode *parent = child->d_parent->d_inode;
+ struct lu_fid *fid = NULL;
+
+ rc2 = osd_get_lma(info, parent, child->d_parent, lma);
if (rc2 == 0) {
- fid = lma.lma_self_fid;
- snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
+ fid = &lma->lma_self_fid;
} else if (rc2 == -ENODATA) {
- if (unlikely(p_dentry->d_inode ==
- inode->i_sb->s_root->d_inode))
- lu_local_obj_fid(&fid, OSD_FS_ROOT_OID);
- else if (info->oti_dev && !info->oti_dev->od_is_ost &&
- fid_seq_is_mdt0(fid_seq(&fid)))
- lu_igif_build(&fid, p_dentry->d_inode->i_ino,
- p_dentry->d_inode->i_generation);
- snprintf(fidbuf, sizeof(fidbuf), DFID, PFID(&fid));
- } else {
- snprintf(fidbuf, FID_LEN, "%s", "unknown");
+ if (unlikely(parent == inode->i_sb->s_root->d_inode)) {
+ fid = &info->oti_fid3;
+ lu_local_obj_fid(fid, OSD_FS_ROOT_OID);
+ } else if (!osd->od_is_ost && osd->od_index == 0) {
+ fid = &info->oti_fid3;
+ lu_igif_build(fid, parent->i_ino,
+ parent->i_generation);
+ }
}
- if (rc == -ENOSPC)
- errstr = "has reached";
+ if (fid != NULL)
+ CWARN("%s: directory (inode: %lu, FID: "DFID") %s "
+ "maximum entry limit\n",
+ osd_name(osd), parent->i_ino, PFID(fid),
+ rc == -ENOSPC ? "has reached" : "is approaching");
else
- errstr = "is approaching";
- CWARN("%.16s: directory (inode: %lu FID: %s) %s maximum entry limit\n",
- LDISKFS_SB(inode->i_sb)->s_es->s_volume_name,
- p_dentry->d_inode->i_ino, fidbuf, errstr);
+ CWARN("%s: directory (inode: %lu, FID: unknown) %s "
+ "maximum entry limit\n",
+ osd_name(osd), parent->i_ino,
+ rc == -ENOSPC ? "has reached" : "is approaching");
+
/* ignore such error now */
if (rc == -ENOBUFS)
rc = 0;
}
+
return rc;
}
* Generally, when the device is mounted, it will
* auto check whether the system is restored from
* file-level backup or not. We trust such detect
- * to distinguish the 1st case from the 2nd case. */
- if (rc == 0) {
- if (!IS_ERR(inode) && inode->i_generation != 0 &&
- inode->i_generation == id->oii_gen)
- /* "id->oii_gen != OSD_OII_NOGEN" is for
- * "@cached == false" case. */
- rc = -ENOENT;
- else
- rc = -EREMCHG;
- } else {
+ * to distinguish the 1st case from the 2nd case:
+ * if the OI files are consistent but may still
+ * contain stale OI mappings because of case 2,
+ * then iget() returning -ENOENT or -ESTALE
+ * should be treated as case 2. */
+ if (rc != 0)
/* If the OI mapping was in OI file before the
* osd_iget_check(), but now, it is disappear,
* then it must be removed by race. That is a
* normal race case. */
- }
+ GOTO(put, rc);
+
+ if ((!IS_ERR(inode) && inode->i_generation != 0 &&
+ inode->i_generation == id->oii_gen) ||
+ (IS_ERR(inode) && !(dev->od_scrub.os_file.sf_flags &
+ SF_INCONSISTENT)))
+ rc = -ENOENT;
+ else
+ rc = -EREMCHG;
} else {
if (id->oii_gen == OSD_OII_NOGEN)
osd_id_gen(id, inode->i_ino, inode->i_generation);
struct osd_device *dev;
struct osd_idmap_cache *oic;
struct osd_inode_id *id;
+ struct osd_inode_id *tid;
struct inode *inode = NULL;
struct osd_scrub *scrub;
struct scrub_file *sf;
__u32 flags = SS_CLEAR_DRYRUN | SS_CLEAR_FAILOUT |
SS_AUTO_FULL;
+ __u32 saved_ino;
+ __u32 saved_gen;
int result = 0;
int rc1 = 0;
bool cached = true;
if (IS_ERR(inode)) {
result = PTR_ERR(inode);
if (result == -ENOENT || result == -ESTALE)
- GOTO(out, result = -ENOENT);
+ GOTO(out, result = 0);
if (result == -EREMCHG) {
LASSERT(obj->oo_inode->i_sb == osd_sb(dev));
result = osd_check_lma(env, obj);
- if (result != 0) {
- if (result == -ENODATA) {
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- if (result != 0) {
- /* result == -ENOENT means that the OI
- * mapping has been removed by race,
- * the target inode belongs to other
- * object.
- *
- * Others error also can be returned
- * directly. */
- iput(inode);
- obj->oo_inode = NULL;
- GOTO(out, result);
- } else {
- /* result == 0 means the cached OI
- * mapping is still in the OI file,
- * the target the inode is valid. */
- }
- } else {
- /* The current OI mapping is from the OI file,
- * since the inode has been found via
- * osd_iget_check(), no need recheck OI. */
- }
-
- goto found;
- }
-
- iput(inode);
- inode = NULL;
- obj->oo_inode = NULL;
- if (result != -EREMCHG)
- GOTO(out, result);
+ if (result == 0)
+ goto found;
- if (cached) {
- result = osd_oi_lookup(info, dev, fid, id,
- OI_CHECK_FLD);
- /* result == -ENOENT means the cached OI mapping
- * has been removed from the OI file by race,
- * above target inode belongs to other object.
- *
- * Others error also can be returned directly. */
- if (result != 0)
- GOTO(out, result);
+ tid = &info->oti_id3;
+ LASSERT(tid != id);
- /* result == 0, goto trigger */
- } else {
+ if (result == -ENODATA) {
+ if (!cached)
/* The current OI mapping is from the OI file,
* since the inode has been found via
* osd_iget_check(), no need recheck OI. */
+ goto found;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ if (result == 0) {
+ LASSERTF(tid->oii_ino == id->oii_ino &&
+ tid->oii_gen == id->oii_gen,
+ "OI mapping changed(1): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen,
+ id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == inode->i_ino &&
+ tid->oii_gen == inode->i_generation,
+ "locate wrong inode(1): %u/%u => %ld/%u",
+ tid->oii_ino, tid->oii_gen,
+ inode->i_ino, inode->i_generation);
+
+ /* "result == 0" means the cached OI mapping is still in
+ * the OI file, so the target inode is valid. */
+ goto found;
}
- goto trigger;
+ /* "result == -ENOENT" means that the OI mapping has been removed
+ * by race, so the target inode belongs to another object.
+ *
+ * Other errors can be returned directly. */
+ if (result == -ENOENT)
+ result = 0;
}
+ saved_ino = inode->i_ino;
+ saved_gen = inode->i_generation;
+ iput(inode);
+ inode = NULL;
+ obj->oo_inode = NULL;
+
+ if (result != -EREMCHG)
+ GOTO(out, result);
+
+ if (!cached)
+ /* The current OI mapping is from the OI file,
+ * since the inode has been found via
+ * osd_iget_check(), no need recheck OI. */
+ goto trigger;
+
+ result = osd_oi_lookup(info, dev, fid, tid, OI_CHECK_FLD);
+ /* "result == -ENOENT" means the cached OI mapping has been removed from
+ * the OI file by race; the target inode above belongs to another object.
+ *
+ * Other errors can be returned directly. */
+ if (result != 0)
+ GOTO(out, result = (result == -ENOENT ? 0 : result));
+
+ LASSERTF(tid->oii_ino == id->oii_ino && tid->oii_gen == id->oii_gen,
+ "OI mapping changed(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, id->oii_ino, id->oii_gen);
+
+ LASSERTF(tid->oii_ino == saved_ino && tid->oii_gen == saved_gen,
+ "locate wrong inode(2): %u/%u => %u/%u",
+ tid->oii_ino, tid->oii_gen, saved_ino, saved_gen);
+
+ goto trigger;
+
found:
obj->oo_compat_dot_created = 1;
obj->oo_compat_dotdot_created = 1;
char oxe_buf[0];
};
-static struct osd_xattr_entry *osd_oxc_lookup(struct osd_object *obj,
- const char *name,
- size_t namelen)
-{
- struct osd_xattr_entry *oxe;
-
- list_for_each_entry(oxe, &obj->oo_xattr_list, oxe_list) {
- if (namelen == oxe->oxe_namelen &&
- strncmp(name, oxe->oxe_buf, namelen) == 0)
- return oxe;
- }
-
- return NULL;
-}
-
static int osd_oxc_get(struct osd_object *obj, const char *name,
struct lu_buf *buf)
{
- struct osd_xattr_entry *oxe;
- size_t vallen;
+ struct osd_xattr_entry *tmp;
+ struct osd_xattr_entry *oxe = NULL;
+ size_t namelen = strlen(name);
+ int rc;
ENTRY;
rcu_read_lock();
- oxe = osd_oxc_lookup(obj, name, strlen(name));
- if (oxe == NULL) {
- rcu_read_unlock();
- RETURN(-ENOENT);
+ list_for_each_entry_rcu(tmp, &obj->oo_xattr_list, oxe_list) {
+ if (namelen == tmp->oxe_namelen &&
+ strncmp(name, tmp->oxe_buf, namelen) == 0) {
+ oxe = tmp;
+ break;
+ }
}
- if (!oxe->oxe_exist) {
- rcu_read_unlock();
- RETURN(-ENODATA);
- }
+ if (oxe == NULL)
+ GOTO(out, rc = -ENOENT);
- vallen = oxe->oxe_len - sizeof(*oxe) - oxe->oxe_namelen - 1;
- LASSERT(vallen > 0);
+ if (!oxe->oxe_exist)
+ GOTO(out, rc = -ENODATA);
- if (buf->lb_buf == NULL) {
- rcu_read_unlock();
- RETURN(vallen);
- }
+ /* vallen */
+ rc = oxe->oxe_len - sizeof(*oxe) - oxe->oxe_namelen - 1;
+ LASSERT(rc > 0);
- if (buf->lb_len < vallen) {
- rcu_read_unlock();
- RETURN(-ERANGE);
- }
+ if (buf->lb_buf == NULL)
+ GOTO(out, rc);
+
+ if (buf->lb_len < rc)
+ GOTO(out, rc = -ERANGE);
- memcpy(buf->lb_buf, oxe->oxe_buf + oxe->oxe_namelen + 1, vallen);
+ memcpy(buf->lb_buf, &oxe->oxe_buf[namelen + 1], rc);
+ EXIT;
+out:
rcu_read_unlock();
- RETURN(vallen);
+ return rc;
}
static void osd_oxc_free(struct rcu_head *head)
OBD_FREE(oxe, oxe->oxe_len);
}
-static inline void __osd_oxc_del(struct osd_object *obj, const char *name)
-{
- struct osd_xattr_entry *oxe;
-
- oxe = osd_oxc_lookup(obj, name, strlen(name));
- if (oxe != NULL) {
- list_del(&oxe->oxe_list);
- call_rcu(&oxe->oxe_rcu, osd_oxc_free);
- }
-}
-
static void osd_oxc_add(struct osd_object *obj, const char *name,
const char *buf, int buflen)
{
struct osd_xattr_entry *oxe;
+ struct osd_xattr_entry *old = NULL;
+ struct osd_xattr_entry *tmp;
size_t namelen = strlen(name);
size_t len = sizeof(*oxe) + namelen + 1 + buflen;
/* this should be rarely called, just remove old and add new */
spin_lock(&obj->oo_guard);
- __osd_oxc_del(obj, name);
- list_add_tail(&oxe->oxe_list, &obj->oo_xattr_list);
+ list_for_each_entry(tmp, &obj->oo_xattr_list, oxe_list) {
+ if (namelen == tmp->oxe_namelen &&
+ strncmp(name, tmp->oxe_buf, namelen) == 0) {
+ old = tmp;
+ break;
+ }
+ }
+ if (old != NULL) {
+ list_replace_rcu(&old->oxe_list, &oxe->oxe_list);
+ call_rcu(&old->oxe_rcu, osd_oxc_free);
+ } else {
+ list_add_tail_rcu(&oxe->oxe_list, &obj->oo_xattr_list);
+ }
spin_unlock(&obj->oo_guard);
}
static void osd_oxc_del(struct osd_object *obj, const char *name)
{
+ struct osd_xattr_entry *oxe;
+ size_t namelen = strlen(name);
+
spin_lock(&obj->oo_guard);
- __osd_oxc_del(obj, name);
+ list_for_each_entry(oxe, &obj->oo_xattr_list, oxe_list) {
+ if (namelen == oxe->oxe_namelen &&
+ strncmp(name, oxe->oxe_buf, namelen) == 0) {
+ list_del_rcu(&oxe->oxe_list);
+ call_rcu(&oxe->oxe_rcu, osd_oxc_free);
+ break;
+ }
+ }
spin_unlock(&obj->oo_guard);
}
time_after(jiffies, last_printed +
msecs_to_jiffies(60 * MSEC_PER_SEC)) &&
osd_transaction_size(dev) > 512) {
+ CWARN("%s: credits %u > trans_max %u\n", osd_name(dev),
+ oh->ot_credits, osd_transaction_size(dev));
osd_trans_dump_creds(env, th);
libcfs_debug_dumpstack(NULL);
last_credits = oh->ot_credits;
rc = osd_fld_lookup(env, osd, seq, range);
if (rc != 0) {
if (rc != -ENOENT)
- CERROR("%s: can't lookup FLD sequence "LPX64
- ": rc = %d\n", osd_name(osd), seq, rc);
+ CERROR("%s: can't lookup FLD sequence %#llx: rc = %d\n",
+ osd_name(osd), seq, rc);
RETURN(0);
}
statfs_pack(sfs, ksfs);
if (unlikely(sb->s_flags & MS_RDONLY))
- sfs->os_state = OS_STATE_READONLY;
+ sfs->os_state |= OS_STATE_READONLY;
if (LDISKFS_HAS_INCOMPAT_FEATURE(sb,
LDISKFS_FEATURE_INCOMPAT_EXTENTS))
sfs->os_maxbytes = sb->s_maxbytes;
if (bits & LA_MTIME)
inode->i_mtime = *osd_inode_time(env, inode, attr->la_mtime);
if (bits & LA_SIZE) {
+ spin_lock(&inode->i_lock);
LDISKFS_I(inode)->i_disksize = attr->la_size;
i_size_write(inode, attr->la_size);
+ spin_unlock(&inode->i_lock);
}
/* OSD should not change "i_blocks" which is used by quota.
osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
/* Recycle idle OI leaf may cause additional three OI blocks
* to be changed. */
- osd_trans_declare_op(env, oh, OSD_OT_DELETE,
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ osd_trans_declare_op(env, oh, OSD_OT_DELETE,
osd_dto_credits_noquota[DTO_INDEX_DELETE] + 3);
/* one less inode */
rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
/* it will check/delete the inode from remote parent,
* how to optimize it? unlink performance impaction XXX */
result = osd_delete_from_remote_parent(env, osd, obj, oh);
- if (result != 0 && result != -ENOENT) {
+ if (result != 0)
CERROR("%s: delete inode "DFID": rc = %d\n",
osd_name(osd), PFID(fid), result);
- }
+
spin_lock(&obj->oo_guard);
clear_nlink(inode);
spin_unlock(&obj->oo_guard);
osd_trans_exec_op(env, th, OSD_OT_DESTROY);
ldiskfs_set_inode_state(inode, LDISKFS_STATE_LUSTRE_DESTROY);
- result = osd_oi_delete(osd_oti_get(env), osd, fid, oh->ot_handle,
- OI_CHECK_FLD);
+
+ if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
+ result = osd_oi_delete(osd_oti_get(env), osd, fid,
+ oh->ot_handle, OI_CHECK_FLD);
osd_trans_exec_check(env, th, OSD_OT_DESTROY);
/* XXX: add to ext3 orphan list */
{
struct inode *inode = osd_dt_obj(dt)->oo_inode;
- CDEBUG(D_INODE, "Get version "LPX64" for inode %lu\n",
+ CDEBUG(D_INODE, "Get version %#llx for inode %lu\n",
LDISKFS_I(inode)->i_fs_version, inode->i_ino);
*ver = LDISKFS_I(inode)->i_fs_version;
return 0;
{
struct inode *inode = osd_dt_obj(dt)->oo_inode;
- CDEBUG(D_INODE, "Set version "LPX64" (old "LPX64") for inode %lu\n",
+ CDEBUG(D_INODE, "Set version %#llx (old %#llx) for inode %lu\n",
*new_version, LDISKFS_I(inode)->i_fs_version, inode->i_ino);
LDISKFS_I(inode)->i_fs_version = *new_version;
* /Agent directory, Check whether it needs to delete
* from agent directory */
if (unlikely(strcmp((char *)key, dotdot) == 0)) {
- rc = osd_delete_from_remote_parent(env, osd_obj2dev(obj), obj,
- oh);
- if (rc != 0 && rc != -ENOENT) {
- CERROR("%s: delete agent inode "DFID": rc = %d\n",
- osd_name(osd), PFID(fid), rc);
- }
-
- if (rc == -ENOENT)
- rc = 0;
-
- GOTO(out, rc);
+ int ret;
+
+ ret = osd_delete_from_remote_parent(env, osd_obj2dev(obj),
+ obj, oh);
+ if (ret != 0)
+ /* Sigh, the entry has been deleted, and
+ * it is not easy to revert it back, so
+ * let's keep this error private, and let
+ * LFSCK fix it. XXX */
+ CERROR("%s: delete remote parent "DFID": rc = %d\n",
+ osd_name(osd), PFID(fid), ret);
}
out:
-
LASSERT(osd_invariant(obj));
osd_trans_exec_check(env, handle, OSD_OT_DELETE);
RETURN(rc);
child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
child->d_fsdata = (void *)ldp;
ll_vfs_dq_init(pobj->oo_inode);
- rc = osd_ldiskfs_add_entry(info, oth->ot_handle, child,
- cinode, hlock);
+ rc = osd_ldiskfs_add_entry(info, osd_obj2dev(pobj), oth->ot_handle,
+ child, cinode, hlock);
if (rc == 0 && OBD_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_TYPE)) {
struct ldiskfs_dir_entry_2 *de;
struct buffer_head *bh;
}
static int
-osd_dirent_reinsert(const struct lu_env *env, handle_t *jh,
- struct dentry *dentry, const struct lu_fid *fid,
- struct buffer_head *bh, struct ldiskfs_dir_entry_2 *de,
- struct htree_lock *hlock, int dot_dotdot)
+osd_dirent_reinsert(const struct lu_env *env, struct osd_device *dev,
+ handle_t *jh, struct dentry *dentry,
+ const struct lu_fid *fid, struct buffer_head *bh,
+ struct ldiskfs_dir_entry_2 *de, struct htree_lock *hlock,
+ int dot_dotdot)
{
struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode = dentry->d_inode;
osd_get_ldiskfs_dirent_param(ldp, fid);
dentry->d_fsdata = (void *)ldp;
ll_vfs_dq_init(dir);
- rc = osd_ldiskfs_add_entry(info, jh, dentry, inode, hlock);
+ rc = osd_ldiskfs_add_entry(info, dev, jh, dentry, inode, hlock);
/* It is too bad, we cannot reinsert the name entry back.
* That means we lose it! */
if (rc != 0)
*fid = lma->lma_self_fid;
dirty = true;
/* Update the FID-in-dirent. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_REPAIR;
else
*fid = lma->lma_self_fid;
dirty = true;
/* Append the FID-in-dirent. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_REPAIR;
else
lu_igif_build(fid, inode->i_ino, inode->i_generation);
/* It is probably IGIF object. Only aappend the
* FID-in-dirent. OI scrub will process FID-in-LMA. */
- rc = osd_dirent_reinsert(env, jh, dentry, fid, bh, de,
- hlock, dot_dotdot);
+ rc = osd_dirent_reinsert(env, dev, jh, dentry, fid,
+ bh, de, hlock, dot_dotdot);
if (rc == 0)
*attr |= LUDA_UPGRADE;
else
/* Glom up mount options */
if (*options != '\0')
strcat(options, ",");
- strlcat(options, "no_mbcache", PAGE_SIZE);
+ strlcat(options, "no_mbcache,nodelalloc", PAGE_SIZE);
type = get_fs_type("ldiskfs");
if (!type) {