return count;
}
-static const char remote_parent_dir[] = "REMOTE_PARENT_DIR";
static int osd_mdt_init(const struct lu_env *env, struct osd_device *dev)
{
struct lvfs_run_ctxt new;
parent = osd_sb(dev)->s_root;
osd_push_ctxt(dev, &new, &save);
- d = simple_mkdir(parent, dev->od_mnt, remote_parent_dir,
+ d = simple_mkdir(parent, dev->od_mnt, REMOTE_PARENT_DIR,
0755, 1);
if (IS_ERR(d))
GOTO(cleanup, rc = PTR_ERR(d));
mutex_lock(&parent->d_inode->i_mutex);
rc = osd_ldiskfs_add_entry(info, osd, oh->ot_handle, dentry,
obj->oo_inode, NULL);
- CDEBUG(D_INODE, "%s: add %s:%lu to remote parent %lu.\n", osd_name(osd),
- name, obj->oo_inode->i_ino, parent->d_inode->i_ino);
- ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
+ if (!rc && S_ISDIR(obj->oo_inode->i_mode))
+ ldiskfs_inc_count(oh->ot_handle, parent->d_inode);
+ else if (unlikely(rc == -EEXIST))
+ rc = 0;
+ if (!rc)
+ lu_object_set_agent_entry(&obj->oo_dt.do_lu);
+ CDEBUG(D_INODE, "%s: create agent entry for %s: rc = %d\n",
+ osd_name(osd), name, rc);
mark_inode_dirty(parent->d_inode);
mutex_unlock(&parent->d_inode->i_mutex);
RETURN(rc);
int osd_delete_from_remote_parent(const struct lu_env *env,
struct osd_device *osd,
struct osd_object *obj,
- struct osd_thandle *oh)
+ struct osd_thandle *oh, bool destroy)
{
struct osd_mdobj_map *omm = osd->od_mdt_map;
struct osd_thread_info *oti = osd_oti_get(env);
struct buffer_head *bh;
int rc;
- /* Check lma to see whether it is remote object */
- rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
- &oti->oti_ost_attrs);
- if (rc != 0) {
- /* No LMA if the directory is created before 2.0 */
- if (rc == -ENODATA)
- rc = 0;
- RETURN(rc);
- }
-
- if (likely(!(lma->lma_incompat & LMAI_REMOTE_PARENT)))
- RETURN(0);
-
parent = omm->omm_remote_parent;
sprintf(name, DFID_NOBRACE, PFID(lu_object_fid(&obj->oo_dt.do_lu)));
dentry = osd_child_dentry_by_inode(env, parent->d_inode,
NULL, NULL);
if (IS_ERR(bh)) {
mutex_unlock(&parent->d_inode->i_mutex);
- RETURN(PTR_ERR(bh));
+ rc = PTR_ERR(bh);
+ if (unlikely(rc == -ENOENT))
+ rc = 0;
+ } else {
+ rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode,
+ de, bh);
+ if (!rc && S_ISDIR(obj->oo_inode->i_mode))
+ ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
+ mark_inode_dirty(parent->d_inode);
+ mutex_unlock(&parent->d_inode->i_mutex);
+ brelse(bh);
+ CDEBUG(D_INODE, "%s: remove agent entry for %s: rc = %d\n",
+ osd_name(osd), name, rc);
}
- CDEBUG(D_INODE, "%s: el %s:%lu to remote parent %lu.\n", osd_name(osd),
- name, obj->oo_inode->i_ino, parent->d_inode->i_ino);
- rc = ldiskfs_delete_entry(oh->ot_handle, parent->d_inode, de, bh);
- ldiskfs_dec_count(oh->ot_handle, parent->d_inode);
- mark_inode_dirty(parent->d_inode);
- mutex_unlock(&parent->d_inode->i_mutex);
- brelse(bh);
+
+ if (destroy || rc) {
+ if (!rc)
+ lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
+
+ RETURN(rc);
+ }
+
+ rc = osd_get_lma(oti, obj->oo_inode, &oti->oti_obj_dentry,
+ &oti->oti_ost_attrs);
+ if (rc)
+ RETURN(rc);
/* Get rid of REMOTE_PARENT flag from incompat */
lma->lma_incompat &= ~LMAI_REMOTE_PARENT;
lustre_lma_swab(lma);
rc = __osd_xattr_set(oti, obj->oo_inode, XATTR_NAME_LMA, lma,
sizeof(*lma), XATTR_REPLACE);
+ if (!rc)
+ lu_object_clear_agent_entry(&obj->oo_dt.do_lu);
RETURN(rc);
}
if (lma->lma_compat & LMAC_STRIPE_INFO &&
osd->od_is_ost)
obj->oo_pfid_in_lma = 1;
+ if (unlikely(lma->lma_incompat & LMAI_REMOTE_PARENT) &&
+ !osd->od_is_ost)
+ lu_object_set_agent_entry(&obj->oo_dt.do_lu);
}
}
if (lma->lma_compat & LMAC_STRIPE_INFO &&
dev->od_is_ost)
obj->oo_pfid_in_lma = 1;
+ if (unlikely(lma->lma_incompat & LMAI_REMOTE_PARENT) &&
+ !dev->od_is_ost)
+ lu_object_set_agent_entry(&obj->oo_dt.do_lu);
} else if (result != -ENODATA) {
GOTO(out, result);
}
osd_trans_declare_op(env, oh, OSD_OT_DESTROY,
osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
+
+ /* For removing agent entry */
+ if (lu_object_has_agent_entry(&obj->oo_dt.do_lu))
+ oh->ot_credits += osd_dto_credits_noquota[DTO_INDEX_DELETE];
+
/* Recycle idle OI leaf may cause additional three OI blocks
* to be changed. */
if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_MDTOBJ2))
if (unlikely(fid_is_acct(fid)))
RETURN(-EPERM);
+ if (lu_object_has_agent_entry(&obj->oo_dt.do_lu)) {
+ result = osd_delete_from_remote_parent(env, osd, obj, oh, true);
+ if (result != 0)
+ CERROR("%s: remove agent entry "DFID": rc = %d\n",
+ osd_name(osd), PFID(fid), result);
+ }
+
if (S_ISDIR(inode->i_mode)) {
LASSERT(osd_inode_unlinked(inode) || inode->i_nlink == 1 ||
inode->i_nlink == 2);
- /* it will check/delete the inode from remote parent,
- * how to optimize it? unlink performance impaction XXX */
- result = osd_delete_from_remote_parent(env, osd, obj, oh);
- if (result != 0)
- CERROR("%s: delete inode "DFID": rc = %d\n",
- osd_name(osd), PFID(fid), result);
spin_lock(&obj->oo_guard);
clear_nlink(inode);
else
goto upgrade;
} else {
+ /* If some name entry resides on a remote MDT, an agent entry
+ * will be created under the remote parent. Conversely, if the
+ * remote entry is removed, the related agent entry may need
+ * to be removed from the remote parent. Since several cases
+ * are possible, declare enough credits for all of them: the
+ * credits for creating the agent entry also cover removal. */
+ if (strcmp(name, XATTR_NAME_LINK) == 0) {
+ credits += osd_dto_credits_noquota[DTO_INDEX_INSERT];
+ if (dt_object_exists(dt))
+ credits += 1; /* For updating LMA */
+ }
upgrade:
credits += osd_dto_credits_noquota[DTO_XATTR_SET];
}
/*
+ * In a DNE environment, an object (whether a regular file or a directory)
+ * and its name entry may reside on different MDTs. In that case, we create
+ * an agent entry on the MDT where the object resides. The agent entry
+ * references the object locally, which makes the object visible to
+ * userspace when the MDT is mounted directly as 'ldiskfs'. Userspace tools
+ * such as 'tar' can then handle the object properly.
+ *
+ * We handle the agent entry when setting the linkEA, which is the common
+ * interface for both regular files and directories and covers various
+ * cases, such as create/link/unlink/rename, and so on.
+ *
+ * NOTE: we can NOT do that in ea_{insert,delete}, which is only for
+ * directories.
+ *
+ * XXX: There are two known issues:
+ * 1. For one object, we create at most one agent entry even if there is
+ * more than one cross-MDT hard link to the object. So the local e2fsck
+ * may claim that the object's nlink is larger than the number of name
+ * entries that reference the inode. Furthermore, e2fsck will fix the
+ * nlink attribute to match the local references, which makes the
+ * object's nlink attribute inconsistent with the global references. It
+ * is bad but not fatal: ref_del() can handle the zero-referenced case,
+ * and the global namespace LFSCK can repair the object's attribute
+ * according to the linkEA.
+ * 2. There may be so many hard links on the object that its linkEA
+ * overflows, in which case the linkEA entry for a cross-MDT reference may
+ * be discarded. If that happens, at this point we do not know whether
+ * there are any cross-MDT references. But there are local references,
+ * which guarantee that the object is visible to userspace when mounted
+ * as 'ldiskfs'. That is enough.
+ */
+static int osd_xattr_handle_linkea(const struct lu_env *env,
+				   struct osd_device *osd,
+				   struct osd_object *obj,
+				   const struct lu_buf *buf,
+				   struct thandle *handle)
+{
+	const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
+	struct lu_fid *link_fid = &osd_oti_get(env)->oti_fid3;
+	struct linkea_data ldata = { .ld_buf = (struct lu_buf *)buf };
+	struct lu_name link_name;
+	struct osd_thandle *oh;
+	bool has_remote_link = false;
+	bool has_agent;
+	int rc;
+	ENTRY;
+
+	oh = container_of0(handle, struct osd_thandle, ot_super);
+	LASSERT(oh->ot_handle != NULL);
+
+	/* -ENODATA is tolerated and handled like a linkEA without any
+	 * remote record; every other failure is returned as is. */
+	rc = linkea_init_with_rec(&ldata);
+	if (rc != 0 && rc != -ENODATA)
+		RETURN(rc);
+
+	if (rc == 0) {
+		/* Walk the records in @buf and stop at the first one whose
+		 * FID osd_remote_fid() reports as remote (> 0). */
+		for (linkea_first_entry(&ldata); ldata.ld_lee != NULL;
+		     linkea_next_entry(&ldata)) {
+			linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
+					    &link_name, link_fid);
+			if (osd_remote_fid(env, osd, link_fid) > 0) {
+				has_remote_link = true;
+				break;
+			}
+		}
+	}
+	rc = 0;
+
+	/* The agent entry must exist exactly when a remote record exists;
+	 * nothing to do if the two already agree. */
+	has_agent = lu_object_has_agent_entry(&obj->oo_dt.do_lu);
+	if (has_agent == has_remote_link)
+		RETURN(rc);
+
+	if (has_agent) {
+		/* Agent entry present, but no remote record is left. */
+		rc = osd_delete_from_remote_parent(env, osd, obj, oh, false);
+		if (rc)
+			CERROR("%s: failed to remove agent entry for "DFID
+			       ": rc = %d\n", osd_name(osd), PFID(fid), rc);
+	} else {
+		/* Remote record present, but no agent entry yet. */
+		rc = osd_add_to_remote_parent(env, osd, obj, oh);
+		if (rc)
+			CERROR("%s: failed to create agent entry for "DFID
+			       ": rc = %d\n", osd_name(osd), PFID(fid), rc);
+	}
+
+	RETURN(rc);
+}
+
+/*
* Concurrency: @dt is write locked.
*/
static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
sizeof(*lma), XATTR_REPLACE);
if (rc != 0)
RETURN(rc);
+ } else if (strcmp(name, XATTR_NAME_LINK) == 0) {
+ LASSERT(!osd->od_is_ost);
+
+ rc = osd_xattr_handle_linkea(env, osd, obj, buf, handle);
+ if (rc)
+ RETURN(rc);
}
if (fl & LU_XATTR_REPLACE)
LASSERT(oh->ot_handle == NULL);
credits = osd_dto_credits_noquota[DTO_INDEX_DELETE];
- if (key != NULL && unlikely(strcmp((char *)key, dotdot) == 0)) {
- /* '..' to a remote object has a local representative */
- credits += osd_dto_credits_noquota[DTO_INDEX_DELETE];
- /* to reset LMAI_REMOTE_PARENT */
- credits += 1;
- }
osd_trans_declare_op(env, oh, OSD_OT_DELETE, credits);
inode = osd_dt_obj(dt)->oo_inode;
else
up_write(&obj->oo_ext_idx_sem);
- if (rc != 0)
- GOTO(out, rc);
+ GOTO(out, rc);
- /* For inode on the remote MDT, .. will point to
- * /Agent directory, Check whether it needs to delete
- * from agent directory */
- if (unlikely(strcmp((char *)key, dotdot) == 0)) {
- int ret;
-
- ret = osd_delete_from_remote_parent(env, osd_obj2dev(obj),
- obj, oh);
- if (ret != 0)
- /* Sigh, the entry has been deleted, and
- * it is not easy to revert it back, so
- * let's keep this error private, and let
- * LFSCK fix it. XXX */
- CERROR("%s: delete remote parent "DFID": rc = %d\n",
- osd_name(osd), PFID(fid), ret);
- }
out:
LASSERT(osd_invariant(obj));
osd_trans_exec_check(env, handle, OSD_OT_DELETE);
if (idc->oic_remote) {
/* Insert remote entry */
if (strcmp(name, dotdot) == 0 && strlen(name) == 2) {
- struct osd_mdobj_map *omm = osd->od_mdt_map;
- struct osd_thandle *oh;
-
- /* If parent on remote MDT, we need put this object
- * under AGENT */
- oh = container_of(th, typeof(*oh), ot_super);
- rc = osd_add_to_remote_parent(env, osd, obj, oh);
- if (rc != 0) {
- CERROR("%s: add "DFID" error: rc = %d\n",
- osd_name(osd),
- PFID(lu_object_fid(&dt->do_lu)), rc);
- RETURN(rc);
- }
-
- child_inode = igrab(omm->omm_remote_parent->d_inode);
+ child_inode =
+ igrab(osd->od_mdt_map->omm_remote_parent->d_inode);
} else {
child_inode = osd_create_local_agent_inode(env, osd,
obj, fid, rec1->rec_type & S_IFMT, th);
}
run_test 803 "verify agent object for remote object"
+# Verify that agent entries under /REMOTE_PARENT_DIR are created for objects
+# whose name entry lives on another MDT, and that e2fsck stays clean on every
+# MDT after the entries have been linked, renamed and removed.
+test_804() {
+	[[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.54) ] &&
+		skip "MDS needs to be newer than 2.10.54" && return
+
+	[ "$(facet_fstype $SINGLEMDS)" != "ldiskfs" ] &&
+		skip "ldiskfs only test" && return 0
+
+	local idx
+
+	mkdir -p $DIR/$tdir
+	$LFS mkdir -c 1 -i 1 $DIR/$tdir/dir0 ||
+		error "Fail to create $DIR/$tdir/dir0"
+
+	# The agent entry is named after the FID without braces, so strip
+	# any '[' / ']' that path2fid may print (a no-op if there are none).
+	# The FID is then matched as a fixed, quoted string: an unquoted
+	# "[...]" would be a regex bracket expression that matches almost
+	# any line and would also be subject to pathname expansion.
+	local fid=$($LFS path2fid $DIR/$tdir/dir0 | tr -d '[]')
+	local dev=$(mdsdevname 2)
+
+	do_facet mds2 "$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" |
+		grep -F "$fid" || error "NOT found agent entry for dir0"
+
+	$LFS mkdir -c $MDSCOUNT -i 0 $DIR/$tdir/dir1 ||
+		error "Fail to create $DIR/$tdir/dir1"
+
+	touch $DIR/$tdir/dir1/foo0 ||
+		error "Fail to create $DIR/$tdir/dir1/foo0"
+	fid=$($LFS path2fid $DIR/$tdir/dir1/foo0 | tr -d '[]')
+	local rc=0
+
+	# rc records the index of an MDT holding the agent entry (0 = none)
+	for idx in $(seq $MDSCOUNT); do
+		dev=$(mdsdevname $idx)
+		do_facet mds${idx} \
+			"$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" |
+			grep -F "$fid" && rc=$idx
+	done
+
+	mv $DIR/$tdir/dir1/foo0 $DIR/$tdir/dir1/foo1 ||
+		error "Fail to rename foo0 to foo1"
+	if [ $rc -eq 0 ]; then
+		for idx in $(seq $MDSCOUNT); do
+			dev=$(mdsdevname $idx)
+			do_facet mds${idx} \
+				"$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" |
+				grep -F "$fid" && rc=$idx
+		done
+	fi
+
+	mv $DIR/$tdir/dir1/foo1 $DIR/$tdir/dir1/foo2 ||
+		error "Fail to rename foo1 to foo2"
+	if [ $rc -eq 0 ]; then
+		for idx in $(seq $MDSCOUNT); do
+			dev=$(mdsdevname $idx)
+			do_facet mds${idx} \
+				"$DEBUGFS -c -R 'ls /REMOTE_PARENT_DIR' $dev" |
+				grep -F "$fid" && rc=$idx
+		done
+	fi
+
+	[ $rc -ne 0 ] || error "NOT found agent entry for foo"
+
+	ln $DIR/$tdir/dir1/foo2 $DIR/$tdir/dir0/guard ||
+		error "Fail to link to $DIR/$tdir/dir1/foo2"
+	mv $DIR/$tdir/dir1/foo2 $DIR/$tdir/dir1/foo0 ||
+		error "Fail to rename foo2 to foo0"
+	unlink $DIR/$tdir/dir1/foo0 ||
+		error "Fail to unlink $DIR/$tdir/dir1/foo0"
+	rm -rf $DIR/$tdir/dir0 ||
+		error "Fail to rm $DIR/$tdir/dir0"
+
+	for idx in $(seq $MDSCOUNT); do
+		dev=$(mdsdevname $idx)
+		rc=0
+
+		# Check each MDT offline, then remount it before judging the
+		# result so that a failure does not leave the MDT stopped.
+		stop mds${idx}
+		run_e2fsck $(facet_active_host mds$idx) $dev -n ||
+			rc=$?
+		start mds${idx} $dev $MDS_MOUNT_OPTS ||
+			error "mount mds$idx failed"
+		df $MOUNT > /dev/null 2>&1
+
+		# e2fsck should not return error
+		[ $rc -eq 0 ] ||
+			error "e2fsck detected error on MDT${idx}: rc=$rc"
+	done
+}
+run_test 804 "verify agent entry for remote entry"
+
#
# tests that do cleanup/setup should be run at the end
#