From a35113b690e39dcd39e126efc9085b3bc160b4ff Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 7 Jun 2013 15:56:29 -0600 Subject: [PATCH] LU-2901 mdt: duplicate link names in directory When creating a hard link to a file, the MDT/MDD/OSD code does not verify whether the target link name already exists in the directory. The ZFS ZAP code checks for duplicate entries. The add_dirent_to_buf() function in ldiskfs only checks entries for duplicates while it is traversing the leaf block looking for free space. Even if it scanned the whole leaf block, this would not work for non-htree directories since there is no guarantee that the name is being inserted into the same leaf block. To fix this, link should check target object doesn't exist as other creat operations. Add sanity.sh test_31o with multiple threads racing to link a new name into the directory, while ensuring that there is a free entry in the leaf block that is large enough to hold the duplicate name. This needs to be racy, because otherwise the client VFS will see the existing name and not send the RPC to the MDS, hiding the bug. Add DLDLMRES/PLDLMRES macros for printing the whole lock resource name (including the name hash) in LDLM_DEBUG() messages in a format similar to DFID/PFID so they can be found in debug logs more easily. Signed-off-by: Andreas Dilger Signed-off-by: Lai Siyao Change-Id: Iaec6098332fe6b0f6a534f8dbecfb14f6f500c1e Reviewed-on: http://review.whamcloud.com/6591 Tested-by: Hudson Reviewed-by: Alex Zhuravlev Reviewed-by: Mike Pershin Tested-by: Maloo --- lustre/include/lustre/lustre_idl.h | 4 + lustre/ldlm/ldlm_lock.c | 175 ++++++++++++++++++------------------- lustre/mdt/mdt_reint.c | 10 ++- lustre/tests/mlink.c | 22 ++--- lustre/tests/sanity.sh | 23 +++++ 5 files changed, 133 insertions(+), 101 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 174fd42..f2900b4 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -2696,6 +2696,10 @@ struct ldlm_res_id { __u64 name[RES_NAME_SIZE]; }; +#define DLDLMRES "["LPX64":"LPX64":"LPX64"]."LPX64i +#define PLDLMRES(res) (res)->lr_name.name[0], (res)->lr_name.name[1], \ + (res)->lr_name.name[2], (res)->lr_name.name[3] + extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id); static inline int ldlm_res_eq(const struct ldlm_res_id *res0, diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 196dca6..49d0034 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -2475,96 +2475,95 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, return; } - switch (resource->lr_type) { - case LDLM_EXTENT: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64 - "] (req "LPU64"->"LPU64") flags: "LPX64" nid: %s remote:" - " "LPX64" expref: %d pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - resource->lr_name.name[0], - resource->lr_name.name[1], - cfs_atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_extent.start, - lock->l_policy_data.l_extent.end, - lock->l_req_extent.start, lock->l_req_extent.end, - lock->l_flags, nid, lock->l_remote_handle.cookie, - exp ? cfs_atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type); - break; + switch (resource->lr_type) { + case LDLM_EXTENT: + libcfs_debug_vmsg2(msgdata, fmt, args, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "DLDLMRES" rrc: %d type: %s ["LPU64"->"LPU64"] " + "(req "LPU64"->"LPU64") flags: "LPX64" nid: %s remote: " + LPX64" expref: %d pid: %u timeout: %lu lvb_type: %d\n", + ldlm_lock_to_ns_name(lock), lock, + lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + PLDLMRES(resource), + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_policy_data.l_extent.start, + lock->l_policy_data.l_extent.end, + lock->l_req_extent.start, lock->l_req_extent.end, + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? cfs_atomic_read(&exp->exp_refcount) : -99, + lock->l_pid, lock->l_callback_timeout, + lock->l_lvb_type); + break; - case LDLM_FLOCK: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " - "["LPU64"->"LPU64"] flags: "LPX64" nid: %s remote: "LPX64 - " expref: %d pid: %u timeout: %lu\n", - ldlm_lock_to_ns_name(lock), lock, - lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - resource->lr_name.name[0], - resource->lr_name.name[1], - cfs_atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_policy_data.l_flock.pid, - lock->l_policy_data.l_flock.start, - lock->l_policy_data.l_flock.end, - lock->l_flags, nid, lock->l_remote_handle.cookie, - exp ? cfs_atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout); - break; + case LDLM_FLOCK: + libcfs_debug_vmsg2(msgdata, fmt, args, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "DLDLMRES" rrc: %d type: %s pid: %d " + "["LPU64"->"LPU64"] flags: "LPX64" nid: %s " + "remote: "LPX64" expref: %d pid: %u timeout: %lu\n", + ldlm_lock_to_ns_name(lock), lock, + lock->l_handle.h_cookie, cfs_atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + PLDLMRES(resource), + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_policy_data.l_flock.pid, + lock->l_policy_data.l_flock.start, + lock->l_policy_data.l_flock.end, + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? cfs_atomic_read(&exp->exp_refcount) : -99, + lock->l_pid, lock->l_callback_timeout); + break; - case LDLM_IBITS: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " - "flags: "LPX64" nid: %s remote: "LPX64" expref: %d " - "pid: %u timeout: %lu lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - cfs_atomic_read (&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - resource->lr_name.name[0], - resource->lr_name.name[1], - lock->l_policy_data.l_inodebits.bits, - cfs_atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, lock->l_remote_handle.cookie, - exp ? cfs_atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type); - break; + case LDLM_IBITS: + libcfs_debug_vmsg2(msgdata, fmt, args, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "DLDLMRES" bits "LPX64" rrc: %d type: %s " + "flags: "LPX64" nid: %s remote: "LPX64" expref: %d " + "pid: %u timeout: %lu lvb_type: %d\n", + ldlm_lock_to_ns_name(lock), + lock, lock->l_handle.h_cookie, + cfs_atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + PLDLMRES(resource), + lock->l_policy_data.l_inodebits.bits, + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? cfs_atomic_read(&exp->exp_refcount) : -99, + lock->l_pid, lock->l_callback_timeout, + lock->l_lvb_type); + break; - default: - libcfs_debug_vmsg2(msgdata, fmt, args, - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " - "res: "LPU64"/"LPU64" rrc: %d type: %s flags: "LPX64" " - "nid: %s remote: "LPX64" expref: %d pid: %u timeout: %lu" - "lvb_type: %d\n", - ldlm_lock_to_ns_name(lock), - lock, lock->l_handle.h_cookie, - cfs_atomic_read (&lock->l_refc), - lock->l_readers, lock->l_writers, - ldlm_lockname[lock->l_granted_mode], - ldlm_lockname[lock->l_req_mode], - resource->lr_name.name[0], - resource->lr_name.name[1], - cfs_atomic_read(&resource->lr_refcount), - ldlm_typename[resource->lr_type], - lock->l_flags, nid, lock->l_remote_handle.cookie, - exp ? cfs_atomic_read(&exp->exp_refcount) : -99, - lock->l_pid, lock->l_callback_timeout, lock->l_lvb_type); - break; - } - va_end(args); + default: + libcfs_debug_vmsg2(msgdata, fmt, args, + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " + "res: "DLDLMRES" rrc: %d type: %s flags: "LPX64" " + "nid: %s remote: "LPX64" expref: %d pid: %u " + "timeout: %lu lvb_type: %d\n", + ldlm_lock_to_ns_name(lock), + lock, lock->l_handle.h_cookie, + cfs_atomic_read(&lock->l_refc), + lock->l_readers, lock->l_writers, + ldlm_lockname[lock->l_granted_mode], + ldlm_lockname[lock->l_req_mode], + PLDLMRES(resource), + cfs_atomic_read(&resource->lr_refcount), + ldlm_typename[resource->lr_type], + lock->l_flags, nid, lock->l_remote_handle.cookie, + exp ? cfs_atomic_read(&exp->exp_refcount) : -99, + lock->l_pid, lock->l_callback_timeout, + lock->l_lvb_type); + break; + } + va_end(args); } EXPORT_SYMBOL(_ldlm_lock_debug); diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index b2d97f5..1545724 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -936,8 +936,9 @@ static int mdt_reint_link(struct mdt_thread_info *info, if (mdt_object_remote(ms)) { mdt_object_put(info->mti_env, ms); - CERROR("Target directory "DFID" is on another MDT\n", - PFID(rr->rr_fid1)); + CERROR("%s: source inode "DFID" on remote MDT from "DFID"\n", + mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1), + PFID(rr->rr_fid2)); GOTO(out_unlock_parent, rc = -EXDEV); } @@ -964,6 +965,11 @@ static int mdt_reint_link(struct mdt_thread_info *info, GOTO(out_unlock_child, rc); /* save version of file name for replay, it must be ENOENT here */ if (!req_is_replay(mdt_info_req(info))) { + if (rc != -ENOENT) { + CDEBUG(D_INFO, "link target %.*s existed!\n", + rr->rr_namelen, (char *)rr->rr_name); + GOTO(out_unlock_child, rc = -EEXIST); + } info->mti_ver[2] = ENOENT_VERSION; mdt_version_save(mdt_info_req(info), info->mti_ver[2], 2); } diff --git a/lustre/tests/mlink.c b/lustre/tests/mlink.c index b0cb509..d2843c5 100644 --- a/lustre/tests/mlink.c +++ b/lustre/tests/mlink.c @@ -42,18 +42,18 @@ int main(int argc, char ** argv) { - int rc; + int rc; - if (argc < 3) { - printf("Usage: %s file link\n", argv[0]); - return 1; - } + if (argc < 3) { + fprintf(stderr, "usage: %s file link\n", argv[0]); + return 1; + } - rc = link(argv[1], argv[2]); - if (rc) { - printf("link(%s, %s) error: %s\n", argv[1], argv[2], - strerror(errno)); + rc = link(argv[1], argv[2]); + if (rc) { + fprintf(stderr, "link(%s, %s) error: %s\n", + argv[1], argv[2], strerror(errno)); return errno; - } + } return 0; -} +} diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 3f8badb..20599f5 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -2111,6 +2111,29 @@ test_31n() { } run_test 31n "check link count of unlinked file" +link_one() { + local TEMPNAME=$(mktemp $1_XXXXXX) + mlink $TEMPNAME $1 2> /dev/null && + echo "$BASHPID: link $TEMPNAME to $1 succeeded" + munlink $TEMPNAME +} + +test_31o() { # LU-2901 + mkdir -p $DIR/$tdir + for LOOP in $(seq 100); do + rm -f $DIR/$tdir/$tfile* + for THREAD in $(seq 8); do + link_one $DIR/$tdir/$tfile.$LOOP & + done + wait + local LINKS=$(ls -1 $DIR/$tdir | grep -c $tfile.$LOOP) + [ $LINKS -gt 1 ] && ls $DIR/$tdir && + error "$LINKS duplicate links to $tfile.$LOOP" && + break || true + done +} +run_test 31o "duplicate hard links with same filename" + cleanup_test32_mount() { trap 0 $UMOUNT $DIR/$tdir/ext2-mountpoint -- 1.8.3.1