From: Jinshan Xiong Date: Fri, 12 Jul 2013 20:07:11 +0000 (-0700) Subject: LU-2919 hsm: Implementation of exclusive open X-Git-Tag: 2.4.53~64 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=06e652ce88cf485135df249b29ca8d0afdc9af64 LU-2919 hsm: Implementation of exclusive open In this patch, a lease framework is implemented. However, only exclusive leases are supported right now. To apply a lease, MDS_OPEN_LEASE must be set when opening the file; an EX mode open lock is then returned to the client side to hold the lease. From that time on, if this file is opened again by other processes, the open lock will be revoked, so the client that holds the lease will know the lease is already broken by checking that open lock. To release a lease, a normal close is used. The client will revoke the open lock before sending the CLOSE request. A lease can be applied in two ways: ll_lease_open()/ll_lease_close() can be called directly if the lease holder is in kernel space; or, if the lease holder lives in user space, it has to open the file first and then use ioctl() with command LL_IOC_SET_LEASE to apply a lease. The lease holder has to poll the lease status itself. Signed-off-by: Jinshan Xiong Signed-off-by: John L. Hammond Change-Id: I181edbb0b31fcf60858656170528d2ffda9c9bb6 Reviewed-on: http://review.whamcloud.com/6730 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/contrib/bit-masks/lustre_dlm_flags.def b/contrib/bit-masks/lustre_dlm_flags.def index 6c2e2b9..0f59691 100644 --- a/contrib/bit-masks/lustre_dlm_flags.def +++ b/contrib/bit-masks/lustre_dlm_flags.def @@ -329,3 +329,9 @@ flag[54] = { f-mask = local_only; f-desc = 'Flag whether this is a server namespace lock.'; }; + +flag[55] = { + f-name = excl; + f-mask = local_only; + f-desc = 'Flag whether this lock can be reused. 
Used by exclusive open.'; +}; diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index cf41790..1cf77fc 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -215,14 +215,14 @@ struct lustre_intent_data { }; struct lookup_intent { - int it_magic; - void (*it_op_release)(struct lookup_intent *); - int it_op; - int it_flags; - int it_create_mode; - union { - struct lustre_intent_data lustre; - } d; + int it_magic; + void (*it_op_release)(struct lookup_intent *); + int it_op; + int it_create_mode; + __u64 it_flags; + union { + struct lustre_intent_data lustre; + } d; }; static inline void intent_init(struct lookup_intent *it, int op, int flags) diff --git a/lustre/include/linux/lustre_intent.h b/lustre/include/linux/lustre_intent.h index 8f676b0..588f211 100644 --- a/lustre/include/linux/lustre_intent.h +++ b/lustre/include/linux/lustre_intent.h @@ -51,12 +51,12 @@ struct lustre_intent_data { }; struct lookup_intent { - int it_op; - int it_flags; + int it_op; int it_create_mode; - union { - struct lustre_intent_data lustre; - } d; + __u64 it_flags; + union { + struct lustre_intent_data lustre; + } d; }; #endif diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 86800eb..fd0fb2a 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -2113,6 +2113,7 @@ extern void lustre_swab_generic_32s (__u32 *val); #define DISP_ENQ_OPEN_REF 0x00800000 #define DISP_ENQ_CREATE_REF 0x01000000 #define DISP_OPEN_LOCK 0x02000000 +#define DISP_OPEN_LEASE 0x04000000 /* INODE LOCK PARTS */ #define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */ @@ -2375,6 +2376,10 @@ extern void lustre_swab_mdt_rec_setattr (struct mdt_rec_setattr *sa); * hsm restore) */ #define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created unlinked */ +#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease + * delegation, succeed if it's not + * being opened 
with conflict mode. + */ /* permission for create non-directory file */ #define MAY_CREATE (1 << 7) diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 9de0815..211d4cb 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -252,12 +252,15 @@ struct ost_id { struct hsm_current_action) /* see for ioctl numbers 221-232 */ -#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) -#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) -#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64) +#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md) +#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md) +#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64) -#define LL_STATFS_LMV 1 -#define LL_STATFS_LOV 2 +#define LL_IOC_SET_LEASE _IOWR('f', 243, long) +#define LL_IOC_GET_LEASE _IO('f', 244) + +#define LL_STATFS_LMV 1 +#define LL_STATFS_LOV 2 #define LL_STATFS_NODELAY 4 #define IOC_MDC_TYPE 'i' diff --git a/lustre/include/lustre_dlm_flags.h b/lustre/include/lustre_dlm_flags.h index 8c34d9d..a632217 100644 --- a/lustre/include/lustre_dlm_flags.h +++ b/lustre/include/lustre_dlm_flags.h @@ -35,7 +35,7 @@ #ifndef LDLM_ALL_FLAGS_MASK /** l_flags bits marked as "all_flags" bits */ -#define LDLM_FL_ALL_FLAGS_MASK 0x007FFFFFC08F132FULL +#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F132FULL /** l_flags bits marked as "ast" bits */ #define LDLM_FL_AST_MASK 0x0000000080000000ULL @@ -53,7 +53,7 @@ #define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL /** l_flags bits marked as "local_only" bits */ -#define LDLM_FL_LOCAL_ONLY_MASK 0x007FFFFF00000000ULL +#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL /** l_flags bits marked as "on_wire" bits */ #define LDLM_FL_ON_WIRE_MASK 0x00000000C08F132FULL @@ -358,6 +358,12 @@ #define ldlm_set_ns_srv(_l) LDLM_SET_FLAG(( _l), 1ULL << 54) #define ldlm_clear_ns_srv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 54) +/** Flag whether this lock can be 
reused. Used by exclusive open. */ +#define LDLM_FL_EXCL 0x0080000000000000ULL // bit 55 +#define ldlm_is_excl(_l) LDLM_TEST_FLAG(( _l), 1ULL << 55) +#define ldlm_set_excl(_l) LDLM_SET_FLAG(( _l), 1ULL << 55) +#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55) + /** test for ldlm_lock flag bit set */ #define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0) @@ -414,6 +420,7 @@ static int hf_lustre_ldlm_fl_server_lock = -1; static int hf_lustre_ldlm_fl_res_locked = -1; static int hf_lustre_ldlm_fl_waited = -1; static int hf_lustre_ldlm_fl_ns_srv = -1; +static int hf_lustre_ldlm_fl_excl = -1; const value_string lustre_ldlm_flags_vals[] = { {LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"}, @@ -454,6 +461,7 @@ const value_string lustre_ldlm_flags_vals[] = { {LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"}, {LDLM_FL_WAITED, "LDLM_FL_WAITED"}, {LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"}, + {LDLM_FL_EXCL, "LDLM_FL_EXCL"}, { 0, NULL } }; #endif /* WIRESHARK_COMPILE */ diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index bc876ab..94072c0 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -105,11 +105,12 @@ struct client_obd *client_conn2cli(struct lustre_handle *conn); struct md_open_data; struct obd_client_handle { - struct lustre_handle och_fh; - struct lu_fid och_fid; - struct md_open_data *och_mod; - __u32 och_magic; - int och_flags; + struct lustre_handle och_fh; + struct lu_fid och_fid; + struct md_open_data *och_mod; + struct lustre_handle och_lease_handle; /* open lock for lease */ + __u32 och_magic; + int och_flags; }; #define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 334de9c..bde3bd4 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -1187,6 +1187,11 @@ static struct ldlm_lock *search_queue(cfs_list_t *queue, if (lock == old_lock) break; + /* Check if this lock can be matched. 
+ * Used by LU-2919(exclusive open) for open lease lock */ + if (ldlm_is_excl(lock)) + continue; + /* llite sometimes wants to match locks that will be * canceled when their users drop, but we allow it to match * if it passes in CBPENDING and the lock still has users. diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 61ac593..7d9ab66 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -928,7 +928,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp, lock->l_conn_export = exp; lock->l_export = NULL; lock->l_blocking_ast = einfo->ei_cb_bl; - lock->l_flags |= (*flags & LDLM_FL_NO_LRU); + lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL)); /* lock not sent to server yet */ @@ -1359,10 +1359,10 @@ int ldlm_cli_cancel(struct lustre_handle *lockh, } rc = ldlm_cli_cancel_local(lock); - if (rc == LDLM_FL_LOCAL_ONLY) { - LDLM_LOCK_RELEASE(lock); + if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) { + LDLM_LOCK_RELEASE(lock); RETURN(0); - } + } /* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL * RPC which goes to canceld portal, so we can cancel other LRU locks * here and send them all as one LDLM_CANCEL RPC. */ diff --git a/lustre/llite/file.c b/lustre/llite/file.c index ca7dd43..066018e 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -251,6 +251,24 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid); + if (fd->fd_lease_och != NULL) { + bool lease_broken; + + /* Usually the lease is not released when the + * application crashed, we need to release here. */ + rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken); + CDEBUG(rc ? 
D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n", + PFID(&lli->lli_fid), rc, lease_broken); + + fd->fd_lease_och = NULL; + } + + if (fd->fd_och != NULL) { + rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och); + fd->fd_och = NULL; + GOTO(out, rc); + } + /* Let's see if we have good enough OPEN lock on the file and if we can skip talking to MDS */ if (file->f_dentry->d_inode) { /* Can this ever be false? */ @@ -287,11 +305,12 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, file, file->f_dentry, file->f_dentry->d_name.name); } - LUSTRE_FPRIVATE(file) = NULL; - ll_file_data_put(fd); - ll_capa_close(inode); +out: + LUSTRE_FPRIVATE(file) = NULL; + ll_file_data_put(fd); + ll_capa_close(inode); - RETURN(rc); + RETURN(rc); } /* While this returns an error code, fput() the caller does not, so we need @@ -452,6 +471,7 @@ static int ll_och_fill(struct obd_export *md_exp, struct lookup_intent *it, body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); och->och_fh = body->handle; och->och_fid = body->fid1; + och->och_lease_handle.cookie = it->d.lustre.it_lock_handle; och->och_magic = OBD_CLIENT_HANDLE_MAGIC; och->och_flags = it->it_flags; @@ -484,7 +504,7 @@ int ll_local_open(struct file *file, struct lookup_intent *it, LUSTRE_FPRIVATE(file) = fd; ll_readahead_init(inode, &fd->fd_ras); - fd->fd_omode = it->it_flags; + fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); RETURN(0); } @@ -688,6 +708,199 @@ out_openerr: return rc; } +static int ll_md_blocking_lease_ast(struct ldlm_lock *lock, + struct ldlm_lock_desc *desc, void *data, int flag) +{ + int rc; + struct lustre_handle lockh; + ENTRY; + + switch (flag) { + case LDLM_CB_BLOCKING: + ldlm_lock2handle(lock, &lockh); + rc = ldlm_cli_cancel(&lockh, LCF_ASYNC); + if (rc < 0) { + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + RETURN(rc); + } + break; + case LDLM_CB_CANCELING: + /* do nothing */ + break; + } + RETURN(0); +} + +/** + * Acquire a lease and open the file. 
+ */ +struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file, + fmode_t fmode) +{ + struct lookup_intent it = { .it_op = IT_OPEN }; + struct ll_sb_info *sbi = ll_i2sbi(inode); + struct md_op_data *op_data; + struct ptlrpc_request *req; + struct lustre_handle old_handle = { 0 }; + struct obd_client_handle *och = NULL; + int rc; + int rc2; + ENTRY; + + if (fmode != FMODE_WRITE && fmode != FMODE_READ) + RETURN(ERR_PTR(-EINVAL)); + + if (file != NULL) { + struct ll_inode_info *lli = ll_i2info(inode); + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct obd_client_handle **och_p; + __u64 *och_usecount; + + if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC)) + RETURN(ERR_PTR(-EPERM)); + + /* Get the openhandle of the file */ + rc = -EBUSY; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + mutex_unlock(&lli->lli_och_mutex); + RETURN(ERR_PTR(rc)); + } + + if (fd->fd_och == NULL) { + if (file->f_mode & FMODE_WRITE) { + LASSERT(lli->lli_mds_write_och != NULL); + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else { + LASSERT(lli->lli_mds_read_och != NULL); + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + if (*och_usecount == 1) { + fd->fd_och = *och_p; + *och_p = NULL; + *och_usecount = 0; + rc = 0; + } + } + mutex_unlock(&lli->lli_och_mutex); + if (rc < 0) /* more than 1 opener */ + RETURN(ERR_PTR(rc)); + + LASSERT(fd->fd_och != NULL); + old_handle = fd->fd_och->och_fh; + } + + OBD_ALLOC_PTR(och); + if (och == NULL) + RETURN(ERR_PTR(-ENOMEM)); + + op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0, + LUSTRE_OPC_ANY, NULL); + if (IS_ERR(op_data)) + GOTO(out, rc = PTR_ERR(op_data)); + + /* To tell the MDT this openhandle is from the same owner */ + op_data->op_handle = old_handle; + + it.it_flags = fmode | MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE; + rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req, + 
ll_md_blocking_lease_ast, + /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise + * it can be cancelled which may mislead applications that the lease is + * broken; + * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal + * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast + * doesn't deal with openhandle, so normal openhandle will be leaked. */ + LDLM_FL_NO_LRU | LDLM_FL_EXCL); + ll_finish_md_op_data(op_data); + if (req != NULL) { + ptlrpc_req_finished(req); + it_clear_disposition(&it, DISP_ENQ_COMPLETE); + } + if (rc < 0) + GOTO(out_release_it, rc); + + if (it_disposition(&it, DISP_LOOKUP_NEG)) + GOTO(out_release_it, rc = -ENOENT); + + rc = it_open_error(DISP_OPEN_OPEN, &it); + if (rc) + GOTO(out_release_it, rc); + + LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF)); + ll_och_fill(sbi->ll_md_exp, &it, och); + + if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */ + GOTO(out_close, rc = -EOPNOTSUPP); + + /* already get lease, handle lease lock */ + ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL); + if (it.d.lustre.it_lock_mode == 0 || + it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) { + /* open lock must return for lease */ + CERROR(DFID "lease granted but no open lock, %d/%Lu.\n", + PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode, + it.d.lustre.it_lock_bits); + GOTO(out_close, rc = -EPROTO); + } + + ll_intent_release(&it); + RETURN(och); + +out_close: + rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och); + if (rc2) + CERROR("Close openhandle returned %d\n", rc2); + + /* cancel open lock */ + if (it.d.lustre.it_lock_mode != 0) { + ldlm_lock_decref_and_cancel(&och->och_lease_handle, + it.d.lustre.it_lock_mode); + it.d.lustre.it_lock_mode = 0; + } +out_release_it: + ll_intent_release(&it); +out: + OBD_FREE_PTR(och); + RETURN(ERR_PTR(rc)); +} +EXPORT_SYMBOL(ll_lease_open); + +/** + * Release lease and close the file. + * It will check if the lease has ever broken. 
+ */ +int ll_lease_close(struct obd_client_handle *och, struct inode *inode, + bool *lease_broken) +{ + struct ldlm_lock *lock; + bool cancelled = true; + int rc; + ENTRY; + + lock = ldlm_handle2lock(&och->och_lease_handle); + if (lock != NULL) { + lock_res_and_lock(lock); + cancelled = ldlm_is_cancel(lock); + unlock_res_and_lock(lock); + ldlm_lock_put(lock); + } + + CDEBUG(D_INODE, "lease for "DFID" broken? %d\n", + PFID(&ll_i2info(inode)->lli_fid), cancelled); + + if (!cancelled) + ldlm_cli_cancel(&och->och_lease_handle, 0); + if (lease_broken != NULL) + *lease_broken = cancelled; + + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och); + RETURN(rc); +} +EXPORT_SYMBOL(ll_lease_close); + /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, struct obd_capa *capa, struct obdo *obdo, @@ -2210,6 +2423,90 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) OBD_FREE_PTR(hca); RETURN(rc); } + case LL_IOC_SET_LEASE: { + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_client_handle *och = NULL; + bool lease_broken; + fmode_t mode = 0; + + switch (arg) { + case F_WRLCK: + if (!(file->f_mode & FMODE_WRITE)) + RETURN(-EPERM); + mode = FMODE_WRITE; + break; + case F_RDLCK: + if (!(file->f_mode & FMODE_READ)) + RETURN(-EPERM); + mode = FMODE_READ; + break; + case F_UNLCK: + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + och = fd->fd_lease_och; + fd->fd_lease_och = NULL; + } + mutex_unlock(&lli->lli_och_mutex); + + if (och != NULL) { + mode = och->och_flags &(FMODE_READ|FMODE_WRITE); + rc = ll_lease_close(och, inode, &lease_broken); + if (rc == 0 && lease_broken) + mode = 0; + } else { + rc = -ENOLCK; + } + + /* return the type of lease or error */ + RETURN(rc < 0 ? 
rc : (int)mode); + default: + RETURN(-EINVAL); + } + + CDEBUG(D_INODE, "Set lease with mode %d\n", mode); + + /* apply for lease */ + och = ll_lease_open(inode, file, mode); + if (IS_ERR(och)) + RETURN(PTR_ERR(och)); + + rc = 0; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och == NULL) { + fd->fd_lease_och = och; + och = NULL; + } + mutex_unlock(&lli->lli_och_mutex); + if (och != NULL) { + /* impossible now that only excl is supported for now */ + ll_lease_close(och, inode, &lease_broken); + rc = -EBUSY; + } + RETURN(rc); + } + case LL_IOC_GET_LEASE: { + struct ll_inode_info *lli = ll_i2info(inode); + struct ldlm_lock *lock = NULL; + + rc = 0; + mutex_lock(&lli->lli_och_mutex); + if (fd->fd_lease_och != NULL) { + struct obd_client_handle *och = fd->fd_lease_och; + + lock = ldlm_handle2lock(&och->och_lease_handle); + if (lock != NULL) { + lock_res_and_lock(lock); + if (!ldlm_is_cancel(lock)) + rc = och->och_flags & + (FMODE_READ | FMODE_WRITE); + unlock_res_and_lock(lock); + ldlm_lock_put(lock); + } + } + mutex_unlock(&lli->lli_och_mutex); + + RETURN(rc); + } default: { int err; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 0915797..a7216f4 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -615,12 +615,16 @@ struct ll_readahead_state { extern struct kmem_cache *ll_file_data_slab; struct lustre_handle; struct ll_file_data { - struct ll_readahead_state fd_ras; - int fd_omode; - struct ccc_grouplock fd_grouplock; + struct ll_readahead_state fd_ras; + struct ccc_grouplock fd_grouplock; __u64 lfd_pos; - __u32 fd_flags; - struct file *fd_file; + __u32 fd_flags; + fmode_t fd_omode; + /* openhandle if lease exists for this file. + * Borrow lli->lli_och_mutex to protect assignment */ + struct obd_client_handle *fd_lease_och; + struct obd_client_handle *fd_och; + struct file *fd_file; /* Indicate whether need to report failure when close. * true: failure is known, not report again. 
* false: unknown failure, should report. */ @@ -808,6 +812,11 @@ int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_fid2path(struct inode *inode, void *arg); int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file, + fmode_t mode); +int ll_lease_close(struct obd_client_handle *och, struct inode *inode, + bool *lease_broken); + /* llite/dcache.c */ int ll_dops_init(struct dentry *de, int block, int init_sa); diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index badbc47..38eecb0 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -69,8 +69,8 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, const void *data, int datalen, __u32 mode, __u32 uid, __u32 gid, cfs_cap_t capability, __u64 rdev); void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - __u32 mode, __u64 rdev, __u32 flags, const void *data, - int datalen); + __u32 mode, __u64 rdev, __u64 flags, const void *data, + int datalen); void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data); void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data); void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data, diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 1e86153..09b0ffa 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -184,12 +184,12 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, } } -static __u64 mds_pack_open_flags(__u32 flags, __u32 mode) +static __u64 mds_pack_open_flags(__u64 flags, __u32 mode) { __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE | MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | - MDS_OPEN_BY_FID)); + MDS_OPEN_BY_FID | MDS_OPEN_LEASE)); if (flags & O_CREAT) cr_flags |= MDS_OPEN_CREAT; if (flags & O_EXCL) @@ -217,7 
+217,7 @@ static __u64 mds_pack_open_flags(__u32 flags, __u32 mode) /* packing of MDS records */ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data, - __u32 mode, __u64 rdev, __u32 flags, const void *lmm, + __u32 mode, __u64 rdev, __u64 flags, const void *lmm, int lmmlen) { struct mdt_rec_create *rec; @@ -244,6 +244,7 @@ void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data, rec->cr_suppgid2 = op_data->op_suppgids[1]; rec->cr_bias = op_data->op_bias; rec->cr_umask = cfs_curproc_umask(); + rec->cr_old_handle = op_data->op_handle; mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1); /* the next buffer is child capa, which is used for replay, diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index cc60d2e..560bdf9 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -79,6 +79,12 @@ EXPORT_SYMBOL(it_clear_disposition); int it_open_error(int phase, struct lookup_intent *it) { + if (it_disposition(it, DISP_OPEN_LEASE)) { + if (phase >= DISP_OPEN_LEASE) + return it->d.lustre.it_status; + else + return 0; + } if (it_disposition(it, DISP_OPEN_OPEN)) { if (phase >= DISP_OPEN_OPEN) return it->d.lustre.it_status; @@ -293,19 +299,26 @@ static struct ptlrpc_request *mdc_intent_open_pack(struct obd_export *exp, /* XXX: openlock is not cancelled for cross-refs. */ /* If inode is known, cancel conflicting OPEN locks. 
*/ - if (fid_is_sane(&op_data->op_fid2)) { - if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC)) - mode = LCK_CW; + if (fid_is_sane(&op_data->op_fid2)) { + if (it->it_flags & MDS_OPEN_LEASE) { /* try to get lease */ + if (it->it_flags & FMODE_WRITE) + mode = LCK_EX; + else + mode = LCK_PR; + } else { + if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC)) + mode = LCK_CW; #ifdef FMODE_EXEC - else if (it->it_flags & FMODE_EXEC) - mode = LCK_PR; + else if (it->it_flags & FMODE_EXEC) + mode = LCK_PR; #endif - else - mode = LCK_CR; - count = mdc_resource_get_unused(exp, &op_data->op_fid2, - &cancels, mode, - MDS_INODELOCK_OPEN); - } + else + mode = LCK_CR; + } + count = mdc_resource_get_unused(exp, &op_data->op_fid2, + &cancels, mode, + MDS_INODELOCK_OPEN); + } /* If CREATE, cancel parent's UPDATE lock. */ if (it->it_op & IT_CREAT) @@ -1080,16 +1093,16 @@ int mdc_intent_lock(struct obd_export *exp, struct md_op_data *op_data, ldlm_blocking_callback cb_blocking, __u64 extra_lock_flags) { - struct lustre_handle lockh; - int rc = 0; - ENTRY; - LASSERT(it); + struct lustre_handle lockh; + int rc = 0; + ENTRY; + LASSERT(it); - CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID - ", intent: %s flags %#o\n", op_data->op_namelen, - op_data->op_name, PFID(&op_data->op_fid2), - PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), - it->it_flags); + CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID + ", intent: %s flags %#Lo\n", op_data->op_namelen, + op_data->op_name, PFID(&op_data->op_fid2), + PFID(&op_data->op_fid1), ldlm_it2str(it->it_op), + it->it_flags); lockh.cookie = 0; if (fid_is_sane(&op_data->op_fid2) && @@ -1216,11 +1229,11 @@ int mdc_intent_getattr_async(struct obd_export *exp, }; int rc = 0; __u64 flags = LDLM_FL_HAS_INTENT; - ENTRY; + ENTRY; - CDEBUG(D_DLMTRACE,"name: %.*s in inode "DFID", intent: %s flags %#o\n", - op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1), - ldlm_it2str(it->it_op), it->it_flags); + CDEBUG(D_DLMTRACE,"name: %.*s in inode "DFID", 
intent: %s flags %#Lo\n", + op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1), + ldlm_it2str(it->it_op), it->it_flags); fid_build_reg_res_name(&op_data->op_fid1, &res_id); req = mdc_intent_getattr_pack(exp, it, op_data); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index bcd644b..276ddfd 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5031,9 +5031,10 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env, o->lo_ops = &mdt_obj_ops; mutex_init(&mo->mot_ioepoch_mutex); mutex_init(&mo->mot_lov_mutex); - RETURN(o); - } else - RETURN(NULL); + init_rwsem(&mo->mot_open_sem); + RETURN(o); + } + RETURN(NULL); } static int mdt_object_init(const struct lu_env *env, struct lu_object *o, @@ -5068,11 +5069,14 @@ static void mdt_object_free(const struct lu_env *env, struct lu_object *o) CDEBUG(D_INFO, "object free, fid = "DFID"\n", PFID(lu_object_fid(o))); - lu_object_fini(o); - lu_object_header_fini(h); + LASSERT(atomic_read(&mo->mot_open_count) == 0); + LASSERT(atomic_read(&mo->mot_lease_count) == 0); + + lu_object_fini(o); + lu_object_header_fini(h); OBD_SLAB_FREE_PTR(mo, mdt_object_kmem); - EXIT; + EXIT; } static int mdt_object_print(const struct lu_env *env, void *cookie, diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index af677f1..1022e9a 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -78,12 +78,12 @@ struct mdt_object; /* file data for open files on MDS */ struct mdt_file_data { - struct portals_handle mfd_handle; /* must be first */ - int mfd_mode; /* open mode provided by client */ - cfs_list_t mfd_list; /* protected by med_open_lock */ - __u64 mfd_xid; /* xid of the open request */ - struct lustre_handle mfd_old_handle; /* old handle in replay case */ - struct mdt_object *mfd_object; /* point to opened object */ + struct portals_handle mfd_handle; /* must be first */ + __u64 mfd_mode; /* open mode provided by client */ + cfs_list_t mfd_list; /* 
protected by med_open_lock */ + __u64 mfd_xid; /* xid of the open request */ + struct lustre_handle mfd_old_handle; /* old handle in replay case */ + struct mdt_object *mfd_object; /* point to opened object */ }; #define CDT_NONBLOCKING_RESTORE 0x0000000000000001ULL @@ -249,6 +249,11 @@ struct mdt_object { struct mutex mot_ioepoch_mutex; /* Lock to protect create_data */ struct mutex mot_lov_mutex; + /* Lock to protect lease open. + * Lease open acquires write lock; normal open acquires read lock */ + struct rw_semaphore mot_open_sem; + atomic_t mot_lease_count; + atomic_t mot_open_count; }; enum mdt_object_flags { @@ -290,13 +295,14 @@ struct mdt_lock_handle { }; enum { - MDT_LH_PARENT, /* parent lockh */ - MDT_LH_CHILD, /* child lockh */ - MDT_LH_OLD, /* old lockh for rename */ + MDT_LH_PARENT, /* parent lockh */ + MDT_LH_CHILD, /* child lockh */ + MDT_LH_OLD, /* old lockh for rename */ MDT_LH_LAYOUT = MDT_LH_OLD, /* layout lock */ - MDT_LH_NEW, /* new lockh for rename */ - MDT_LH_RMT, /* used for return lh to caller */ - MDT_LH_NR + MDT_LH_NEW, /* new lockh for rename */ + MDT_LH_RMT, /* used for return lh to caller */ + MDT_LH_LOCAL, /* local lock never return to client */ + MDT_LH_NR }; enum { @@ -800,7 +806,7 @@ int mdt_lock_new_child(struct mdt_thread_info *info, struct mdt_lock_handle *child_lockh); void mdt_mfd_set_mode(struct mdt_file_data *mfd, - int mode); + __u64 mode); int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc); diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index f408eba..2e90841 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -81,7 +81,7 @@ struct mdt_file_data *mdt_mfd_new(const struct mdt_export_data *med) */ struct mdt_file_data *mdt_handle2mfd(struct mdt_export_data *med, const struct lustre_handle *handle, - bool is_replay) + bool is_replay_or_resent) { struct mdt_file_data *mfd; ENTRY; @@ -89,7 +89,7 @@ struct mdt_file_data *mdt_handle2mfd(struct mdt_export_data *med, 
LASSERT(handle != NULL); mfd = class_handle2object(handle->cookie, med); /* during dw/setattr replay the mfd can be found by old handle */ - if (mfd == NULL && is_replay) { + if (mfd == NULL && is_replay_or_resent) { cfs_list_for_each_entry(mfd, &med->med_open_head, mfd_list) { if (mfd->mfd_old_handle.cookie == handle->cookie) RETURN(mfd); @@ -651,14 +651,14 @@ static void mdt_empty_transno(struct mdt_thread_info *info, int rc) EXIT; } -void mdt_mfd_set_mode(struct mdt_file_data *mfd, int mode) +void mdt_mfd_set_mode(struct mdt_file_data *mfd, __u64 mode) { - LASSERT(mfd != NULL); + LASSERT(mfd != NULL); - CDEBUG(D_HA, "Change mfd %p mode 0x%x->0x%x\n", - mfd, (unsigned int)mfd->mfd_mode, (unsigned int)mode); + CDEBUG(D_HA, DFID "Change mfd mode 0x%Lx->0x%Lx\n", + PFID(mdt_object_fid(mfd->mfd_object)), mfd->mfd_mode, mode); - mfd->mfd_mode = mode; + mfd->mfd_mode = mode; } static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, @@ -736,6 +736,8 @@ static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, * released by mdt_mfd_close(). */ mdt_object_get(info->mti_env, o); + mfd->mfd_object = o; + mfd->mfd_xid = req->rq_xid; /* * @flags is always not zero. At least it should be FMODE_READ, @@ -746,8 +748,9 @@ static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p, /* Open handling. 
*/ mdt_mfd_set_mode(mfd, flags); - mfd->mfd_object = o; - mfd->mfd_xid = req->rq_xid; + atomic_inc(&o->mot_open_count); + if (flags & MDS_OPEN_LEASE) + atomic_inc(&o->mot_lease_count); /* replay handle */ if (req_is_replay(req)) { @@ -1159,26 +1162,17 @@ static int mdt_object_open_lock(struct mdt_thread_info *info, struct md_attr *ma = &info->mti_attr; __u64 open_flags = info->mti_spec.sp_cr_flags; ldlm_mode_t lm = LCK_CR; + bool acq_lease = !!(open_flags & MDS_OPEN_LEASE); bool try_layout = false; bool create_layout = false; int rc = 0; ENTRY; *ibits = 0; - if (open_flags & MDS_OPEN_LOCK) { - if (open_flags & FMODE_WRITE) - lm = LCK_CW; - /* if file is released, we can't deny write because we must - * restore (write) it to access it. */ - else if ((open_flags & MDS_FMODE_EXEC) && - !((ma->ma_valid & MA_HSM) && - (ma->ma_hsm.mh_flags & HS_RELEASED))) - lm = LCK_PR; - else - lm = LCK_CR; + mdt_lock_handle_init(lhc); - *ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_OPEN; - } + if (req_is_replay(mdt_info_req(info))) + RETURN(0); if (S_ISREG(lu_object_attr(&obj->mot_obj))) { if (ma->ma_need & MA_LOV && !(ma->ma_valid & MA_LOV) && @@ -1189,7 +1183,65 @@ static int mdt_object_open_lock(struct mdt_thread_info *info, try_layout = true; } - mdt_lock_handle_init(lhc); + if (acq_lease) { + /* lease open, acquire write mode of open sem */ + down_write(&obj->mot_open_sem); + + /* Lease exists and ask for new lease */ + if (atomic_read(&obj->mot_lease_count) > 0) { + /* only exclusive open is supported, so lease + * are conflicted to each other */ + GOTO(out, rc = -EBUSY); + } + + /* Lease must be with open lock */ + if (!(open_flags & MDS_OPEN_LOCK)) { + CERROR("Request lease for file:"DFID ", but open lock " + "is missed, open_flags = "LPO64".\n", + PFID(mdt_object_fid(obj)), open_flags); + GOTO(out, rc = -EPROTO); + } + + /* XXX: only exclusive open is supported. 
*/ + lm = LCK_EX; + *ibits = MDS_INODELOCK_OPEN; + + /* never grant LCK_EX layout lock to client */ + try_layout = false; + } else { /* normal open */ + /* normal open holds read mode of open sem */ + down_read(&obj->mot_open_sem); + + if (open_flags & MDS_OPEN_LOCK) { + if (open_flags & FMODE_WRITE) + lm = LCK_CW; + /* if file is released, we can't deny write because we must + * restore (write) it to access it. */ + else if ((open_flags & MDS_FMODE_EXEC) && + !((ma->ma_valid & MA_HSM) && + (ma->ma_hsm.mh_flags & HS_RELEASED))) + lm = LCK_PR; + else + lm = LCK_CR; + + *ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_OPEN; + } else if (atomic_read(&obj->mot_lease_count) > 0) { + if (open_flags & FMODE_WRITE) + lm = LCK_CW; + else + lm = LCK_CR; + + /* revoke lease */ + *ibits = MDS_INODELOCK_OPEN; + try_layout = false; + + lhc = &info->mti_lh[MDT_LH_LOCAL]; + } + CDEBUG(D_INODE, "normal open:"DFID" lease count: %d, lm: %d\n", + PFID(mdt_object_fid(obj)), + atomic_read(&obj->mot_open_count), lm); + } + mdt_lock_reg_init(lhc, lm); /* one problem to return layout lock on open is that it may result @@ -1233,6 +1285,44 @@ static int mdt_object_open_lock(struct mdt_thread_info *info, OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LL_BLOCK, 2); } + /* Check if there is any other open handles after acquiring + * open lock. At this point, caching open handles have been revoked + * by open lock. + * XXX: Now only exclusive open is supported. Need to check the + * type of open for generic lease support. */ + if (rc == 0 && acq_lease) { + struct ptlrpc_request *req = mdt_info_req(info); + struct mdt_export_data *med = &req->rq_export->exp_mdt_data; + struct mdt_file_data *mfd; + bool is_replay_or_resent; + int open_count = 0; + + /* For lease: application can open a file and then apply lease, + * @handle contains original open handle in that case. 
+ * In recovery, open REQ will be replayed and the lease REQ may + * be resent that means the open handle is already stale, so we + * need to fix it up here by finding new handle. */ + is_replay_or_resent = req_is_replay(req) || + lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT; + + /* if the request is _not_ a replay request, rr_handle + * may be used to hold an openhandle which is issuing the + * lease request, so that this openhandle doesn't count. */ + mfd = mdt_handle2mfd(med, info->mti_rr.rr_handle, + is_replay_or_resent); + if (mfd != NULL) + ++open_count; + + CDEBUG(D_INODE, "acq_lease "DFID": openers: %d, want: %d\n", + PFID(mdt_object_fid(obj)), + atomic_read(&obj->mot_open_count), open_count); + + if (atomic_read(&obj->mot_open_count) > open_count) + GOTO(out, rc = -EBUSY); + } + GOTO(out, rc); + +out: RETURN(rc); } @@ -1242,18 +1332,32 @@ static void mdt_object_open_unlock(struct mdt_thread_info *info, __u64 ibits, int rc) { __u64 open_flags = info->mti_spec.sp_cr_flags; - struct mdt_lock_handle *ll = &info->mti_lh[MDT_LH_LAYOUT]; + struct mdt_lock_handle *ll = &info->mti_lh[MDT_LH_LOCAL]; + ENTRY; + + if (req_is_replay(mdt_info_req(info))) + RETURN_EXIT; + + /* Release local lock - the lock put in MDT_LH_LOCAL will never + * return to client side. */ + if (lustre_handle_is_used(&ll->mlh_reg_lh)) + mdt_object_unlock(info, obj, ll, 1); - /* Release local layout lock - the layout lock put in MDT_LH_LAYOUT - * will never return to client side. 
*/ + ll = &info->mti_lh[MDT_LH_LAYOUT]; + /* Release local layout lock, layout was created */ if (lustre_handle_is_used(&ll->mlh_reg_lh)) { LASSERT(!(ibits & MDS_INODELOCK_LAYOUT)); mdt_object_unlock(info, obj, ll, 1); } + if (open_flags & MDS_OPEN_LEASE) + up_write(&obj->mot_open_sem); + else + up_read(&obj->mot_open_sem); + /* Cross-ref case, the lock should be returned to the client */ if (ibits == 0 || rc == -EREMOTE) - return; + RETURN_EXIT; if (!(open_flags & MDS_OPEN_LOCK) && !(ibits & MDS_INODELOCK_LAYOUT)) { /* for the open request, the lock will only return to client @@ -1268,6 +1372,7 @@ static void mdt_object_open_unlock(struct mdt_thread_info *info, mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_LOCK); mdt_object_unlock(info, obj, lhc, 1); } + RETURN_EXIT; } int mdt_open_by_fid_lock(struct mdt_thread_info *info, struct ldlm_reply *rep, @@ -1275,7 +1380,7 @@ int mdt_open_by_fid_lock(struct mdt_thread_info *info, struct ldlm_reply *rep, { const struct lu_env *env = info->mti_env; struct mdt_device *mdt = info->mti_mdt; - __u32 flags = info->mti_spec.sp_cr_flags; + __u64 flags = info->mti_spec.sp_cr_flags; struct mdt_reint_record *rr = &info->mti_rr; struct md_attr *ma = &info->mti_attr; struct mdt_object *parent= NULL; @@ -1324,7 +1429,7 @@ int mdt_open_by_fid_lock(struct mdt_thread_info *info, struct ldlm_reply *rep, rc = mdt_object_open_lock(info, o, lhc, &ibits); if (rc) - GOTO(out, rc); + GOTO(out_unlock, rc); if (ma->ma_valid & MA_PFID) { parent = mdt_object_find(env, mdt, &ma->ma_pfid); @@ -1342,15 +1447,18 @@ int mdt_open_by_fid_lock(struct mdt_thread_info *info, struct ldlm_reply *rep, mdt_set_disposition(info, rep, DISP_LOOKUP_POS); if (flags & MDS_OPEN_LOCK) mdt_set_disposition(info, rep, DISP_OPEN_LOCK); + if (flags & MDS_OPEN_LEASE) + mdt_set_disposition(info, rep, DISP_OPEN_LEASE); } - GOTO(out, rc); + GOTO(out_unlock, rc); -out: +out_unlock: mdt_object_open_unlock(info, o, lhc, ibits, rc); - mdt_object_put(env, o); - if (parent != NULL) - 
mdt_object_put(env, parent); - return rc; +out: + mdt_object_put(env, o); + if (parent != NULL) + mdt_object_put(env, parent); + return rc; } int mdt_pin(struct mdt_thread_info* info) @@ -1694,7 +1802,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) if (!req_is_replay(req)) { rc = mdt_object_open_lock(info, child, lhc, &ibits); if (rc != 0) - GOTO(out_child, result = rc); + GOTO(out_child_unlock, result = rc); else if (create_flags & MDS_OPEN_LOCK) mdt_set_disposition(info, ldlm_rep, DISP_OPEN_LOCK); } @@ -1722,12 +1830,13 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE); } } - EXIT; -out_child: + EXIT; +out_child_unlock: mdt_object_open_unlock(info, child, lhc, ibits, result); - mdt_object_put(info->mti_env, child); +out_child: + mdt_object_put(info->mti_env, child); out_parent: - mdt_object_unlock_put(info, parent, lh, result || !created); + mdt_object_unlock_put(info, parent, lh, result || !created); out: if (result) lustre_msg_set_transno(req->rq_repmsg, 0); @@ -1749,7 +1858,7 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) struct md_attr *ma = &info->mti_attr; int ret = MDT_IOEPOCH_CLOSED; int rc = 0; - int mode; + __u64 mode; ENTRY; mode = mfd->mfd_mode; @@ -1822,11 +1931,19 @@ int mdt_mfd_close(struct mdt_thread_info *info, struct mdt_file_data *mfd) "needed on "DFID"\n", PFID(mdt_object_fid(o))); } } else { - mdt_mfd_free(mfd); - mdt_object_put(info->mti_env, o); - } + /* adjust open and lease count */ + if (mode & MDS_OPEN_LEASE) { + LASSERT(atomic_read(&o->mot_lease_count) > 0); + atomic_dec(&o->mot_lease_count); + } + LASSERT(atomic_read(&o->mot_open_count) > 0); + atomic_dec(&o->mot_open_count); - RETURN(rc ? rc : ret); + mdt_mfd_free(mfd); + mdt_object_put(info->mti_env, o); + } + + RETURN(rc ? 
rc : ret); } int mdt_close(struct mdt_thread_info *info) diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 8b83c17..c1c44bc 100644 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,8 @@ char usage[] = " C[num] create with optional stripes\n" " d mkdir\n" " D open(O_DIRECTORY)\n" +" e[R|W|U] apply lease. R: Read; W: Write; U: Unlock\n" +" E[+|-] get lease. +/-: expect lease to (not) exist\n" " f statfs\n" " F print FID\n" " H[num] create HSM released file with num stripes\n" @@ -286,13 +289,71 @@ int main(int argc, char **argv) exit(save_errno); } break; - case 'f': - if (statfs(fname, &stfs) == -1) { - save_errno = errno; - perror("statfs()"); - exit(save_errno); - } - break; + case 'e': + commands++; + switch (*commands) { + case 'U': + flags = F_UNLCK; + break; + case 'R': + flags = F_RDLCK; + break; + case 'W': + flags = F_WRLCK; + break; + default: + errx(-1, "unknown mode: %c", *commands); + } + + rc = ioctl(fd, LL_IOC_SET_LEASE, flags); + if (rc < 0) + err(errno, "apply lease error"); + + if (flags != F_UNLCK) + break; + + /* F_UNLCK, interpret return code */ + if (rc > 0) { + const char *str = "Unknown"; + if (rc == FMODE_READ) + str = "FMODE_READ"; + else if (rc == FMODE_WRITE) + str = "FMODE_WRITE"; + fprintf(stdout, "%s lease(%d) released.\n", + str, rc); + } else if (rc == 0) { + fprintf(stdout, "lease already broken.\n"); + } + break; + case 'E': + commands++; + if (*commands != '-' && *commands != '+') + errx(-1, "unknown mode: %c\n", *commands); + + rc = ioctl(fd, LL_IOC_GET_LEASE); + if (rc > 0) { + const char *str = "Unknown"; + + if (rc == FMODE_READ) + str = "FMODE_READ"; + else if (rc == FMODE_WRITE) + str = "FMODE_WRITE"; + fprintf(stdout, "%s lease(%d) has applied.\n", + str, rc); + if (*commands == '-') + errx(-1, "expect lease to not exist"); + } else if (rc == 0) { + fprintf(stdout, "no lease applied.\n"); + if (*commands == '+') + 
errx(-1, "expect lease exists"); + } else { + err(errno, "free lease error"); + } + break; + case 'f': + if (statfs(fname, &stfs) == -1) + errx(-1, "statfs()"); + break; case 'F': if (fd == -1) rc = llapi_path2fid(fname, &fid); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 20c2d04..d46f02f 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -10488,6 +10488,57 @@ test_207b() { } run_test 207b "can refresh layout at open" +test_208() { + # FIXME: in this test suite, only RD lease is used. This is okay + # for now as only exclusive open is supported. After generic lease + # is done, this test suite should be revised. - Jinshan + + [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.52) ]] || + { skip "Need MDS version at least 2.4.52"; return 0; } + + echo "==== test 1: verify get lease work" + $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eRE+eU || error "get lease error" + + echo "==== test 2: verify lease can be broken by upcoming open" + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc & + local PID=$! + sleep 1 + + $MULTIOP $DIR/$tfile oO_RDONLY:c + kill -USR1 $PID && wait $PID || error "break lease error" + + echo "==== test 3: verify lease can't be granted if an open already exists" + $MULTIOP $DIR/$tfile oO_RDONLY:_c & + local PID=$! + sleep 1 + + $MULTIOP $DIR/$tfile oO_RDONLY:eReUc && error "apply lease should fail" + kill -USR1 $PID && wait $PID || error "open file error" + + echo "==== test 4: lease can sustain over recovery" + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc & + PID=$! + sleep 1 + + fail mds1 + + kill -USR1 $PID && wait $PID || error "lease broken over recovery" + + echo "==== test 5: lease broken can't be regained by replay" + $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc & + PID=$! 
+ sleep 1 + + # open file to break lease and then recovery + $MULTIOP $DIR/$tfile oO_RDWR:c || error "open file error" + fail mds1 + + kill -USR1 $PID && wait $PID || error "lease not broken over recovery" + + rm -f $DIR/$tfile +} +run_test 208 "Exclusive open" + test_212() { size=`date +%s` size=$((size % 8192 + 1))