f-mask = local_only;
f-desc = 'Flag whether this is a server namespace lock.';
};
+
+flag[55] = {
+ f-name = excl;
+ f-mask = local_only;
+ f-desc = 'Flag whether this lock can be reused. Used by exclusive open.';
+};
};
struct lookup_intent {
- int it_magic;
- void (*it_op_release)(struct lookup_intent *);
- int it_op;
- int it_flags;
- int it_create_mode;
- union {
- struct lustre_intent_data lustre;
- } d;
+ int it_magic;
+ void (*it_op_release)(struct lookup_intent *);
+ int it_op;
+ int it_create_mode;
+ __u64 it_flags;
+ union {
+ struct lustre_intent_data lustre;
+ } d;
};
static inline void intent_init(struct lookup_intent *it, int op, int flags)
};
struct lookup_intent {
- int it_op;
- int it_flags;
+ int it_op;
int it_create_mode;
- union {
- struct lustre_intent_data lustre;
- } d;
+ __u64 it_flags;
+ union {
+ struct lustre_intent_data lustre;
+ } d;
};
#endif
#define DISP_ENQ_OPEN_REF 0x00800000
#define DISP_ENQ_CREATE_REF 0x01000000
#define DISP_OPEN_LOCK 0x02000000
+#define DISP_OPEN_LEASE 0x04000000
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
* hsm restore) */
#define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created
unlinked */
+#define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease
+ * delegation, succeed if it's not
+ * being opened with conflict mode.
+ */
/* permission for create non-directory file */
#define MAY_CREATE (1 << 7)
struct hsm_current_action)
/* see <lustre_lib.h> for ioctl numbers 221-232 */
-#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
-#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
-#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
+#define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
+#define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
+#define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
-#define LL_STATFS_LMV 1
-#define LL_STATFS_LOV 2
+#define LL_IOC_SET_LEASE _IOWR('f', 243, long)
+#define LL_IOC_GET_LEASE _IO('f', 244)
+
+#define LL_STATFS_LMV 1
+#define LL_STATFS_LOV 2
#define LL_STATFS_NODELAY 4
#define IOC_MDC_TYPE 'i'
#ifndef LDLM_ALL_FLAGS_MASK
/** l_flags bits marked as "all_flags" bits */
-#define LDLM_FL_ALL_FLAGS_MASK 0x007FFFFFC08F132FULL
+#define LDLM_FL_ALL_FLAGS_MASK 0x00FFFFFFC08F132FULL
/** l_flags bits marked as "ast" bits */
#define LDLM_FL_AST_MASK 0x0000000080000000ULL
#define LDLM_FL_INHERIT_MASK 0x0000000000800000ULL
/** l_flags bits marked as "local_only" bits */
-#define LDLM_FL_LOCAL_ONLY_MASK 0x007FFFFF00000000ULL
+#define LDLM_FL_LOCAL_ONLY_MASK 0x00FFFFFF00000000ULL
/** l_flags bits marked as "on_wire" bits */
#define LDLM_FL_ON_WIRE_MASK 0x00000000C08F132FULL
#define ldlm_set_ns_srv(_l) LDLM_SET_FLAG(( _l), 1ULL << 54)
#define ldlm_clear_ns_srv(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 54)
+/** Flag whether this lock can be reused. Used by exclusive open. */
+#define LDLM_FL_EXCL 0x0080000000000000ULL // bit 55
+#define ldlm_is_excl(_l) LDLM_TEST_FLAG(( _l), 1ULL << 55)
+#define ldlm_set_excl(_l) LDLM_SET_FLAG(( _l), 1ULL << 55)
+#define ldlm_clear_excl(_l) LDLM_CLEAR_FLAG((_l), 1ULL << 55)
+
/** test for ldlm_lock flag bit set */
#define LDLM_TEST_FLAG(_l, _b) (((_l)->l_flags & (_b)) != 0)
static int hf_lustre_ldlm_fl_res_locked = -1;
static int hf_lustre_ldlm_fl_waited = -1;
static int hf_lustre_ldlm_fl_ns_srv = -1;
+static int hf_lustre_ldlm_fl_excl = -1;
const value_string lustre_ldlm_flags_vals[] = {
{LDLM_FL_LOCK_CHANGED, "LDLM_FL_LOCK_CHANGED"},
{LDLM_FL_RES_LOCKED, "LDLM_FL_RES_LOCKED"},
{LDLM_FL_WAITED, "LDLM_FL_WAITED"},
{LDLM_FL_NS_SRV, "LDLM_FL_NS_SRV"},
+ {LDLM_FL_EXCL, "LDLM_FL_EXCL"},
{ 0, NULL }
};
#endif /* WIRESHARK_COMPILE */
struct md_open_data;
struct obd_client_handle {
- struct lustre_handle och_fh;
- struct lu_fid och_fid;
- struct md_open_data *och_mod;
- __u32 och_magic;
- int och_flags;
+ struct lustre_handle och_fh;
+ struct lu_fid och_fid;
+ struct md_open_data *och_mod;
+ struct lustre_handle och_lease_handle; /* open lock for lease */
+ __u32 och_magic;
+ int och_flags;
};
#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
if (lock == old_lock)
break;
+ /* Check if this lock can be matched.
+ * Used by LU-2919(exclusive open) for open lease lock */
+ if (ldlm_is_excl(lock))
+ continue;
+
/* llite sometimes wants to match locks that will be
* canceled when their users drop, but we allow it to match
* if it passes in CBPENDING and the lock still has users.
lock->l_conn_export = exp;
lock->l_export = NULL;
lock->l_blocking_ast = einfo->ei_cb_bl;
- lock->l_flags |= (*flags & LDLM_FL_NO_LRU);
+ lock->l_flags |= (*flags & (LDLM_FL_NO_LRU | LDLM_FL_EXCL));
/* lock not sent to server yet */
}
rc = ldlm_cli_cancel_local(lock);
- if (rc == LDLM_FL_LOCAL_ONLY) {
- LDLM_LOCK_RELEASE(lock);
+ if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
+ LDLM_LOCK_RELEASE(lock);
RETURN(0);
- }
+ }
/* Even if the lock is marked as LDLM_FL_BL_AST, this is a LDLM_CANCEL
* RPC which goes to canceld portal, so we can cancel other LRU locks
* here and send them all as one LDLM_CANCEL RPC. */
if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED))
ll_put_grouplock(inode, file, fd->fd_grouplock.cg_gid);
+ if (fd->fd_lease_och != NULL) {
+ bool lease_broken;
+
+ /* Usually the lease is not released when the
+ * application crashed, we need to release here. */
+ rc = ll_lease_close(fd->fd_lease_och, inode, &lease_broken);
+ CDEBUG(rc ? D_ERROR : D_INODE, "Clean up lease "DFID" %d/%d\n",
+ PFID(&lli->lli_fid), rc, lease_broken);
+
+ fd->fd_lease_och = NULL;
+ }
+
+ if (fd->fd_och != NULL) {
+ rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och);
+ fd->fd_och = NULL;
+ GOTO(out, rc);
+ }
+
/* Let's see if we have good enough OPEN lock on the file and if
we can skip talking to MDS */
if (file->f_dentry->d_inode) { /* Can this ever be false? */
file, file->f_dentry, file->f_dentry->d_name.name);
}
- LUSTRE_FPRIVATE(file) = NULL;
- ll_file_data_put(fd);
- ll_capa_close(inode);
+out:
+ LUSTRE_FPRIVATE(file) = NULL;
+ ll_file_data_put(fd);
+ ll_capa_close(inode);
- RETURN(rc);
+ RETURN(rc);
}
/* While this returns an error code, fput() the caller does not, so we need
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
och->och_fh = body->handle;
och->och_fid = body->fid1;
+ och->och_lease_handle.cookie = it->d.lustre.it_lock_handle;
och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
och->och_flags = it->it_flags;
LUSTRE_FPRIVATE(file) = fd;
ll_readahead_init(inode, &fd->fd_ras);
- fd->fd_omode = it->it_flags;
+ fd->fd_omode = it->it_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
RETURN(0);
}
return rc;
}
+/**
+ * Blocking AST installed on the open lock that backs a lease.
+ *
+ * For LDLM_CB_BLOCKING the lock is cancelled asynchronously; any other
+ * callback type (LDLM_CB_CANCELING included) needs no action here.
+ *
+ * Returns 0, or the negative value reported by ldlm_cli_cancel().
+ */
+static int ll_md_blocking_lease_ast(struct ldlm_lock *lock,
+				    struct ldlm_lock_desc *desc, void *data, int flag)
+{
+	struct lustre_handle handle;
+	int result;
+	ENTRY;
+
+	/* nothing to do unless we are asked to give the lock up */
+	if (flag != LDLM_CB_BLOCKING)
+		RETURN(0);
+
+	ldlm_lock2handle(lock, &handle);
+	result = ldlm_cli_cancel(&handle, LCF_ASYNC);
+	if (result >= 0)
+		RETURN(0);
+
+	CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", result);
+	RETURN(result);
+}
+
+/**
+ * Acquire a lease and open the file.
+ *
+ * Sends an IT_OPEN intent flagged MDS_OPEN_LOCK | MDS_OPEN_BY_FID |
+ * MDS_OPEN_LEASE and fills a newly allocated open handle from the reply.
+ *
+ * inode: inode of the file to lease.
+ * file:  file asking for the lease, may be NULL. When given, its own open
+ *        handle is passed to the MDT in op_handle so the MDT can tell the
+ *        request comes from the same owner.
+ * fmode: exactly FMODE_READ or exactly FMODE_WRITE.
+ *
+ * Returns the new open handle on success. On failure returns ERR_PTR() of:
+ *   -EINVAL      fmode is not exactly FMODE_READ or FMODE_WRITE
+ *   -EPERM       file is not opened with fmode, or is opened FMODE_EXEC
+ *   -EBUSY       file already holds a lease, or the inode's cached open
+ *                handle is shared with other openers
+ *   -ENOMEM      the open handle could not be allocated
+ *   -ENOENT      the reply carries DISP_LOOKUP_NEG
+ *   -EOPNOTSUPP  the reply lacks DISP_OPEN_LEASE
+ *   -EPROTO      a lease was granted without an MDS_INODELOCK_OPEN lock
+ * or whatever error ll_prep_md_op_data(), md_intent_lock() or
+ * it_open_error() reported.
+ */
+struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
+					fmode_t fmode)
+{
+	struct lookup_intent it = { .it_op = IT_OPEN };
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct md_op_data *op_data;
+	/* Start from NULL so the "req != NULL" test below is well defined
+	 * even if md_intent_lock() fails before storing a request. */
+	struct ptlrpc_request *req = NULL;
+	struct lustre_handle old_handle = { 0 };
+	struct obd_client_handle *och = NULL;
+	int rc;
+	int rc2;
+	ENTRY;
+
+	if (fmode != FMODE_WRITE && fmode != FMODE_READ)
+		RETURN(ERR_PTR(-EINVAL));
+
+	if (file != NULL) {
+		struct ll_inode_info *lli = ll_i2info(inode);
+		struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+		struct obd_client_handle **och_p;
+		__u64 *och_usecount;
+
+		if (!(fmode & file->f_mode) || (file->f_mode & FMODE_EXEC))
+			RETURN(ERR_PTR(-EPERM));
+
+		/* Get the openhandle of the file */
+		rc = 0;
+		mutex_lock(&lli->lli_och_mutex);
+		if (fd->fd_lease_och != NULL) {
+			mutex_unlock(&lli->lli_och_mutex);
+			RETURN(ERR_PTR(-EBUSY));
+		}
+
+		/* If fd->fd_och is already set (taken over by an earlier
+		 * lease request on this file) it is simply reused; only a
+		 * handle still shared through the inode can make us busy. */
+		if (fd->fd_och == NULL) {
+			if (file->f_mode & FMODE_WRITE) {
+				LASSERT(lli->lli_mds_write_och != NULL);
+				och_p = &lli->lli_mds_write_och;
+				och_usecount = &lli->lli_open_fd_write_count;
+			} else {
+				LASSERT(lli->lli_mds_read_och != NULL);
+				och_p = &lli->lli_mds_read_och;
+				och_usecount = &lli->lli_open_fd_read_count;
+			}
+			if (*och_usecount == 1) {
+				/* sole user: move the handle from the inode
+				 * to this file */
+				fd->fd_och = *och_p;
+				*och_p = NULL;
+				*och_usecount = 0;
+			} else {
+				rc = -EBUSY;
+			}
+		}
+		mutex_unlock(&lli->lli_och_mutex);
+		if (rc < 0) /* more than 1 opener */
+			RETURN(ERR_PTR(rc));
+
+		LASSERT(fd->fd_och != NULL);
+		old_handle = fd->fd_och->och_fh;
+	}
+
+	OBD_ALLOC_PTR(och);
+	if (och == NULL)
+		RETURN(ERR_PTR(-ENOMEM));
+
+	op_data = ll_prep_md_op_data(NULL, inode, inode, NULL, 0, 0,
+				     LUSTRE_OPC_ANY, NULL);
+	if (IS_ERR(op_data))
+		GOTO(out, rc = PTR_ERR(op_data));
+
+	/* To tell the MDT this openhandle is from the same owner */
+	op_data->op_handle = old_handle;
+
+	it.it_flags = fmode | MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
+	/* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
+	 * it can be cancelled which may mislead applications that the lease is
+	 * broken;
+	 * LDLM_FL_EXCL: Set this flag so that it won't be matched by normal
+	 * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
+	 * doesn't deal with openhandle, so normal openhandle will be leaked. */
+	rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
+			    ll_md_blocking_lease_ast,
+			    LDLM_FL_NO_LRU | LDLM_FL_EXCL);
+	ll_finish_md_op_data(op_data);
+	if (req != NULL) {
+		ptlrpc_req_finished(req);
+		it_clear_disposition(&it, DISP_ENQ_COMPLETE);
+	}
+	if (rc < 0)
+		GOTO(out_release_it, rc);
+
+	if (it_disposition(&it, DISP_LOOKUP_NEG))
+		GOTO(out_release_it, rc = -ENOENT);
+
+	rc = it_open_error(DISP_OPEN_OPEN, &it);
+	if (rc)
+		GOTO(out_release_it, rc);
+
+	LASSERT(it_disposition(&it, DISP_ENQ_OPEN_REF));
+	ll_och_fill(sbi->ll_md_exp, &it, och);
+
+	if (!it_disposition(&it, DISP_OPEN_LEASE)) /* old server? */
+		GOTO(out_close, rc = -EOPNOTSUPP);
+
+	/* already get lease, handle lease lock */
+	ll_set_lock_data(sbi->ll_md_exp, inode, &it, NULL);
+	if (it.d.lustre.it_lock_mode == 0 ||
+	    it.d.lustre.it_lock_bits != MDS_INODELOCK_OPEN) {
+		/* open lock must return for lease */
+		CERROR(DFID "lease granted but no open lock, %d/%Lu.\n",
+		       PFID(ll_inode2fid(inode)), it.d.lustre.it_lock_mode,
+		       it.d.lustre.it_lock_bits);
+		GOTO(out_close, rc = -EPROTO);
+	}
+
+	ll_intent_release(&it);
+	RETURN(och);
+
+out_close:
+	/* Cancel the open lock while @och is still ours to read.
+	 * NOTE(review): ll_close_inode_openhandle() is not visible here, but
+	 * ll_lease_close() and the file release path both hand it a handle
+	 * and never touch or free that handle afterwards, so it is assumed
+	 * to consume @och - confirm against its definition. */
+	if (it.d.lustre.it_lock_mode != 0) {
+		ldlm_lock_decref_and_cancel(&och->och_lease_handle,
+					    it.d.lustre.it_lock_mode);
+		it.d.lustre.it_lock_mode = 0;
+	}
+
+	rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och);
+	if (rc2)
+		CERROR("Close openhandle returned %d\n", rc2);
+	och = NULL; /* handed over above, must not be freed again below */
+out_release_it:
+	ll_intent_release(&it);
+out:
+	if (och != NULL)
+		OBD_FREE_PTR(och);
+	RETURN(ERR_PTR(rc));
+}
+EXPORT_SYMBOL(ll_lease_open);
+
+/**
+ * Release lease and close the file.
+ *
+ * The lease counts as broken when its lock can no longer be looked up or
+ * already carries the cancel flag; a lease that is still intact is
+ * cancelled here before the open handle is closed.
+ *
+ * och:          lease open handle, e.g. one returned by ll_lease_open()
+ * inode:        inode the lease belongs to
+ * lease_broken: optional (may be NULL); receives whether the lease was
+ *               found broken
+ *
+ * Returns the result of ll_close_inode_openhandle().
+ */
+int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+		   bool *lease_broken)
+{
+	struct ldlm_lock *lease_lock;
+	/* stays true when the handle no longer resolves to a lock */
+	bool broken = true;
+	int rc;
+	ENTRY;
+
+	lease_lock = ldlm_handle2lock(&och->och_lease_handle);
+	if (lease_lock != NULL) {
+		/* sample the cancel flag under the resource lock */
+		lock_res_and_lock(lease_lock);
+		broken = ldlm_is_cancel(lease_lock);
+		unlock_res_and_lock(lease_lock);
+		ldlm_lock_put(lease_lock);
+	}
+
+	CDEBUG(D_INODE, "lease for "DFID" broken? %d\n",
+	       PFID(&ll_i2info(inode)->lli_fid), broken);
+
+	/* an intact lease still holds its lock: drop it now */
+	if (!broken)
+		ldlm_cli_cancel(&och->och_lease_handle, 0);
+
+	if (lease_broken != NULL)
+		*lease_broken = broken;
+
+	rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och);
+	RETURN(rc);
+}
+EXPORT_SYMBOL(ll_lease_close);
+
/* Fills the obdo with the attributes for the lsm */
static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
struct obd_capa *capa, struct obdo *obdo,
OBD_FREE_PTR(hca);
RETURN(rc);
}
+ case LL_IOC_SET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle *och = NULL;
+ bool lease_broken;
+ fmode_t mode = 0;
+
+ switch (arg) {
+ case F_WRLCK:
+ if (!(file->f_mode & FMODE_WRITE))
+ RETURN(-EPERM);
+ mode = FMODE_WRITE;
+ break;
+ case F_RDLCK:
+ if (!(file->f_mode & FMODE_READ))
+ RETURN(-EPERM);
+ mode = FMODE_READ;
+ break;
+ case F_UNLCK:
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ och = fd->fd_lease_och;
+ fd->fd_lease_och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ if (och != NULL) {
+ mode = och->och_flags &(FMODE_READ|FMODE_WRITE);
+ rc = ll_lease_close(och, inode, &lease_broken);
+ if (rc == 0 && lease_broken)
+ mode = 0;
+ } else {
+ rc = -ENOLCK;
+ }
+
+ /* return the type of lease or error */
+ RETURN(rc < 0 ? rc : (int)mode);
+ default:
+ RETURN(-EINVAL);
+ }
+
+ CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
+
+ /* apply for lease */
+ och = ll_lease_open(inode, file, mode);
+ if (IS_ERR(och))
+ RETURN(PTR_ERR(och));
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och == NULL) {
+ fd->fd_lease_och = och;
+ och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (och != NULL) {
+ /* impossible now that only excl is supported for now */
+ ll_lease_close(och, inode, &lease_broken);
+ rc = -EBUSY;
+ }
+ RETURN(rc);
+ }
+ case LL_IOC_GET_LEASE: {
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct ldlm_lock *lock = NULL;
+
+ rc = 0;
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och != NULL) {
+ struct obd_client_handle *och = fd->fd_lease_och;
+
+ lock = ldlm_handle2lock(&och->och_lease_handle);
+ if (lock != NULL) {
+ lock_res_and_lock(lock);
+ if (!ldlm_is_cancel(lock))
+ rc = och->och_flags &
+ (FMODE_READ | FMODE_WRITE);
+ unlock_res_and_lock(lock);
+ ldlm_lock_put(lock);
+ }
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+
+ RETURN(rc);
+ }
default: {
int err;
extern struct kmem_cache *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
- struct ll_readahead_state fd_ras;
- int fd_omode;
- struct ccc_grouplock fd_grouplock;
+ struct ll_readahead_state fd_ras;
+ struct ccc_grouplock fd_grouplock;
__u64 lfd_pos;
- __u32 fd_flags;
- struct file *fd_file;
+ __u32 fd_flags;
+ fmode_t fd_omode;
+ /* openhandle if lease exists for this file.
+ * Borrow lli->lli_och_mutex to protect assignment */
+ struct obd_client_handle *fd_lease_och;
+ struct obd_client_handle *fd_och;
+ struct file *fd_file;
/* Indicate whether need to report failure when close.
* true: failure is known, not report again.
* false: unknown failure, should report. */
int ll_fid2path(struct inode *inode, void *arg);
int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock);
+struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file,
+ fmode_t mode);
+int ll_lease_close(struct obd_client_handle *och, struct inode *inode,
+ bool *lease_broken);
+
/* llite/dcache.c */
int ll_dops_init(struct dentry *de, int block, int init_sa);
const void *data, int datalen, __u32 mode, __u32 uid,
__u32 gid, cfs_cap_t capability, __u64 rdev);
void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- __u32 mode, __u64 rdev, __u32 flags, const void *data,
- int datalen);
+ __u32 mode, __u64 rdev, __u64 flags, const void *data,
+ int datalen);
void mdc_unlink_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
void mdc_link_pack(struct ptlrpc_request *req, struct md_op_data *op_data);
void mdc_rename_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
}
}
-static __u64 mds_pack_open_flags(__u32 flags, __u32 mode)
+static __u64 mds_pack_open_flags(__u64 flags, __u32 mode)
{
__u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE |
MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK |
- MDS_OPEN_BY_FID));
+ MDS_OPEN_BY_FID | MDS_OPEN_LEASE));
if (flags & O_CREAT)
cr_flags |= MDS_OPEN_CREAT;
if (flags & O_EXCL)
/* packing of MDS records */
void mdc_open_pack(struct ptlrpc_request *req, struct md_op_data *op_data,
- __u32 mode, __u64 rdev, __u32 flags, const void *lmm,
+ __u32 mode, __u64 rdev, __u64 flags, const void *lmm,
int lmmlen)
{
struct mdt_rec_create *rec;
rec->cr_suppgid2 = op_data->op_suppgids[1];
rec->cr_bias = op_data->op_bias;
rec->cr_umask = cfs_curproc_umask();
+ rec->cr_old_handle = op_data->op_handle;
mdc_pack_capa(req, &RMF_CAPA1, op_data->op_capa1);
/* the next buffer is child capa, which is used for replay,
int it_open_error(int phase, struct lookup_intent *it)
{
+ if (it_disposition(it, DISP_OPEN_LEASE)) {
+ if (phase >= DISP_OPEN_LEASE)
+ return it->d.lustre.it_status;
+ else
+ return 0;
+ }
if (it_disposition(it, DISP_OPEN_OPEN)) {
if (phase >= DISP_OPEN_OPEN)
return it->d.lustre.it_status;
/* XXX: openlock is not cancelled for cross-refs. */
/* If inode is known, cancel conflicting OPEN locks. */
- if (fid_is_sane(&op_data->op_fid2)) {
- if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
- mode = LCK_CW;
+ if (fid_is_sane(&op_data->op_fid2)) {
+ if (it->it_flags & MDS_OPEN_LEASE) { /* try to get lease */
+ if (it->it_flags & FMODE_WRITE)
+ mode = LCK_EX;
+ else
+ mode = LCK_PR;
+ } else {
+ if (it->it_flags & (FMODE_WRITE|MDS_OPEN_TRUNC))
+ mode = LCK_CW;
#ifdef FMODE_EXEC
- else if (it->it_flags & FMODE_EXEC)
- mode = LCK_PR;
+ else if (it->it_flags & FMODE_EXEC)
+ mode = LCK_PR;
#endif
- else
- mode = LCK_CR;
- count = mdc_resource_get_unused(exp, &op_data->op_fid2,
- &cancels, mode,
- MDS_INODELOCK_OPEN);
- }
+ else
+ mode = LCK_CR;
+ }
+ count = mdc_resource_get_unused(exp, &op_data->op_fid2,
+ &cancels, mode,
+ MDS_INODELOCK_OPEN);
+ }
/* If CREATE, cancel parent's UPDATE lock. */
if (it->it_op & IT_CREAT)
ldlm_blocking_callback cb_blocking,
__u64 extra_lock_flags)
{
- struct lustre_handle lockh;
- int rc = 0;
- ENTRY;
- LASSERT(it);
+ struct lustre_handle lockh;
+ int rc = 0;
+ ENTRY;
+ LASSERT(it);
- CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
- ", intent: %s flags %#o\n", op_data->op_namelen,
- op_data->op_name, PFID(&op_data->op_fid2),
- PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
- it->it_flags);
+ CDEBUG(D_DLMTRACE, "(name: %.*s,"DFID") in obj "DFID
+ ", intent: %s flags %#Lo\n", op_data->op_namelen,
+ op_data->op_name, PFID(&op_data->op_fid2),
+ PFID(&op_data->op_fid1), ldlm_it2str(it->it_op),
+ it->it_flags);
lockh.cookie = 0;
if (fid_is_sane(&op_data->op_fid2) &&
};
int rc = 0;
__u64 flags = LDLM_FL_HAS_INTENT;
- ENTRY;
+ ENTRY;
- CDEBUG(D_DLMTRACE,"name: %.*s in inode "DFID", intent: %s flags %#o\n",
- op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
- ldlm_it2str(it->it_op), it->it_flags);
+ CDEBUG(D_DLMTRACE,"name: %.*s in inode "DFID", intent: %s flags %#Lo\n",
+ op_data->op_namelen, op_data->op_name, PFID(&op_data->op_fid1),
+ ldlm_it2str(it->it_op), it->it_flags);
fid_build_reg_res_name(&op_data->op_fid1, &res_id);
req = mdc_intent_getattr_pack(exp, it, op_data);
o->lo_ops = &mdt_obj_ops;
mutex_init(&mo->mot_ioepoch_mutex);
mutex_init(&mo->mot_lov_mutex);
- RETURN(o);
- } else
- RETURN(NULL);
+ init_rwsem(&mo->mot_open_sem);
+ RETURN(o);
+ }
+ RETURN(NULL);
}
static int mdt_object_init(const struct lu_env *env, struct lu_object *o,
CDEBUG(D_INFO, "object free, fid = "DFID"\n",
PFID(lu_object_fid(o)));
- lu_object_fini(o);
- lu_object_header_fini(h);
+ LASSERT(atomic_read(&mo->mot_open_count) == 0);
+ LASSERT(atomic_read(&mo->mot_lease_count) == 0);
+
+ lu_object_fini(o);
+ lu_object_header_fini(h);
OBD_SLAB_FREE_PTR(mo, mdt_object_kmem);
- EXIT;
+ EXIT;
}
static int mdt_object_print(const struct lu_env *env, void *cookie,
/* file data for open files on MDS */
struct mdt_file_data {
- struct portals_handle mfd_handle; /* must be first */
- int mfd_mode; /* open mode provided by client */
- cfs_list_t mfd_list; /* protected by med_open_lock */
- __u64 mfd_xid; /* xid of the open request */
- struct lustre_handle mfd_old_handle; /* old handle in replay case */
- struct mdt_object *mfd_object; /* point to opened object */
+ struct portals_handle mfd_handle; /* must be first */
+ __u64 mfd_mode; /* open mode provided by client */
+ cfs_list_t mfd_list; /* protected by med_open_lock */
+ __u64 mfd_xid; /* xid of the open request */
+ struct lustre_handle mfd_old_handle; /* old handle in replay case */
+ struct mdt_object *mfd_object; /* point to opened object */
};
#define CDT_NONBLOCKING_RESTORE 0x0000000000000001ULL
struct mutex mot_ioepoch_mutex;
/* Lock to protect create_data */
struct mutex mot_lov_mutex;
+ /* Lock to protect lease open.
+ * Lease open acquires write lock; normal open acquires read lock */
+ struct rw_semaphore mot_open_sem;
+ atomic_t mot_lease_count;
+ atomic_t mot_open_count;
};
enum mdt_object_flags {
};
enum {
- MDT_LH_PARENT, /* parent lockh */
- MDT_LH_CHILD, /* child lockh */
- MDT_LH_OLD, /* old lockh for rename */
+ MDT_LH_PARENT, /* parent lockh */
+ MDT_LH_CHILD, /* child lockh */
+ MDT_LH_OLD, /* old lockh for rename */
MDT_LH_LAYOUT = MDT_LH_OLD, /* layout lock */
- MDT_LH_NEW, /* new lockh for rename */
- MDT_LH_RMT, /* used for return lh to caller */
- MDT_LH_NR
+ MDT_LH_NEW, /* new lockh for rename */
+ MDT_LH_RMT, /* used for return lh to caller */
+ MDT_LH_LOCAL, /* local lock never return to client */
+ MDT_LH_NR
};
enum {
struct mdt_lock_handle *child_lockh);
void mdt_mfd_set_mode(struct mdt_file_data *mfd,
- int mode);
+ __u64 mode);
int mdt_reint_open(struct mdt_thread_info *info,
struct mdt_lock_handle *lhc);
*/
struct mdt_file_data *mdt_handle2mfd(struct mdt_export_data *med,
const struct lustre_handle *handle,
- bool is_replay)
+ bool is_replay_or_resent)
{
struct mdt_file_data *mfd;
ENTRY;
LASSERT(handle != NULL);
mfd = class_handle2object(handle->cookie, med);
/* during dw/setattr replay the mfd can be found by old handle */
- if (mfd == NULL && is_replay) {
+ if (mfd == NULL && is_replay_or_resent) {
cfs_list_for_each_entry(mfd, &med->med_open_head, mfd_list) {
if (mfd->mfd_old_handle.cookie == handle->cookie)
RETURN(mfd);
EXIT;
}
-void mdt_mfd_set_mode(struct mdt_file_data *mfd, int mode)
+void mdt_mfd_set_mode(struct mdt_file_data *mfd, __u64 mode)
{
- LASSERT(mfd != NULL);
+ LASSERT(mfd != NULL);
- CDEBUG(D_HA, "Change mfd %p mode 0x%x->0x%x\n",
- mfd, (unsigned int)mfd->mfd_mode, (unsigned int)mode);
+ CDEBUG(D_HA, DFID "Change mfd mode 0x%Lx->0x%Lx\n",
+ PFID(mdt_object_fid(mfd->mfd_object)), mfd->mfd_mode, mode);
- mfd->mfd_mode = mode;
+ mfd->mfd_mode = mode;
}
static int mdt_mfd_open(struct mdt_thread_info *info, struct mdt_object *p,
* released by mdt_mfd_close().
*/
mdt_object_get(info->mti_env, o);
+ mfd->mfd_object = o;
+ mfd->mfd_xid = req->rq_xid;
/*
* @flags is always not zero. At least it should be FMODE_READ,
/* Open handling. */
mdt_mfd_set_mode(mfd, flags);
- mfd->mfd_object = o;
- mfd->mfd_xid = req->rq_xid;
+ atomic_inc(&o->mot_open_count);
+ if (flags & MDS_OPEN_LEASE)
+ atomic_inc(&o->mot_lease_count);
/* replay handle */
if (req_is_replay(req)) {
struct md_attr *ma = &info->mti_attr;
__u64 open_flags = info->mti_spec.sp_cr_flags;
ldlm_mode_t lm = LCK_CR;
+ bool acq_lease = !!(open_flags & MDS_OPEN_LEASE);
bool try_layout = false;
bool create_layout = false;
int rc = 0;
ENTRY;
*ibits = 0;
- if (open_flags & MDS_OPEN_LOCK) {
- if (open_flags & FMODE_WRITE)
- lm = LCK_CW;
- /* if file is released, we can't deny write because we must
- * restore (write) it to access it. */
- else if ((open_flags & MDS_FMODE_EXEC) &&
- !((ma->ma_valid & MA_HSM) &&
- (ma->ma_hsm.mh_flags & HS_RELEASED)))
- lm = LCK_PR;
- else
- lm = LCK_CR;
+ mdt_lock_handle_init(lhc);
- *ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_OPEN;
- }
+ if (req_is_replay(mdt_info_req(info)))
+ RETURN(0);
if (S_ISREG(lu_object_attr(&obj->mot_obj))) {
if (ma->ma_need & MA_LOV && !(ma->ma_valid & MA_LOV) &&
try_layout = true;
}
- mdt_lock_handle_init(lhc);
+ if (acq_lease) {
+ /* lease open, acquire write mode of open sem */
+ down_write(&obj->mot_open_sem);
+
+ /* Lease exists and ask for new lease */
+ if (atomic_read(&obj->mot_lease_count) > 0) {
+ /* only exclusive open is supported, so lease
+ * are conflicted to each other */
+ GOTO(out, rc = -EBUSY);
+ }
+
+ /* Lease must be with open lock */
+ if (!(open_flags & MDS_OPEN_LOCK)) {
+ CERROR("Request lease for file:"DFID ", but open lock "
+ "is missed, open_flags = "LPO64".\n",
+ PFID(mdt_object_fid(obj)), open_flags);
+ GOTO(out, rc = -EPROTO);
+ }
+
+ /* XXX: only exclusive open is supported. */
+ lm = LCK_EX;
+ *ibits = MDS_INODELOCK_OPEN;
+
+ /* never grant LCK_EX layout lock to client */
+ try_layout = false;
+ } else { /* normal open */
+ /* normal open holds read mode of open sem */
+ down_read(&obj->mot_open_sem);
+
+ if (open_flags & MDS_OPEN_LOCK) {
+ if (open_flags & FMODE_WRITE)
+ lm = LCK_CW;
+ /* if file is released, we can't deny write because we must
+ * restore (write) it to access it. */
+ else if ((open_flags & MDS_FMODE_EXEC) &&
+ !((ma->ma_valid & MA_HSM) &&
+ (ma->ma_hsm.mh_flags & HS_RELEASED)))
+ lm = LCK_PR;
+ else
+ lm = LCK_CR;
+
+ *ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_OPEN;
+ } else if (atomic_read(&obj->mot_lease_count) > 0) {
+ if (open_flags & FMODE_WRITE)
+ lm = LCK_CW;
+ else
+ lm = LCK_CR;
+
+ /* revoke lease */
+ *ibits = MDS_INODELOCK_OPEN;
+ try_layout = false;
+
+ lhc = &info->mti_lh[MDT_LH_LOCAL];
+ }
+		/* the message is labelled "lease count", so report the
+		 * counter tested by the branch above, not the opener count */
+		CDEBUG(D_INODE, "normal open:"DFID" lease count: %d, lm: %d\n",
+		       PFID(mdt_object_fid(obj)),
+		       atomic_read(&obj->mot_lease_count), lm);
+ }
+
mdt_lock_reg_init(lhc, lm);
/* one problem to return layout lock on open is that it may result
OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LL_BLOCK, 2);
}
+ /* Check if there is any other open handles after acquiring
+ * open lock. At this point, caching open handles have been revoked
+ * by open lock.
+ * XXX: Now only exclusive open is supported. Need to check the
+ * type of open for generic lease support. */
+ if (rc == 0 && acq_lease) {
+ struct ptlrpc_request *req = mdt_info_req(info);
+ struct mdt_export_data *med = &req->rq_export->exp_mdt_data;
+ struct mdt_file_data *mfd;
+ bool is_replay_or_resent;
+ int open_count = 0;
+
+ /* For lease: application can open a file and then apply lease,
+ * @handle contains original open handle in that case.
+ * In recovery, open REQ will be replayed and the lease REQ may
+ * be resent that means the open handle is already stale, so we
+ * need to fix it up here by finding new handle. */
+ is_replay_or_resent = req_is_replay(req) ||
+ lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT;
+
+ /* if the request is _not_ a replay request, rr_handle
+ * may be used to hold an openhandle which is issuing the
+ * lease request, so that this openhandle doesn't count. */
+ mfd = mdt_handle2mfd(med, info->mti_rr.rr_handle,
+ is_replay_or_resent);
+ if (mfd != NULL)
+ ++open_count;
+
+ CDEBUG(D_INODE, "acq_lease "DFID": openers: %d, want: %d\n",
+ PFID(mdt_object_fid(obj)),
+ atomic_read(&obj->mot_open_count), open_count);
+
+ if (atomic_read(&obj->mot_open_count) > open_count)
+ GOTO(out, rc = -EBUSY);
+ }
+ GOTO(out, rc);
+
+out:
RETURN(rc);
}
__u64 ibits, int rc)
{
__u64 open_flags = info->mti_spec.sp_cr_flags;
- struct mdt_lock_handle *ll = &info->mti_lh[MDT_LH_LAYOUT];
+ struct mdt_lock_handle *ll = &info->mti_lh[MDT_LH_LOCAL];
+ ENTRY;
+
+ if (req_is_replay(mdt_info_req(info)))
+ RETURN_EXIT;
+
+ /* Release local lock - the lock put in MDT_LH_LOCAL will never
+ * return to client side. */
+ if (lustre_handle_is_used(&ll->mlh_reg_lh))
+ mdt_object_unlock(info, obj, ll, 1);
- /* Release local layout lock - the layout lock put in MDT_LH_LAYOUT
- * will never return to client side. */
+ ll = &info->mti_lh[MDT_LH_LAYOUT];
+ /* Release local layout lock, layout was created */
if (lustre_handle_is_used(&ll->mlh_reg_lh)) {
LASSERT(!(ibits & MDS_INODELOCK_LAYOUT));
mdt_object_unlock(info, obj, ll, 1);
}
+ if (open_flags & MDS_OPEN_LEASE)
+ up_write(&obj->mot_open_sem);
+ else
+ up_read(&obj->mot_open_sem);
+
/* Cross-ref case, the lock should be returned to the client */
if (ibits == 0 || rc == -EREMOTE)
- return;
+ RETURN_EXIT;
if (!(open_flags & MDS_OPEN_LOCK) && !(ibits & MDS_INODELOCK_LAYOUT)) {
/* for the open request, the lock will only return to client
mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_LOCK);
mdt_object_unlock(info, obj, lhc, 1);
}
+ RETURN_EXIT;
}
int mdt_open_by_fid_lock(struct mdt_thread_info *info, struct ldlm_reply *rep,
{
const struct lu_env *env = info->mti_env;
struct mdt_device *mdt = info->mti_mdt;
- __u32 flags = info->mti_spec.sp_cr_flags;
+ __u64 flags = info->mti_spec.sp_cr_flags;
struct mdt_reint_record *rr = &info->mti_rr;
struct md_attr *ma = &info->mti_attr;
struct mdt_object *parent= NULL;
rc = mdt_object_open_lock(info, o, lhc, &ibits);
if (rc)
- GOTO(out, rc);
+ GOTO(out_unlock, rc);
if (ma->ma_valid & MA_PFID) {
parent = mdt_object_find(env, mdt, &ma->ma_pfid);
mdt_set_disposition(info, rep, DISP_LOOKUP_POS);
if (flags & MDS_OPEN_LOCK)
mdt_set_disposition(info, rep, DISP_OPEN_LOCK);
+ if (flags & MDS_OPEN_LEASE)
+ mdt_set_disposition(info, rep, DISP_OPEN_LEASE);
}
- GOTO(out, rc);
+ GOTO(out_unlock, rc);
-out:
+out_unlock:
mdt_object_open_unlock(info, o, lhc, ibits, rc);
- mdt_object_put(env, o);
- if (parent != NULL)
- mdt_object_put(env, parent);
- return rc;
+out:
+ mdt_object_put(env, o);
+ if (parent != NULL)
+ mdt_object_put(env, parent);
+ return rc;
}
int mdt_pin(struct mdt_thread_info* info)
if (!req_is_replay(req)) {
rc = mdt_object_open_lock(info, child, lhc, &ibits);
if (rc != 0)
- GOTO(out_child, result = rc);
+ GOTO(out_child_unlock, result = rc);
else if (create_flags & MDS_OPEN_LOCK)
mdt_set_disposition(info, ldlm_rep, DISP_OPEN_LOCK);
}
mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE);
}
}
- EXIT;
-out_child:
+ EXIT;
+out_child_unlock:
mdt_object_open_unlock(info, child, lhc, ibits, result);
- mdt_object_put(info->mti_env, child);
+out_child:
+ mdt_object_put(info->mti_env, child);
out_parent:
- mdt_object_unlock_put(info, parent, lh, result || !created);
+ mdt_object_unlock_put(info, parent, lh, result || !created);
out:
if (result)
lustre_msg_set_transno(req->rq_repmsg, 0);
struct md_attr *ma = &info->mti_attr;
int ret = MDT_IOEPOCH_CLOSED;
int rc = 0;
- int mode;
+ __u64 mode;
ENTRY;
mode = mfd->mfd_mode;
"needed on "DFID"\n", PFID(mdt_object_fid(o)));
}
} else {
- mdt_mfd_free(mfd);
- mdt_object_put(info->mti_env, o);
- }
+ /* adjust open and lease count */
+ if (mode & MDS_OPEN_LEASE) {
+ LASSERT(atomic_read(&o->mot_lease_count) > 0);
+ atomic_dec(&o->mot_lease_count);
+ }
+ LASSERT(atomic_read(&o->mot_open_count) > 0);
+ atomic_dec(&o->mot_open_count);
- RETURN(rc ? rc : ret);
+ mdt_mfd_free(mfd);
+ mdt_object_put(info->mti_env, o);
+ }
+
+ RETURN(rc ? rc : ret);
}
int mdt_close(struct mdt_thread_info *info)
#include <unistd.h>
#include <semaphore.h>
#include <time.h>
+#include <err.h>
#include <lustre/lustre_idl.h>
#include <lustre/lustreapi.h>
" C[num] create with optional stripes\n"
" d mkdir\n"
" D open(O_DIRECTORY)\n"
+" e[R|W|U] apply lease. R: Read; W: Write; U: Unlock\n"
+" E[+|-] get lease. +/-: expect lease to (not) exist\n"
" f statfs\n"
" F print FID\n"
" H[num] create HSM released file with num stripes\n"
exit(save_errno);
}
break;
- case 'f':
- if (statfs(fname, &stfs) == -1) {
- save_errno = errno;
- perror("statfs()");
- exit(save_errno);
- }
- break;
+ case 'e':
+ commands++;
+ switch (*commands) {
+ case 'U':
+ flags = F_UNLCK;
+ break;
+ case 'R':
+ flags = F_RDLCK;
+ break;
+ case 'W':
+ flags = F_WRLCK;
+ break;
+ default:
+ errx(-1, "unknown mode: %c", *commands);
+ }
+
+ rc = ioctl(fd, LL_IOC_SET_LEASE, flags);
+ if (rc < 0)
+ err(errno, "apply lease error");
+
+ if (flags != F_UNLCK)
+ break;
+
+ /* F_UNLCK, interpret return code */
+ if (rc > 0) {
+ const char *str = "Unknown";
+ if (rc == FMODE_READ)
+ str = "FMODE_READ";
+ else if (rc == FMODE_WRITE)
+ str = "FMODE_WRITE";
+ fprintf(stdout, "%s lease(%d) released.\n",
+ str, rc);
+ } else if (rc == 0) {
+ fprintf(stdout, "lease already broken.\n");
+ }
+ break;
+		case 'E':
+			/* E+ / E-: query the lease on fd and fail unless its
+			 * presence matches the expectation given by the
+			 * character that follows. */
+			commands++;
+			if (*commands != '-' && *commands != '+')
+				/* errx() terminates the line itself */
+				errx(-1, "unknown mode: %c", *commands);
+
+			rc = ioctl(fd, LL_IOC_GET_LEASE);
+			if (rc > 0) {
+				const char *str = "Unknown";
+
+				if (rc == FMODE_READ)
+					str = "FMODE_READ";
+				else if (rc == FMODE_WRITE)
+					str = "FMODE_WRITE";
+				fprintf(stdout, "%s lease(%d) has applied.\n",
+					str, rc);
+				if (*commands == '-')
+					errx(-1, "expect lease to not exist");
+			} else if (rc == 0) {
+				fprintf(stdout, "no lease applied.\n");
+				if (*commands == '+')
+					errx(-1, "expect lease exists");
+			} else {
+				/* this is the query path, not the release */
+				err(errno, "get lease error");
+			}
+			break;
+ case 'f':
+ if (statfs(fname, &stfs) == -1)
+ errx(-1, "statfs()");
+ break;
case 'F':
if (fd == -1)
rc = llapi_path2fid(fname, &fid);
}
run_test 207b "can refresh layout at open"
+test_208() {
+ # Exercise the lease ioctls through multiop: "e[R|W|U]" applies or
+ # releases a lease and "E[+|-]" asserts that a lease does / does not
+ # exist (see the multiop usage text).
+ # NOTE(review): "_" presumably makes multiop pause until it receives
+ # the SIGUSR1 sent below, and "c" presumably closes the file - confirm
+ # against multiop.
+ # FIXME: in this test suite, only RD lease is used. This is okay
+ # for now as only exclusive open is supported. After generic lease
+ # is done, this test suite should be revised. - Jinshan
+
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.4.52) ]] ||
+ { skip "Need MDS version at least 2.4.52"; return 0; }
+
+ echo "==== test 1: verify get lease work"
+ $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:eRE+eU || error "get lease error"
+
+ echo "==== test 2: verify lease can be broken by upcoming open"
+ $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc &
+ local PID=$!
+ sleep 1
+
+ # a second open of the same file is expected to break the lease
+ $MULTIOP $DIR/$tfile oO_RDONLY:c
+ kill -USR1 $PID && wait $PID || error "break lease error"
+
+ echo "==== test 3: verify lease can't be granted if an open already exists"
+ $MULTIOP $DIR/$tfile oO_RDONLY:_c &
+ local PID=$!
+ sleep 1
+
+ # the background opener is still holding the file, so eR must fail
+ $MULTIOP $DIR/$tfile oO_RDONLY:eReUc && error "apply lease should fail"
+ kill -USR1 $PID && wait $PID || error "open file error"
+
+ echo "==== test 4: lease can sustain over recovery"
+ $MULTIOP $DIR/$tfile oO_RDONLY:eR_E+eUc &
+ PID=$!
+ sleep 1
+
+ fail mds1
+
+ # E+ in the background process checks the lease is still there
+ kill -USR1 $PID && wait $PID || error "lease broken over recovery"
+
+ echo "==== test 5: lease broken can't be regained by replay"
+ $MULTIOP $DIR/$tfile oO_RDONLY:eR_E-eUc &
+ PID=$!
+ sleep 1
+
+ # open file to break lease and then recovery
+ $MULTIOP $DIR/$tfile oO_RDWR:c || error "open file error"
+ fail mds1
+
+ # E- in the background process checks the lease stayed broken
+ kill -USR1 $PID && wait $PID || error "lease not broken over recovery"
+
+ rm -f $DIR/$tfile
+}
+run_test 208 "Exclusive open"
+
test_212() {
size=`date +%s`
size=$((size % 8192 + 1))