X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_lib.c;h=c66de790f3a1d4f7a703043a8dbd307956849988;hb=35e024f797c2b53f500344406b908d56fdb637d9;hp=7658bc575f287151765e993cda06dc88d025afe0;hpb=ac606470fe01743b1af98b29c07cadbd792c9614;p=fs%2Flustre-release.git diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index 7658bc5..c66de79 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -3,33 +3,32 @@ * * Copyright (c) 2003 Cluster File Systems, Inc. * - * This file is part of Lustre, http://www.lustre.org. + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_MDS +#define DEBUG_SUBSYSTEM S_MDC #ifndef __KERNEL__ # include # include #endif -#include -#include -#include +#include +#include #include "mdc_internal.h" #ifndef __KERNEL__ @@ -39,91 +38,342 @@ #endif #endif -void mdc_readdir_pack(struct ptlrpc_request *req, int req_offset, - __u64 offset, __u32 size, struct lustre_id *mdc_id) +void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off, + __u32 size, struct ll_fid *fid) { struct mds_body *b; - b = lustre_msg_buf(req->rq_reqmsg, req_offset, sizeof (*b)); - b->id1 = *mdc_id; - b->size = offset; /* !! */ + b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + b->fsuid = current->fsuid; + b->fsgid = current->fsgid; + b->capability = current->cap_effective; + b->fid1 = *fid; + b->size = pg_off; /* !! */ + b->suppgid = -1; b->nlink = size; /* !! */ } +static void mdc_pack_body(struct mds_body *b) +{ + LASSERT (b != NULL); + + b->fsuid = current->fsuid; + b->fsgid = current->fsgid; + b->capability = current->cap_effective; +} + +void mdc_pack_req_body(struct ptlrpc_request *req, int offset, + __u64 valid, struct ll_fid *fid, int ea_size, int flags) +{ + struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + + if (fid) + b->fid1 = *fid; + b->valid = valid; + b->eadatasize = ea_size; + b->flags = flags; + mdc_pack_body(b); +} + +/* packing of MDS records */ +void mdc_create_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *op_data, const void *data, int datalen, + __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective, + __u64 rdev) +{ + struct mds_rec_create *rec; + char *tmp; + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + + rec->cr_opcode = REINT_CREATE; + rec->cr_fsuid = uid; + rec->cr_fsgid = gid; + rec->cr_cap = cap_effective; + rec->cr_fid = op_data->fid1; + memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); + rec->cr_mode = mode; + rec->cr_rdev = rdev; + rec->cr_time = op_data->mod_time; + rec->cr_suppgid = op_data->suppgids[0]; + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); + LOGL0(op_data->name, op_data->namelen, tmp); + + if (data) { + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen); + memcpy (tmp, data, datalen); + } +} + static __u32 mds_pack_open_flags(__u32 flags) { - return - (flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC | - MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA | - MDS_OPEN_HAS_OBJS)) | - ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) | - ((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) | - ((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) | - ((flags & O_APPEND) ? MDS_OPEN_APPEND : 0) | - ((flags & O_SYNC) ? MDS_OPEN_SYNC : 0) | - ((flags & O_DIRECTORY) ? MDS_OPEN_DIRECTORY : 0) | - 0; + __u32 cr_flags = (flags & (FMODE_READ | FMODE_WRITE | + MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA | + MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE | + MDS_OPEN_LOCK)); + if (flags & O_CREAT) + cr_flags |= MDS_OPEN_CREAT; + if (flags & O_EXCL) + cr_flags |= MDS_OPEN_EXCL; + if (flags & O_TRUNC) + cr_flags |= MDS_OPEN_TRUNC; + if (flags & O_APPEND) + cr_flags |= MDS_OPEN_APPEND; + if (flags & O_SYNC) + cr_flags |= MDS_OPEN_SYNC; + if (flags & O_DIRECTORY) + cr_flags |= MDS_OPEN_DIRECTORY; + if (flags & O_JOIN_FILE) + cr_flags |= MDS_OPEN_JOIN_FILE; +#ifdef FMODE_EXEC + if (flags & FMODE_EXEC) + cr_flags |= MDS_FMODE_EXEC; +#endif + return cr_flags; } /* packing of MDS records */ -void mdc_open_pack(struct lustre_msg *msg, int offset, +void mdc_join_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *op_data, __u64 head_size) +{ + struct mds_rec_join *rec; + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec)); + LASSERT(rec != NULL); + rec->jr_fid = op_data->fid2; + rec->jr_headsize = head_size; +} + +void mdc_open_pack(struct ptlrpc_request *req, int offset, struct mdc_op_data *op_data, __u32 mode, __u64 rdev, __u32 flags, const void *lmm, int lmmlen) { struct mds_rec_create *rec; char *tmp; - rec = lustre_msg_buf(msg, offset, sizeof (*rec)); + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); /* XXX do something about time, uid, gid */ rec->cr_opcode = REINT_OPEN; - if (op_data != NULL) - rec->cr_id = op_data->id1; - memset(&rec->cr_replayid, 0, sizeof(rec->cr_replayid)); + rec->cr_fsuid = current->fsuid; + rec->cr_fsgid = current->fsgid; + rec->cr_cap = current->cap_effective; + rec->cr_fid = op_data->fid1; + memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); rec->cr_mode = mode; rec->cr_flags = mds_pack_open_flags(flags); rec->cr_rdev = rdev; rec->cr_time = op_data->mod_time; + rec->cr_suppgid = op_data->suppgids[0]; if (op_data->name) { - tmp = lustre_msg_buf(msg, offset + 1, + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); LOGL0(op_data->name, op_data->namelen, tmp); } if (lmm) { rec->cr_flags |= MDS_OPEN_HAS_EA; - tmp = lustre_msg_buf(msg, offset + 2, lmmlen); +#ifndef __KERNEL__ + /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */ + rec->cr_replayfid = op_data->fid2; +#endif + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen); memcpy (tmp, lmm, lmmlen); } } -void mdc_getattr_pack(struct lustre_msg *msg, int offset, int valid, +static inline __u64 attr_pack(unsigned int ia_valid) { + __u64 sa_valid = 0; + + if (ia_valid & ATTR_MODE) + sa_valid |= MDS_ATTR_MODE; + if (ia_valid & ATTR_UID) + sa_valid |= MDS_ATTR_UID; + if (ia_valid & ATTR_GID) + sa_valid |= MDS_ATTR_GID; + if (ia_valid & ATTR_SIZE) + sa_valid |= MDS_ATTR_SIZE; + if (ia_valid & ATTR_ATIME) + sa_valid |= MDS_ATTR_ATIME; + if (ia_valid & ATTR_MTIME) + sa_valid |= MDS_ATTR_MTIME; + if (ia_valid & ATTR_CTIME) + sa_valid |= MDS_ATTR_CTIME; + if (ia_valid & ATTR_ATIME_SET) + sa_valid |= MDS_ATTR_ATIME_SET; + if (ia_valid & ATTR_MTIME_SET) + sa_valid |= MDS_ATTR_MTIME_SET; + if (ia_valid & ATTR_FORCE) + sa_valid |= MDS_ATTR_FORCE; + if (ia_valid & ATTR_ATTR_FLAG) + sa_valid |= MDS_ATTR_ATTR_FLAG; + if (ia_valid & ATTR_KILL_SUID) + sa_valid |= MDS_ATTR_KILL_SUID; + if (ia_valid & ATTR_KILL_SGID) + sa_valid |= MDS_ATTR_KILL_SGID; + if (ia_valid & ATTR_CTIME_SET) + sa_valid |= MDS_ATTR_CTIME_SET; + if (ia_valid & ATTR_FROM_OPEN) + sa_valid |= MDS_ATTR_FROM_OPEN; + if (ia_valid & MDS_OPEN_OWNEROVERRIDE) + /* NFSD hack (see bug 5781) */ + sa_valid |= MDS_OPEN_OWNEROVERRIDE; + return sa_valid; +} + +void mdc_setattr_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *data, struct iattr *iattr, void *ea, + int ealen, void *ea2, int ea2len) +{ + struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset, + sizeof(*rec)); + rec->sa_opcode = REINT_SETATTR; + rec->sa_fsuid = current->fsuid; + rec->sa_fsgid = current->fsgid; + rec->sa_cap = current->cap_effective; + rec->sa_fid = data->fid1; + rec->sa_suppgid = -1; + + if (iattr) { + rec->sa_valid = attr_pack(iattr->ia_valid); + rec->sa_mode = iattr->ia_mode; + rec->sa_uid = iattr->ia_uid; + rec->sa_gid = iattr->ia_gid; + rec->sa_size = iattr->ia_size; + rec->sa_atime = LTIME_S(iattr->ia_atime); + rec->sa_mtime = LTIME_S(iattr->ia_mtime); + rec->sa_ctime = LTIME_S(iattr->ia_ctime); + rec->sa_attr_flags = + ((struct ll_iattr_struct *)iattr)->ia_attr_flags; + if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid)) + rec->sa_suppgid = iattr->ia_gid; + else + rec->sa_suppgid = data->suppgids[0]; + } + + if (ealen == 0) + return; + + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen), ea, ealen); + + if (ea2len == 0) + return; + + memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ea2len), ea2, ea2len); +} + +void mdc_unlink_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *data) +{ + struct mds_rec_unlink *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + LASSERT (rec != NULL); + + rec->ul_opcode = REINT_UNLINK; + rec->ul_fsuid = current->fsuid; + rec->ul_fsgid = current->fsgid; + rec->ul_cap = current->cap_effective; + rec->ul_mode = data->create_mode; + rec->ul_suppgid = data->suppgids[0]; + rec->ul_fid1 = data->fid1; + rec->ul_fid2 = data->fid2; + rec->ul_time = data->mod_time; + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); + LASSERT (tmp != NULL); + LOGL0(data->name, data->namelen, tmp); +} + +void mdc_link_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *data) +{ + struct mds_rec_link *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + + rec->lk_opcode = REINT_LINK; + rec->lk_fsuid = current->fsuid; + rec->lk_fsgid = current->fsgid; + rec->lk_cap = current->cap_effective; + rec->lk_suppgid1 = data->suppgids[0]; + rec->lk_suppgid2 = data->suppgids[1]; + rec->lk_fid1 = data->fid1; + rec->lk_fid2 = data->fid2; + rec->lk_time = data->mod_time; + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); + LOGL0(data->name, data->namelen, tmp); +} + +void mdc_rename_pack(struct ptlrpc_request *req, int offset, + struct mdc_op_data *data, + const char *old, int oldlen, const char *new, int newlen) +{ + struct mds_rec_rename *rec; + char *tmp; + + rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); + + /* XXX do something about time, uid, gid */ + rec->rn_opcode = REINT_RENAME; + rec->rn_fsuid = current->fsuid; + rec->rn_fsgid = current->fsgid; + rec->rn_cap = current->cap_effective; + rec->rn_suppgid1 = data->suppgids[0]; + rec->rn_suppgid2 = data->suppgids[1]; + rec->rn_fid1 = data->fid1; + rec->rn_fid2 = data->fid2; + rec->rn_time = data->mod_time; + + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1); + LOGL0(old, oldlen, tmp); + + if (new) { + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1); + LOGL0(new, newlen, tmp); + } +} + +void mdc_getattr_pack(struct ptlrpc_request *req, int offset, __u64 valid, int flags, struct mdc_op_data *data) { struct mds_body *b; - b = lustre_msg_buf(msg, offset, sizeof (*b)); + b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b)); + b->fsuid = current->fsuid; + b->fsgid = current->fsgid; + b->capability = current->cap_effective; b->valid = valid; - b->flags = flags; + b->flags = flags | MDS_BFLAG_EXT_FLAGS; + /* skip MDS_BFLAG_EXT_FLAGS to verify the "client < 1.4.7" case + * refer to bug 12848. + */ + if (OBD_FAIL_CHECK(OBD_FAIL_MDC_OLD_EXT_FLAGS)) + b->flags &= ~MDS_BFLAG_EXT_FLAGS; + b->suppgid = data->suppgids[0]; - b->id1 = data->id1; - b->id2 = data->id2; + b->fid1 = data->fid1; + b->fid2 = data->fid2; if (data->name) { char *tmp; - tmp = lustre_msg_buf(msg, offset + 1, + tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); - LOGL0(data->name, data->namelen, tmp); + memcpy(tmp, data->name, data->namelen); + data->name = tmp; } } void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, - int valid, struct obd_client_handle *och) + __u64 valid, struct obd_client_handle *och) { struct mds_body *body; body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); - mdc_pack_id(&body->id1, oa->o_id, 0, oa->o_mode, 0, 0); + mdc_pack_fid(&body->fid1, oa->o_id, 0, oa->o_mode); memcpy(&body->handle, &och->och_fh, sizeof(body->handle)); if (oa->o_valid & OBD_MD_FLATIME) { body->atime = oa->o_atime; @@ -150,3 +400,62 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, body->valid |= OBD_MD_FLFLAGS; } } + +struct mdc_cache_waiter { + struct list_head mcw_entry; + wait_queue_head_t mcw_waitq; +}; + +static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw) +{ + int rc; + ENTRY; + spin_lock(&cli->cl_loi_list_lock); + rc = list_empty(&mcw->mcw_entry); + spin_unlock(&cli->cl_loi_list_lock); + RETURN(rc); +}; + +/* We record requests in flight in cli->cl_r_in_flight here. + * There is only one write rpc possible in mdc anyway. If this to change + * in the future - the code may need to be revisited. */ +void mdc_enter_request(struct client_obd *cli) +{ + struct mdc_cache_waiter mcw; + struct l_wait_info lwi = { 0 }; + + spin_lock(&cli->cl_loi_list_lock); + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters); + init_waitqueue_head(&mcw.mcw_waitq); + spin_unlock(&cli->cl_loi_list_lock); + l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi); + } else { + cli->cl_r_in_flight++; + spin_unlock(&cli->cl_loi_list_lock); + } +} + +void mdc_exit_request(struct client_obd *cli) +{ + struct list_head *l, *tmp; + struct mdc_cache_waiter *mcw; + + spin_lock(&cli->cl_loi_list_lock); + cli->cl_r_in_flight--; + + list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { + if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) { + /* No free request slots anymore */ + break; + } + + mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry); + list_del_init(&mcw->mcw_entry); + cli->cl_r_in_flight++; + wake_up(&mcw->mcw_waitq); + } + /* Empty waiting list? Decrease reqs in-flight number */ + + spin_unlock(&cli->cl_loi_list_lock); +}