*
* Copyright (c) 2003 Cluster File Systems, Inc.
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
*/
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_MDC
#ifndef __KERNEL__
# include <fcntl.h>
# include <liblustre.h>
#endif
-#include <linux/lustre_idl.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_mds.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
#include "mdc_internal.h"
#ifndef __KERNEL__
#endif
#endif
-void mdc_readdir_pack(struct ptlrpc_request *req, int req_offset,
- __u64 offset, __u32 size, struct lustre_id *mdc_id)
+/*
+ * Fill the mds_body at request buffer @offset for a readdir request.
+ * The caller's fsuid/fsgid/effective capabilities and @fid are recorded and
+ * suppgid is set to -1.  Note the field reuse flagged by "!!" below: @pg_off
+ * travels in b->size and @size travels in b->nlink.
+ */
+void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off,
+ __u32 size, struct ll_fid *fid)
{
struct mds_body *b;
- b = lustre_msg_buf(req->rq_reqmsg, req_offset, sizeof (*b));
- b->id1 = *mdc_id;
- b->size = offset; /* !! */
+ b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+ b->fsuid = current->fsuid;
+ b->fsgid = current->fsgid;
+ b->capability = current->cap_effective;
+ b->fid1 = *fid;
+ b->size = pg_off; /* !! */
+ b->suppgid = -1;
b->nlink = size; /* !! */
}
+/*
+ * Record the calling task's filesystem uid/gid and effective capabilities
+ * in @b.  @b must not be NULL.
+ */
+static void mdc_pack_body(struct mds_body *b)
+{
+        LASSERT(b != NULL);
+
+        b->capability = current->cap_effective;
+        b->fsgid = current->fsgid;
+        b->fsuid = current->fsuid;
+}
+
+/*
+ * Fill the mds_body stored at request buffer @offset.
+ *
+ * @fid may be NULL, in which case b->fid1 is left untouched.  @valid,
+ * @ea_size and @flags are copied into the body, and the caller's
+ * fsuid/fsgid/capabilities are recorded by mdc_pack_body().
+ */
+void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
+                       __u64 valid, struct ll_fid *fid, int ea_size, int flags)
+{
+        struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+
+        /* Check the buffer before the first store: mdc_pack_body() asserts
+         * the same thing, but it is only called after b has been written. */
+        LASSERT(b != NULL);
+
+        if (fid)
+                b->fid1 = *fid;
+        b->valid = valid;
+        b->eadatasize = ea_size;
+        b->flags = flags;
+        mdc_pack_body(b);
+}
+
/* packing of MDS records */
-void mdc_open_pack(struct lustre_msg *msg, int offset,
- struct mdc_op_data *op_data, __u32 mode,
- __u64 rdev, __u32 flags, const void *lmm,
- int lmmlen, void *key, int keylen)
+/*
+ * Pack a REINT_CREATE record at request buffer @offset.  op_data->name is
+ * written to buffer @offset + 1 (sized namelen + 1; presumably LOGL0 adds
+ * the terminating NUL -- confirm) and, when @data is non-NULL, @datalen
+ * bytes of it are copied into buffer @offset + 2.
+ * Unlike the other packers in this file, the credentials come from the
+ * @uid/@gid/@cap_effective arguments rather than from "current".
+ */
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
+ struct mdc_op_data *op_data, const void *data, int datalen,
+ __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
+ __u64 rdev)
{
struct mds_rec_create *rec;
char *tmp;
-
- rec = lustre_msg_buf(msg, offset, sizeof (*rec));
+ rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+
+ rec->cr_opcode = REINT_CREATE;
+ rec->cr_fsuid = uid;
+ rec->cr_fsgid = gid;
+ rec->cr_cap = cap_effective;
+ rec->cr_fid = op_data->fid1;
+ memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
+ rec->cr_mode = mode;
+ rec->cr_rdev = rdev;
+ rec->cr_time = op_data->mod_time;
+ rec->cr_suppgid = op_data->suppgids[0];
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+ LOGL0(op_data->name, op_data->namelen, tmp);
+
+ if (data) {
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
+ memcpy (tmp, data, datalen);
+ }
+}
+
+/*
+ * Translate client-side open flags into the flag word sent to the MDS.
+ * Bits in the pass-through mask are forwarded unchanged; every O_* flag
+ * that is set is replaced by its MDS_OPEN_* counterpart.
+ */
+static __u32 mds_pack_open_flags(__u32 flags)
+{
+        __u32 packed;
+
+        /* bits whose value is forwarded as-is */
+        packed = flags & (FMODE_READ | FMODE_WRITE |
+                          MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
+                          MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE |
+                          MDS_OPEN_LOCK);
+
+        if (flags & O_CREAT)
+                packed |= MDS_OPEN_CREAT;
+        if (flags & O_EXCL)
+                packed |= MDS_OPEN_EXCL;
+        if (flags & O_TRUNC)
+                packed |= MDS_OPEN_TRUNC;
+        if (flags & O_APPEND)
+                packed |= MDS_OPEN_APPEND;
+        if (flags & O_SYNC)
+                packed |= MDS_OPEN_SYNC;
+        if (flags & O_DIRECTORY)
+                packed |= MDS_OPEN_DIRECTORY;
+        if (flags & O_JOIN_FILE)
+                packed |= MDS_OPEN_JOIN_FILE;
+#ifdef FMODE_EXEC
+        if (flags & FMODE_EXEC)
+                packed |= MDS_FMODE_EXEC;
+#endif
+
+        return packed;
+}
+
+/*
+ * Pack the join record at request buffer @offset: the fid taken from
+ * op_data->fid2 and the head size given by @head_size.
+ */
+void mdc_join_pack(struct ptlrpc_request *req, int offset,
+                   struct mdc_op_data *op_data, __u64 head_size)
+{
+        struct mds_rec_join *rec = lustre_msg_buf(req->rq_reqmsg, offset,
+                                                  sizeof(*rec));
+
+        LASSERT(rec != NULL);
+
+        rec->jr_headsize = head_size;
+        rec->jr_fid = op_data->fid2;
+}
+
+/*
+ * Pack a REINT_OPEN record at request buffer @offset.  Credentials come from
+ * "current" and @flags is translated by mds_pack_open_flags().  When
+ * op_data->name is set it is written to buffer @offset + 1; when @lmm is
+ * non-NULL, MDS_OPEN_HAS_EA is set and @lmmlen bytes are copied to buffer
+ * @offset + 2.
+ */
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
+ struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
+ __u32 flags, const void *lmm, int lmmlen)
+{
+ struct mds_rec_create *rec;
+ char *tmp;
+ rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
/* XXX do something about time, uid, gid */
rec->cr_opcode = REINT_OPEN;
- if (op_data != NULL)
- rec->cr_id = op_data->id1;
- memset(&rec->cr_replayid, 0, sizeof(rec->cr_replayid));
- rec->cr_flags = mds_pack_open_flags(flags) | op_data->flags ;
- rec->cr_time = op_data->mod_time;
+ rec->cr_fsuid = current->fsuid;
+ rec->cr_fsgid = current->fsgid;
+ rec->cr_cap = current->cap_effective;
+ rec->cr_fid = op_data->fid1;
+ memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
rec->cr_mode = mode;
+ rec->cr_flags = mds_pack_open_flags(flags);
rec->cr_rdev = rdev;
- rec->cr_ioepoch = 0;
+ rec->cr_time = op_data->mod_time;
+ rec->cr_suppgid = op_data->suppgids[0];
if (op_data->name) {
- tmp = lustre_msg_buf(msg, offset + 1,
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
op_data->namelen + 1);
LOGL0(op_data->name, op_data->namelen, tmp);
}
if (lmm) {
rec->cr_flags |= MDS_OPEN_HAS_EA;
- tmp = lustre_msg_buf(msg, offset + 2, lmmlen);
+#ifndef __KERNEL__
+ /* XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */
+ rec->cr_replayfid = op_data->fid2;
+#endif
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen);
memcpy (tmp, lmm, lmmlen);
}
- if (key) {
- rec->cr_flags |= MDS_OPEN_HAS_KEY;
- tmp = lustre_msg_buf(msg, offset + 3, keylen);
- memcpy(tmp, key, keylen);
+}
+
+/*
+ * Pack a REINT_SETATTR record at request buffer @offset.
+ *
+ * Credentials come from "current" and the target fid from data->fid1.
+ * When @iattr is non-NULL its fields are copied into the record and the
+ * supplementary gid is chosen: iattr->ia_gid if ATTR_GID is being set and
+ * the caller is in that group, data->suppgids[0] otherwise.  Without an
+ * @iattr the supplementary gid stays -1.
+ *
+ * @ea (@ealen bytes) is copied to buffer @offset + 1 and @ea2 (@ea2len
+ * bytes) to buffer @offset + 2; the second is only packed when the first
+ * is present.
+ */
+void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
+                      struct mdc_op_data *data, struct iattr *iattr, void *ea,
+                      int ealen, void *ea2, int ea2len)
+{
+        struct mds_rec_setattr *rec;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec));
+
+        rec->sa_opcode = REINT_SETATTR;
+        rec->sa_fid = data->fid1;
+        rec->sa_fsuid = current->fsuid;
+        rec->sa_fsgid = current->fsgid;
+        rec->sa_cap = current->cap_effective;
+        /* default, overridden below when an iattr is supplied */
+        rec->sa_suppgid = -1;
+
+        if (iattr != NULL) {
+                rec->sa_valid = iattr->ia_valid;
+                rec->sa_mode = iattr->ia_mode;
+                rec->sa_uid = iattr->ia_uid;
+                rec->sa_gid = iattr->ia_gid;
+                rec->sa_size = iattr->ia_size;
+                rec->sa_atime = LTIME_S(iattr->ia_atime);
+                rec->sa_mtime = LTIME_S(iattr->ia_mtime);
+                rec->sa_ctime = LTIME_S(iattr->ia_ctime);
+                rec->sa_attr_flags =
+                        ((struct ll_iattr_struct *)iattr)->ia_attr_flags;
+
+                if (!(iattr->ia_valid & ATTR_GID) ||
+                    !in_group_p(iattr->ia_gid))
+                        rec->sa_suppgid = data->suppgids[0];
+                else
+                        rec->sa_suppgid = iattr->ia_gid;
+        }
+
+        if (ealen != 0) {
+                memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen),
+                       ea, ealen);
+
+                if (ea2len != 0)
+                        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2,
+                                              ea2len), ea2, ea2len);
+        }
+}
+
+/*
+ * Pack a REINT_UNLINK record at request buffer @offset and data->name at
+ * buffer @offset + 1.  Both buffers are asserted to be non-NULL.
+ */
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
+                     struct mdc_op_data *data)
+{
+        struct mds_rec_unlink *rec;
+        char *name_buf;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec));
+        LASSERT(rec != NULL);
+
+        rec->ul_opcode = REINT_UNLINK;
+
+        /* credentials of the calling task */
+        rec->ul_fsuid = current->fsuid;
+        rec->ul_fsgid = current->fsgid;
+        rec->ul_cap = current->cap_effective;
+
+        /* operation parameters supplied by the caller */
+        rec->ul_fid1 = data->fid1;
+        rec->ul_fid2 = data->fid2;
+        rec->ul_mode = data->create_mode;
+        rec->ul_time = data->mod_time;
+        rec->ul_suppgid = data->suppgids[0];
+
+        name_buf = lustre_msg_buf(req->rq_reqmsg, offset + 1,
+                                  data->namelen + 1);
+        LASSERT(name_buf != NULL);
+        LOGL0(data->name, data->namelen, name_buf);
+}
+
+/*
+ * Pack a REINT_LINK record at request buffer @offset and data->name at
+ * buffer @offset + 1.  Credentials come from "current"; both fids, both
+ * supplementary gids and the modification time come from @data.
+ *
+ * Both buffers are asserted to be non-NULL before they are written, the
+ * same way mdc_unlink_pack() checks its buffers.
+ */
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
+                   struct mdc_op_data *data)
+{
+        struct mds_rec_link *rec;
+        char *tmp;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec));
+        LASSERT(rec != NULL);
+
+        rec->lk_opcode = REINT_LINK;
+        rec->lk_fsuid = current->fsuid;
+        rec->lk_fsgid = current->fsgid;
+        rec->lk_cap = current->cap_effective;
+        rec->lk_suppgid1 = data->suppgids[0];
+        rec->lk_suppgid2 = data->suppgids[1];
+        rec->lk_fid1 = data->fid1;
+        rec->lk_fid2 = data->fid2;
+        rec->lk_time = data->mod_time;
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1);
+        LASSERT(tmp != NULL);
+        LOGL0(data->name, data->namelen, tmp);
+}
+
+/*
+ * Pack a REINT_RENAME record at request buffer @offset.  Credentials come
+ * from "current"; both fids, both supplementary gids and the modification
+ * time come from @data.  @old (@oldlen bytes) is written to buffer
+ * @offset + 1 and, when @new is non-NULL, @new (@newlen bytes) is written
+ * to buffer @offset + 2; each buffer is requested one byte larger than the
+ * name it receives.
+ */
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
+ struct mdc_op_data *data,
+ const char *old, int oldlen, const char *new, int newlen)
+{
+ struct mds_rec_rename *rec;
+ char *tmp;
+
+ rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+
+ /* XXX do something about time, uid, gid */
+ rec->rn_opcode = REINT_RENAME;
+ rec->rn_fsuid = current->fsuid;
+ rec->rn_fsgid = current->fsgid;
+ rec->rn_cap = current->cap_effective;
+ rec->rn_suppgid1 = data->suppgids[0];
+ rec->rn_suppgid2 = data->suppgids[1];
+ rec->rn_fid1 = data->fid1;
+ rec->rn_fid2 = data->fid2;
+ rec->rn_time = data->mod_time;
+
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1);
+ LOGL0(old, oldlen, tmp);
+
+ if (new) {
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1);
+ LOGL0(new, newlen, tmp);
}
}
-void mdc_getattr_pack(struct lustre_msg *msg, int offset,
- __u64 valid, int flags, struct mdc_op_data *data)
+/*
+ * Fill the mds_body at request buffer @offset for a getattr request.
+ * Credentials come from "current"; MDS_BFLAG_EXT_FLAGS is always OR-ed into
+ * @flags; both fids and the first supplementary gid come from @data.  When
+ * data->name is set it is written to buffer @offset + 1.
+ * NOTE(review): @valid is an int here, while mdc_pack_req_body() takes the
+ * same field as __u64 -- confirm no valid bit above bit 31 is ever passed.
+ */
+void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid,
+ int flags, struct mdc_op_data *data)
{
struct mds_body *b;
- b = lustre_msg_buf(msg, offset, sizeof (*b));
+ b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+ b->fsuid = current->fsuid;
+ b->fsgid = current->fsgid;
+ b->capability = current->cap_effective;
b->valid = valid;
- b->flags = flags;
+ b->flags = flags | MDS_BFLAG_EXT_FLAGS;
+ b->suppgid = data->suppgids[0];
- b->id1 = data->id1;
- b->id2 = data->id2;
+ b->fid1 = data->fid1;
+ b->fid2 = data->fid2;
if (data->name) {
char *tmp;
- tmp = lustre_msg_buf(msg, offset + 1,
+ tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
data->namelen + 1);
LOGL0(data->name, data->namelen, tmp);
}
}
-void mdc_close_pack(struct ptlrpc_request *req, int offset,
- struct mdc_op_data *op_data,
- struct obd_client_handle *och)
+/*
+ * Pack an mds_body for a close request at request buffer @offset.  The fid
+ * is built from oa->o_id and oa->o_mode via mdc_pack_fid(), the open handle
+ * is copied from @och, and each of atime/mtime/ctime/size/blocks/flags is
+ * copied from @oa only when the matching OBD_MD_FL* bit is set in
+ * oa->o_valid (the same bit is then OR-ed into body->valid).
+ * NOTE(review): the @valid argument is not referenced by the new code, and
+ * body->valid is only OR-ed into, never assigned -- presumably the request
+ * buffer starts out zeroed; confirm.
+ */
+void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa,
+ int valid, struct obd_client_handle *och)
{
- obd_valid valid = op_data->valid;
struct mds_body *body;
- body = lustre_msg_buf(req->rq_reqmsg, offset,
- sizeof(*body));
- body->id1 = op_data->id1;
+ body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
+ mdc_pack_fid(&body->fid1, oa->o_id, 0, oa->o_mode);
memcpy(&body->handle, &och->och_fh, sizeof(body->handle));
- if (valid & OBD_MD_FLATIME) {
- body->atime = op_data->atime;
+ if (oa->o_valid & OBD_MD_FLATIME) {
+ body->atime = oa->o_atime;
body->valid |= OBD_MD_FLATIME;
}
- if (valid & OBD_MD_FLMTIME) {
- body->mtime = op_data->mtime;
+ if (oa->o_valid & OBD_MD_FLMTIME) {
+ body->mtime = oa->o_mtime;
body->valid |= OBD_MD_FLMTIME;
}
- if (valid & OBD_MD_FLCTIME) {
- body->ctime = op_data->ctime;
+ if (oa->o_valid & OBD_MD_FLCTIME) {
+ body->ctime = oa->o_ctime;
body->valid |= OBD_MD_FLCTIME;
}
- if (valid & OBD_MD_FLSIZE) {
- body->size = op_data->size;
+ if (oa->o_valid & OBD_MD_FLSIZE) {
+ body->size = oa->o_size;
body->valid |= OBD_MD_FLSIZE;
}
- if (valid & OBD_MD_FLBLOCKS) {
- body->blocks = op_data->blocks;
+ if (oa->o_valid & OBD_MD_FLBLOCKS) {
+ body->blocks = oa->o_blocks;
body->valid |= OBD_MD_FLBLOCKS;
}
- if (valid & OBD_MD_FLFLAGS) {
- body->flags = op_data->flags;
+ if (oa->o_valid & OBD_MD_FLFLAGS) {
+ body->flags = oa->o_flags;
body->valid |= OBD_MD_FLFLAGS;
}
- if (valid & OBD_MD_FLEPOCH) {
- body->io_epoch = op_data->io_epoch;
- body->valid |= OBD_MD_FLEPOCH;
- }
}
-/*
- * these methods needed for saying higher levels that MDC does not pack/unpack
- * any EAs. This is needed to have real abstraction and do not try to recognize
- * what OBD type is to avoid calling these methods on it, as they may not be
- * implemented.
- *
- * Sometimes pack/unpack calls happen to MDC too. This is for instance default
- * striping info for directories and our goal here is to skip them with no
- * errors or any complains.
- */
-int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
- struct lov_stripe_md *lsm)
+/* One entry per caller blocked in mdc_enter_request() waiting for a slot. */
+struct mdc_cache_waiter {
+ struct list_head mcw_entry; /* linkage on cli->cl_cache_waiters */
+ wait_queue_head_t mcw_waitq; /* woken by mdc_exit_request() */
+};
+
+/*
+ * Wait condition used by mdc_enter_request(): returns non-zero once @mcw is
+ * no longer linked on a list (mdc_exit_request() unlinks it with
+ * list_del_init() when it hands over a slot).  The check is made under
+ * cli->cl_loi_list_lock.
+ */
+static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
{
+ int rc;
ENTRY;
- RETURN(0);
-}
+ spin_lock(&cli->cl_loi_list_lock);
+ rc = list_empty(&mcw->mcw_entry);
+ spin_unlock(&cli->cl_loi_list_lock);
+ RETURN(rc);
+}
-int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
- struct lov_mds_md *lmm, int lmm_size)
+/* We record requests in flight in cli->cl_r_in_flight here.
+ * There is only one write rpc possible in mdc anyway. If this is to change
+ * in the future, the code may need to be revisited.
+ *
+ * Takes a slot when cl_r_in_flight is below cl_max_rpcs_in_flight; otherwise
+ * queues an on-stack waiter on cl_cache_waiters and sleeps until
+ * mdc_req_avail() reports that the waiter has been unlinked. */
+void mdc_enter_request(struct client_obd *cli)
{
- ENTRY;
- RETURN(0);
+ struct mdc_cache_waiter mcw;
+ struct l_wait_info lwi = { 0 };
+
+ spin_lock(&cli->cl_loi_list_lock);
+ if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+ /* both the list linkage and the wait queue are set up before the
+ * lock is dropped, so a waker never sees a half-built waiter */
+ list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
+ init_waitqueue_head(&mcw.mcw_waitq);
+ spin_unlock(&cli->cl_loi_list_lock);
+ l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi);
+ /* no increment on this path: mdc_exit_request() bumps
+ * cl_r_in_flight on the waiter's behalf before waking it */
+ } else {
+ cli->cl_r_in_flight++;
+ spin_unlock(&cli->cl_loi_list_lock);
+ }
}
+/*
+ * Give back one request slot and hand any slots that are now free to the
+ * callers queued on cl_cache_waiters, in list order.  Everything happens
+ * under cli->cl_loi_list_lock.
+ */
+void mdc_exit_request(struct client_obd *cli)
+{
+        struct list_head *pos, *next;
+        struct mdc_cache_waiter *waiter;
+
+        spin_lock(&cli->cl_loi_list_lock);
+        cli->cl_r_in_flight--;
+
+        list_for_each_safe(pos, next, &cli->cl_cache_waiters) {
+                /* stop as soon as every slot is taken again */
+                if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight)
+                        break;
+
+                /* account the slot to the waiter, then wake it */
+                waiter = list_entry(pos, struct mdc_cache_waiter, mcw_entry);
+                list_del_init(&waiter->mcw_entry);
+                cli->cl_r_in_flight++;
+                wake_up(&waiter->mcw_waitq);
+        }
+
+        spin_unlock(&cli->cl_loi_list_lock);
+}