Whamcloud - gitweb
land b1_5 onto HEAD
[fs/lustre-release.git] / lustre / mdc / mdc_lib.c
index 8130e88..7f1a4bc 100644 (file)
@@ -3,33 +3,32 @@
  *
  *  Copyright (c) 2003 Cluster File Systems, Inc.
  *
- *   This file is part of Lustre, http://www.lustre.org.
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
  *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
  *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
  *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_MDS
+#define DEBUG_SUBSYSTEM S_MDC
 #ifndef __KERNEL__
 # include <fcntl.h>
 # include <liblustre.h>
 #endif
-#include <linux/lustre_idl.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_mds.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
 #include "mdc_internal.h"
 
 #ifndef __KERNEL__
 #endif
 #endif
 
-void mdc_readdir_pack(struct ptlrpc_request *req, int req_offset,
-                      __u64 offset, __u32 size, struct lustre_id *mdc_id)
+void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off,
+                      __u32 size, struct ll_fid *fid)
 {
         struct mds_body *b;
 
-        b = lustre_msg_buf(req->rq_reqmsg, req_offset, sizeof (*b));
-        b->id1 = *mdc_id;
-        b->size = offset;                       /* !! */
+        b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+        b->fsuid = current->fsuid;
+        b->fsgid = current->fsgid;
+        b->capability = current->cap_effective;
+        b->fid1 = *fid;
+        b->size = pg_off;                       /* !! */
+        b->suppgid = -1;
         b->nlink = size;                        /* !! */
 }
 
+static void mdc_pack_body(struct mds_body *b)
+{
+        LASSERT (b != NULL);
+
+        b->fsuid = current->fsuid;
+        b->fsgid = current->fsgid;
+        b->capability = current->cap_effective;
+}
+
+void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
+                       __u64 valid, struct ll_fid *fid, int ea_size, int flags)
+{
+        struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
+
+        if (fid)
+                b->fid1 = *fid;
+        b->valid = valid;
+        b->eadatasize = ea_size;
+        b->flags = flags;
+        mdc_pack_body(b);
+}
+
 /* packing of MDS records */
-void mdc_open_pack(struct lustre_msg *msg, int offset,
-                   struct mdc_op_data *op_data, __u32 mode,
-                   __u64 rdev, __u32 flags, const void *lmm,
-                   int lmmlen, void *key, int keylen)
+void mdc_create_pack(struct ptlrpc_request *req, int offset,
+                     struct mdc_op_data *op_data, const void *data, int datalen,
+                     __u32 mode, __u32 uid, __u32 gid, __u32 cap_effective,
+                     __u64 rdev)
 {
         struct mds_rec_create *rec;
         char *tmp;
-        
-        rec = lustre_msg_buf(msg, offset, sizeof (*rec));
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+
+        rec->cr_opcode = REINT_CREATE;
+        rec->cr_fsuid = uid;
+        rec->cr_fsgid = gid;
+        rec->cr_cap = cap_effective;
+        rec->cr_fid = op_data->fid1;
+        memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
+        rec->cr_mode = mode;
+        rec->cr_rdev = rdev;
+        rec->cr_time = op_data->mod_time;
+        rec->cr_suppgid = op_data->suppgids[0];
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1);
+        LOGL0(op_data->name, op_data->namelen, tmp);
+
+        if (data) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen);
+                memcpy (tmp, data, datalen);
+        }
+}
+
+static __u32 mds_pack_open_flags(__u32 flags)
+{
+        return
+                (flags & (FMODE_READ | FMODE_WRITE |
+                          MDS_OPEN_DELAY_CREATE | MDS_OPEN_HAS_EA |
+                          MDS_OPEN_HAS_OBJS | MDS_OPEN_OWNEROVERRIDE |
+                          MDS_OPEN_LOCK)) |
+                ((flags & O_CREAT) ? MDS_OPEN_CREAT : 0) |
+                ((flags & O_EXCL) ? MDS_OPEN_EXCL : 0) |
+                ((flags & O_TRUNC) ? MDS_OPEN_TRUNC : 0) |
+                ((flags & O_APPEND) ? MDS_OPEN_APPEND : 0) |
+                ((flags & O_SYNC) ? MDS_OPEN_SYNC : 0) |
+                ((flags & O_DIRECTORY) ? MDS_OPEN_DIRECTORY : 0) |
+                ((flags & O_JOIN_FILE) ? MDS_OPEN_JOIN_FILE : 0) |
+#ifdef FMODE_EXEC
+                ((flags & FMODE_EXEC) ? MDS_FMODE_EXEC : 0) |
+#endif
+                0;
+}
+
+/* packing of MDS records */
+void mdc_join_pack(struct ptlrpc_request *req, int offset,
+                   struct mdc_op_data *op_data, __u64 head_size)
+{
+        struct mds_rec_join *rec;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*rec));
+        LASSERT(rec != NULL);
+        rec->jr_fid = op_data->fid2;
+        rec->jr_headsize = head_size;
+}
+
+void mdc_open_pack(struct ptlrpc_request *req, int offset,
+                   struct mdc_op_data *op_data, __u32 mode, __u64 rdev,
+                   __u32 flags, const void *lmm, int lmmlen)
+{
+        struct mds_rec_create *rec;
+        char *tmp;
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
 
         /* XXX do something about time, uid, gid */
         rec->cr_opcode = REINT_OPEN;
-        if (op_data != NULL)
-                rec->cr_id = op_data->id1;
-        memset(&rec->cr_replayid, 0, sizeof(rec->cr_replayid));
-        rec->cr_flags = mds_pack_open_flags(flags) | op_data->flags ;
-        rec->cr_time = op_data->mod_time;
+        rec->cr_fsuid = current->fsuid;
+        rec->cr_fsgid = current->fsgid;
+        rec->cr_cap = current->cap_effective;
+        rec->cr_fid = op_data->fid1;
+        memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid));
         rec->cr_mode = mode;
+        rec->cr_flags = mds_pack_open_flags(flags);
         rec->cr_rdev = rdev;
-        rec->cr_ioepoch = 0;
+        rec->cr_time = op_data->mod_time;
+        rec->cr_suppgid = op_data->suppgids[0];
 
         if (op_data->name) {
-                tmp = lustre_msg_buf(msg, offset + 1,
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
                                      op_data->namelen + 1);
                 LOGL0(op_data->name, op_data->namelen, tmp);
         }
 
         if (lmm) {
                 rec->cr_flags |= MDS_OPEN_HAS_EA;
-                tmp = lustre_msg_buf(msg, offset + 2, lmmlen);
+#ifndef __KERNEL__
+                /*XXX a hack for liblustre to set EA (LL_IOC_LOV_SETSTRIPE) */
+                rec->cr_replayfid = op_data->fid2;
+#endif
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, lmmlen);
                 memcpy (tmp, lmm, lmmlen);
         }
-        if (key) {
-                rec->cr_flags |= MDS_OPEN_HAS_KEY;
-                tmp = lustre_msg_buf(msg, offset + 3, keylen);
-                memcpy(tmp, key, keylen); 
+}
+
+void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
+                      struct mdc_op_data *data, struct iattr *iattr, void *ea,
+                      int ealen, void *ea2, int ea2len)
+{
+        struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, offset,
+                                                     sizeof(*rec));
+        rec->sa_opcode = REINT_SETATTR;
+        rec->sa_fsuid = current->fsuid;
+        rec->sa_fsgid = current->fsgid;
+        rec->sa_cap = current->cap_effective;
+        rec->sa_fid = data->fid1;
+        rec->sa_suppgid = -1;
+
+        if (iattr) {
+                rec->sa_valid = iattr->ia_valid;
+                rec->sa_mode = iattr->ia_mode;
+                rec->sa_uid = iattr->ia_uid;
+                rec->sa_gid = iattr->ia_gid;
+                rec->sa_size = iattr->ia_size;
+                rec->sa_atime = LTIME_S(iattr->ia_atime);
+                rec->sa_mtime = LTIME_S(iattr->ia_mtime);
+                rec->sa_ctime = LTIME_S(iattr->ia_ctime);
+                rec->sa_attr_flags =
+                               ((struct ll_iattr_struct *)iattr)->ia_attr_flags;
+                if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
+                        rec->sa_suppgid = iattr->ia_gid;
+                else
+                        rec->sa_suppgid = data->suppgids[0];
+        }
+
+        if (ealen == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 1, ealen), ea, ealen);
+
+        if (ea2len == 0)
+                return;
+
+        memcpy(lustre_msg_buf(req->rq_reqmsg, offset + 2, ea2len), ea2, ea2len);
+}
+
+void mdc_unlink_pack(struct ptlrpc_request *req, int offset,
+                     struct mdc_op_data *data)
+{
+        struct mds_rec_unlink *rec;
+        char *tmp;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+        LASSERT (rec != NULL);
+
+        rec->ul_opcode = REINT_UNLINK;
+        rec->ul_fsuid = current->fsuid;
+        rec->ul_fsgid = current->fsgid;
+        rec->ul_cap = current->cap_effective;
+        rec->ul_mode = data->create_mode;
+        rec->ul_suppgid = data->suppgids[0];
+        rec->ul_fid1 = data->fid1;
+        rec->ul_fid2 = data->fid2;
+        rec->ul_time = data->mod_time;
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1);
+        LASSERT (tmp != NULL);
+        LOGL0(data->name, data->namelen, tmp);
+}
+
+void mdc_link_pack(struct ptlrpc_request *req, int offset,
+                   struct mdc_op_data *data)
+{
+        struct mds_rec_link *rec;
+        char *tmp;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+
+        rec->lk_opcode = REINT_LINK;
+        rec->lk_fsuid = current->fsuid;
+        rec->lk_fsgid = current->fsgid;
+        rec->lk_cap = current->cap_effective;
+        rec->lk_suppgid1 = data->suppgids[0];
+        rec->lk_suppgid2 = data->suppgids[1];
+        rec->lk_fid1 = data->fid1;
+        rec->lk_fid2 = data->fid2;
+        rec->lk_time = data->mod_time;
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1);
+        LOGL0(data->name, data->namelen, tmp);
+}
+
+void mdc_rename_pack(struct ptlrpc_request *req, int offset,
+                     struct mdc_op_data *data,
+                     const char *old, int oldlen, const char *new, int newlen)
+{
+        struct mds_rec_rename *rec;
+        char *tmp;
+
+        rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec));
+
+        /* XXX do something about time, uid, gid */
+        rec->rn_opcode = REINT_RENAME;
+        rec->rn_fsuid = current->fsuid;
+        rec->rn_fsgid = current->fsgid;
+        rec->rn_cap = current->cap_effective;
+        rec->rn_suppgid1 = data->suppgids[0];
+        rec->rn_suppgid2 = data->suppgids[1];
+        rec->rn_fid1 = data->fid1;
+        rec->rn_fid2 = data->fid2;
+        rec->rn_time = data->mod_time;
+
+        tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1);
+        LOGL0(old, oldlen, tmp);
+
+        if (new) {
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1);
+                LOGL0(new, newlen, tmp);
         }
 }
 
-void mdc_getattr_pack(struct lustre_msg *msg, int offset,
-                      __u64 valid, int flags, struct mdc_op_data *data)
+void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid,
+                      int flags, struct mdc_op_data *data)
 {
         struct mds_body *b;
-        b = lustre_msg_buf(msg, offset, sizeof (*b));
+        b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
 
+        b->fsuid = current->fsuid;
+        b->fsgid = current->fsgid;
+        b->capability = current->cap_effective;
         b->valid = valid;
-        b->flags = flags;
+        b->flags = flags | MDS_BFLAG_EXT_FLAGS;
+        b->suppgid = data->suppgids[0];
 
-        b->id1 = data->id1;
-        b->id2 = data->id2;
+        b->fid1 = data->fid1;
+        b->fid2 = data->fid2;
         if (data->name) {
                 char *tmp;
-                tmp = lustre_msg_buf(msg, offset + 1,
+                tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1,
                                      data->namelen + 1);
                 LOGL0(data->name, data->namelen, tmp);
         }
 }
 
-void mdc_close_pack(struct ptlrpc_request *req, int offset,
-                    struct mdc_op_data *op_data,
-                    struct obd_client_handle *och)
+void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa,
+                    int valid, struct obd_client_handle *och)
 {
-        obd_valid valid = op_data->valid;
         struct mds_body *body;
 
-        body = lustre_msg_buf(req->rq_reqmsg, offset,
-                              sizeof(*body));
-        body->id1 = op_data->id1;
+        body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
 
+        mdc_pack_fid(&body->fid1, oa->o_id, 0, oa->o_mode);
         memcpy(&body->handle, &och->och_fh, sizeof(body->handle));
-        if (valid & OBD_MD_FLATIME) {
-                body->atime = op_data->atime;
+        if (oa->o_valid & OBD_MD_FLATIME) {
+                body->atime = oa->o_atime;
                 body->valid |= OBD_MD_FLATIME;
         }
-        if (valid & OBD_MD_FLMTIME) {
-                body->mtime = op_data->mtime;
+        if (oa->o_valid & OBD_MD_FLMTIME) {
+                body->mtime = oa->o_mtime;
                 body->valid |= OBD_MD_FLMTIME;
         }
-        if (valid & OBD_MD_FLCTIME) {
-                body->ctime = op_data->ctime;
+        if (oa->o_valid & OBD_MD_FLCTIME) {
+                body->ctime = oa->o_ctime;
                 body->valid |= OBD_MD_FLCTIME;
         }
-        if (valid & OBD_MD_FLSIZE) {
-                body->size = op_data->size;
+        if (oa->o_valid & OBD_MD_FLSIZE) {
+                body->size = oa->o_size;
                 body->valid |= OBD_MD_FLSIZE;
         }
-        if (valid & OBD_MD_FLBLOCKS) {
-                body->blocks = op_data->blocks;
+        if (oa->o_valid & OBD_MD_FLBLOCKS) {
+                body->blocks = oa->o_blocks;
                 body->valid |= OBD_MD_FLBLOCKS;
         }
-        if (valid & OBD_MD_FLFLAGS) {
-                body->flags = op_data->flags;
+        if (oa->o_valid & OBD_MD_FLFLAGS) {
+                body->flags = oa->o_flags;
                 body->valid |= OBD_MD_FLFLAGS;
         }
-        if (valid & OBD_MD_FLEPOCH) {
-                body->io_epoch = op_data->io_epoch;
-                body->valid |= OBD_MD_FLEPOCH;
-        }
 }
 
-/* 
- * these methods needed for saying higher levels that MDC does not pack/unpack
- * any EAs. This is needed to have real abstraction and do not try to recognize
- * what OBD type is to avoid calling these methods on it, as they may not be
- * implemented.
- *
- * Sometimes pack/unpack calls happen to MDC too. This is for instance default
- * striping info for directories and our goal here is to skip them with no
- * errors or any complains.
- */
-int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
-               struct lov_stripe_md *lsm)
+struct mdc_cache_waiter {       
+        struct list_head        mcw_entry;
+        wait_queue_head_t       mcw_waitq;
+};
+
+static int mdc_req_avail(struct client_obd *cli, struct mdc_cache_waiter *mcw)
 {
+        int rc;
         ENTRY;
-        RETURN(0);
-}
+        spin_lock(&cli->cl_loi_list_lock);
+        rc = list_empty(&mcw->mcw_entry);
+        spin_unlock(&cli->cl_loi_list_lock);
+        RETURN(rc);
+};
 
-int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
-                 struct lov_mds_md *lmm, int lmm_size)
+/* We record requests in flight in cli->cl_r_in_flight here.
+ * There is only one write rpc possible in mdc anyway. If this to change
+ * in the future - the code may need to be revisited. */
+void mdc_enter_request(struct client_obd *cli)
 {
-        ENTRY;
-        RETURN(0);
+        struct mdc_cache_waiter mcw;
+        struct l_wait_info lwi = { 0 };
+
+        spin_lock(&cli->cl_loi_list_lock);
+        if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+                list_add_tail(&mcw.mcw_entry, &cli->cl_cache_waiters);
+                init_waitqueue_head(&mcw.mcw_waitq);
+                spin_unlock(&cli->cl_loi_list_lock);
+                l_wait_event(mcw.mcw_waitq, mdc_req_avail(cli, &mcw), &lwi);
+        } else {
+                cli->cl_r_in_flight++;
+                spin_unlock(&cli->cl_loi_list_lock);
+        }
 }
 
+void mdc_exit_request(struct client_obd *cli)
+{
+        struct list_head *l, *tmp;
+        struct mdc_cache_waiter *mcw;
+
+        spin_lock(&cli->cl_loi_list_lock);
+        cli->cl_r_in_flight--;
+        list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
+                
+                if (cli->cl_r_in_flight >= cli->cl_max_rpcs_in_flight) {
+                        /* No free request slots anymore */
+                        break;
+                }
+
+                mcw = list_entry(l, struct mdc_cache_waiter, mcw_entry);
+                list_del_init(&mcw->mcw_entry);
+                cli->cl_r_in_flight++;
+                wake_up(&mcw->mcw_waitq);
+        }
+        /* Empty waiting list? Decrease reqs in-flight number */
+        
+        spin_unlock(&cli->cl_loi_list_lock);
+}