Whamcloud - gitweb
- merge b_devel into b_proto (incl b_swab changes)
author: pschwan <pschwan>
Sat, 12 Apr 2003 20:06:58 +0000 (20:06 +0000)
committer: pschwan <pschwan>
Sat, 12 Apr 2003 20:06:58 +0000 (20:06 +0000)
- fix leak-all-bulk-pages bug
- Mike fixed a bug where a failed ptlrpc_queue_wait send buf would
  fail, but the user would still have to wait for the timeout to pass
  before being able to interrupt

lustre/include/ioctl.h [new file with mode: 0644]
lustre/liblustre/file.c [new file with mode: 0644]
lustre/liblustre/llite_lib.c [new file with mode: 0644]
lustre/liblustre/llite_lib.h [new file with mode: 0644]
lustre/liblustre/lltest.c [new file with mode: 0644]
lustre/liblustre/rw.c [new file with mode: 0644]
lustre/liblustre/super.c [new file with mode: 0644]

diff --git a/lustre/include/ioctl.h b/lustre/include/ioctl.h
new file mode 100644 (file)
index 0000000..a4ec8a5
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _ASMI386_IOCTL_H
+#define _ASMI386_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB -1 !
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms.  The i386 ioctl numbering scheme doesn't really enforce
+ * a type field.  De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here.  Please be sure to use the decoding macros
+ * below from now on.
+ */
+/* Width of each field in bits; the four widths add up to 32. */
+#define _IOC_NRBITS     8
+#define _IOC_TYPEBITS   8
+#define _IOC_SIZEBITS   14
+#define _IOC_DIRBITS    2
+
+/* Right-aligned masks, one per field (apply after shifting down). */
+#define _IOC_NRMASK     ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK   ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK   ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK    ((1 << _IOC_DIRBITS)-1)
+
+/* Bit offset of each field: nr is lowest, then type, size and dir. */
+#define _IOC_NRSHIFT    0
+#define _IOC_TYPESHIFT  (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT  (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT   (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE       0U
+#define _IOC_WRITE      1U
+#define _IOC_READ       2U
+
+/* Assemble a command number from its four fields.  The arguments are
+ * shifted but not masked, so an out-of-range value spills into the
+ * neighbouring field. */
+#define _IOC(dir,type,nr,size) (((dir)  << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | ((nr)   << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+/* For _IOR/_IOW/_IOWR the third argument is passed to sizeof(); it is the
+ * parameter type (or an object of it), not a byte count. */
+#define _IO(type,nr)            _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)      _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)      _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size)     _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr)            (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)           (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)             (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)           (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+/* Direction and size fields expressed as in-place (already shifted) masks. */
+#define IOC_IN          (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT         (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT       ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK    (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT   (_IOC_SIZESHIFT)
+
+#endif /* _ASMI386_IOCTL_H */
diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c
new file mode 100644 (file)
index 0000000..d656918
--- /dev/null
@@ -0,0 +1,551 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <sysio.h>
+#include <fs.h>
+#include <mount.h>
+#include <inode.h>
+#include <file.h>
+
+#include "llite_lib.h"
+
+/*
+ * Fill @data for an MDC request.
+ *
+ * @i1 is mandatory and supplies ino1/gen1/typ1/gid1.  @i2 is optional: when
+ * present it supplies the matching *2 fields, when NULL only ino2 is set
+ * (to 0) and gen2/typ2/gid2 are left untouched.  @name, @namelen and @mode
+ * are stored as given.
+ */
+void llu_prepare_mdc_op_data(struct mdc_op_data *data,
+                             struct inode *i1,
+                             struct inode *i2,
+                             const char *name,
+                             int namelen,
+                             int mode)
+{
+        struct llu_inode_info *first;
+        struct llu_inode_info *second;
+
+        LASSERT(i1);
+
+        /* Fields that do not depend on either inode. */
+        data->name = name;
+        data->namelen = namelen;
+        data->mode = mode;
+
+        first = llu_i2info(i1);
+        data->ino1 = first->lli_st_ino;
+        data->gen1 = first->lli_st_generation;
+        data->typ1 = first->lli_st_mode & S_IFMT;
+        data->gid1 = first->lli_st_gid;
+
+        if (i2 == NULL) {
+                /* No second inode: only the inode number is cleared. */
+                data->ino2 = 0;
+                return;
+        }
+
+        second = llu_i2info(i2);
+        data->ino2 = second->lli_st_ino;
+        data->gen2 = second->lli_st_generation;
+        data->typ2 = second->lli_st_mode & S_IFMT;
+        data->gid2 = second->lli_st_gid;
+}
+
+/*
+ * Create the object @name in directory @dir on the MDS and build a local
+ * inode for it.
+ *
+ * With no intent (or an intent without a disposition) an mdc_create()
+ * request is sent, using the caller's fsuid and either the caller's fsgid
+ * or, for a set-gid directory, the directory's gid.  The intent path is not
+ * supported yet and hits LBUG().
+ *
+ * Returns the new inode on success.  On failure an ERR_PTR()-encoded
+ * negative errno is returned, never NULL, so callers can rely on IS_ERR().
+ */
+static struct inode *llu_create_node(struct inode *dir, const char *name,
+                                     int namelen, const void *data, int datalen,
+                                     int mode, __u64 extra,
+                                     struct lookup_intent *it)
+{
+        struct inode *inode;
+        struct ptlrpc_request *request = NULL;
+        struct mds_body *body;
+        /* Placeholder timestamp; the real time(NULL) call is disabled. */
+        time_t time = 123456;//time(NULL);
+        struct llu_sb_info *sbi = llu_i2sbi(dir);
+
+        if (it && it->it_disposition) {
+                LBUG();
+#if 0
+                ll_invalidate_inode_pages(dir);
+#endif
+                request = it->it_data;
+                body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*body));
+        } else {
+                struct mdc_op_data op_data;
+                struct llu_inode_info *lli_dir = llu_i2info(dir);
+                int gid = current->fsgid;
+                int rc;
+
+                /* Set-gid directory: children inherit the directory's gid,
+                 * and sub-directories inherit the set-gid bit as well. */
+                if (lli_dir->lli_st_mode & S_ISGID) {
+                        gid = lli_dir->lli_st_gid;
+                        if (S_ISDIR(mode))
+                                mode |= S_ISGID;
+                }
+
+                llu_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
+                rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
+                                data, datalen, mode, current->fsuid, gid,
+                                time, extra, &request);
+                if (rc) {
+                        /* Encode the errno properly instead of casting an
+                         * int straight to a pointer. */
+                        inode = ERR_PTR(rc);
+                        goto out;
+                }
+                body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
+        }
+
+        inode = llu_new_inode(dir->i_fs, body->ino, body->mode);
+        if (!inode) {
+                /* FIXME more cleanup needed? */
+                /* A NULL return would slip past the caller's IS_ERR()
+                 * check, so report the failure as an error pointer. */
+                inode = ERR_PTR(-ENOMEM);
+                goto out;
+        }
+
+        llu_update_inode(inode, body, NULL);
+
+        if (it && it->it_disposition) {
+                /* We asked for a lock on the directory, but were
+                 * granted a lock on the inode.  Since we finally have
+                 * an inode pointer, stuff it in the lock. */
+#if 0
+                ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
+                                      inode);
+#endif
+        }
+
+ out:
+        ptlrpc_req_finished(request);
+        return inode;
+}
+
+/*
+ * Create the object named by @pnode in directory @dir with mode @mode and
+ * attach the resulting inode to pnode->pb_ino.
+ *
+ * Returns 0 on success, otherwise the negative errno encoded in the pointer
+ * returned by llu_create_node().  The intent handling inherited from the
+ * kernel client is compiled out, so no intent is passed down.
+ */
+int llu_create(struct inode *dir, struct pnode_base *pnode, int mode)
+{
+        struct inode *inode;
+#if 0
+        int rc = 0;
+
+        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n",
+               dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it));
+
+        it = dentry->d_it;
+
+        rc = ll_it_open_error(IT_OPEN_CREATE, it);
+        if (rc) {
+                LL_GET_INTENT(dentry, it);
+                ptlrpc_req_finished(it->it_data);
+                RETURN(rc);
+        }
+#endif
+        inode = llu_create_node(dir, pnode->pb_name.name, pnode->pb_name.len,
+                                NULL, 0, mode, 0, NULL);
+
+        /* NOTE(review): IS_ERR() does not match NULL, so this relies on
+         * llu_create_node() never returning a NULL inode. */
+        if (IS_ERR(inode))
+                RETURN(PTR_ERR(inode));
+
+        pnode->pb_ino = inode;
+
+        return 0;
+}
+
+/*
+ * Create the storage objects backing @inode through @conn and record the
+ * resulting stripe metadata on the MDS.
+ *
+ * @lsm is handed to obd_create() by address; the caller in this file
+ * passes NULL.  On success the stripe descriptor is stored in
+ * lli->lli_smd and 0 is returned.  On failure a negative errno is returned;
+ * if the objects had already been created they are destroyed again and the
+ * in-memory stripe descriptor is freed.
+ */
+static int llu_create_obj(struct lustre_handle *conn, struct inode *inode,
+                          struct lov_stripe_md *lsm)
+{
+        struct ptlrpc_request *req = NULL;
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct lov_mds_md *lmm = NULL;
+        struct obdo *oa;
+        struct iattr iattr;
+        struct mdc_op_data op_data;
+        int rc, err, lmm_size = 0;;
+        ENTRY;
+
+        oa = obdo_alloc();
+        if (!oa)
+                RETURN(-ENOMEM);
+
+        /* The object id is taken from the inode number. */
+        oa->o_mode = S_IFREG | 0600;
+        oa->o_id = lli->lli_st_ino;
+        /* Keep these 0 for now, because chown/chgrp does not change the
+         * ownership on the OST, and we don't want to allow BA OST NFS
+         * users to access these objects by mistake.
+         */
+        oa->o_uid = 0;
+        oa->o_gid = 0;
+        oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
+                OBD_MD_FLUID | OBD_MD_FLGID;
+
+        rc = obd_create(conn, oa, &lsm, NULL);
+        if (rc) {
+                CERROR("error creating objects for inode %lu: rc = %d\n",
+                       lli->lli_st_ino, rc);
+                /* Positive values are not valid errnos; map them to -EIO. */
+                if (rc > 0) {
+                        CERROR("obd_create returned invalid rc %d\n", rc);
+                        rc = -EIO;
+                }
+                GOTO(out_oa, rc);
+        }
+
+        LASSERT(lsm && lsm->lsm_object_id);
+        /* A non-negative return from obd_packmd() is used as the size of
+         * the packed metadata in lmm. */
+        rc = obd_packmd(conn, &lmm, lsm);
+        if (rc < 0)
+                GOTO(out_destroy, rc);
+
+        lmm_size = rc;
+
+        /* Save the stripe MD with this file on the MDS */
+        memset(&iattr, 0, sizeof(iattr));
+        iattr.ia_valid = ATTR_FROM_OPEN;
+
+        llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
+
+        rc = mdc_setattr(&llu_i2sbi(inode)->ll_mdc_conn, &op_data,
+                         &iattr, lmm, lmm_size, &req);
+        /* The request and the packed metadata are released whether or not
+         * the setattr succeeded; rc is checked afterwards. */
+        ptlrpc_req_finished(req);
+
+        obd_free_diskmd(conn, &lmm);
+
+        /* If we couldn't complete mdc_setattr() and store the stripe MD on
+         * the MDS, we need to destroy the objects now or they will be leaked.
+         */
+        if (rc) {
+                CERROR("error: storing stripe MD for %lu: rc %d\n",
+                       lli->lli_st_ino, rc);
+                GOTO(out_destroy, rc);
+        }
+        lli->lli_smd = lsm;
+
+        EXIT;
+out_oa:
+        /* Common exit: oa is always freed, rc carries the result. */
+        obdo_free(oa);
+        return rc;
+
+out_destroy:
+        /* Error path after the objects exist: reuse oa to destroy them,
+         * free the stripe descriptor, then leave through out_oa.  A failure
+         * of the destroy itself is only logged; rc keeps the first error. */
+        obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
+        oa->o_id = lsm->lsm_object_id;
+        oa->o_valid |= OBD_MD_FLID;
+        err = obd_destroy(conn, oa, lsm, NULL);
+        obd_free_memmd(conn, &lsm);
+        if (err) {
+                CERROR("error uncreating inode %lu objects: rc %d\n",
+                       lli->lli_st_ino, err);
+        }
+        goto out_oa;
+}
+
+/* FIXME currently no "it" passed in */
+/*
+ * Allocate the zero-filled per-open state for @lli and attach it as
+ * lli->lli_file_data.  The inode must not already carry open state, and
+ * allocation failure is treated as fatal (both enforced with LASSERT).
+ * @it is not used while the intent code below stays compiled out.
+ * Always returns 0.
+ */
+static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
+{
+        struct ll_file_data *open_state;
+#if 0
+        struct ptlrpc_request *req = it->it_data;
+        struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
+        ENTRY;
+#endif
+        LASSERT(!lli->lli_file_data);
+
+        /* calloc() hands back the zeroed block in a single step. */
+        open_state = calloc(1, sizeof(*open_state));
+        /* We can't handle this well without reorganizing ll_file_open and
+         * ll_mdc_close, so don't even try right now. */
+        LASSERT(open_state != NULL);
+
+#if 0
+        memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle));
+        fd->fd_mds_och.och_req = it->it_data;
+#endif
+        lli->lli_file_data = open_state;
+
+        RETURN(0);
+}
+
+/*
+ * Open the storage object described by @lsm through @conn on behalf of
+ * @inode.  The handle filled in by obd_open() is stored in the inode's
+ * per-open state (fd_ost_och), and on success the block count, mtime and
+ * ctime are copied from the reply into the inode.
+ *
+ * Returns 0 on success, -ENOMEM if no obdo could be allocated, or the
+ * error returned by obd_open().
+ */
+static int llu_osc_open(struct lustre_handle *conn, struct inode *inode,
+                        struct lov_stripe_md *lsm)
+{
+        struct ll_file_data *open_state = llu_i2info(inode)->lli_file_data;
+        struct obdo *obdo;
+        int rc;
+        ENTRY;
+
+        obdo = obdo_alloc();
+        if (obdo == NULL)
+                RETURN(-ENOMEM);
+
+        /* Identify the object and name the attributes wanted back. */
+        obdo->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
+                         OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+        obdo->o_mode = S_IFREG;
+        obdo->o_id = lsm->lsm_object_id;
+
+        rc = obd_open(conn, obdo, lsm, NULL, &open_state->fd_ost_och);
+        if (rc)
+                GOTO(out, rc);
+
+//        file->f_flags &= ~O_LOV_DELAY_CREATE;
+        obdo_to_inode(inode, obdo,
+                      OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        EXIT;
+out:
+        obdo_free(obdo);
+        return rc;
+}
+
+/*
+ * Open @inode: allocate the per-open state, create the storage objects if
+ * the inode has no stripe metadata yet, then open the objects.
+ *
+ * Returns 0 on success or a negative errno from llu_create_obj() or
+ * llu_osc_open().  On failure the per-open state allocated here is released
+ * again, so a later open of the same inode starts from a clean slate.
+ */
+static int llu_file_open(struct inode *inode)
+{
+#if 0
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+#endif
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct lustre_handle *conn = llu_i2obdconn(inode);
+        /* No intent is available yet; pass an explicit NULL rather than an
+         * uninitialized pointer. */
+        struct lookup_intent *it = NULL;
+        struct lov_stripe_md *lsm;
+        int rc = 0;
+
+#if 0
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
+        LL_GET_INTENT(file->f_dentry, it);
+        rc = ll_it_open_error(IT_OPEN_OPEN, it);
+        if (rc)
+                RETURN(rc);
+#endif
+        rc = llu_local_open(lli, it);
+        if (rc)
+                LBUG();
+#if 0
+        mdc_set_open_replay_data(&((struct ll_file_data *)
+                                 file->private_data)->fd_mds_och);
+#endif
+        lsm = lli->lli_smd;
+        if (lsm == NULL) {
+#if 0
+                if (file->f_flags & O_LOV_DELAY_CREATE) {
+                        CDEBUG(D_INODE, "delaying object creation\n");
+                        RETURN(0);
+                }
+#endif
+                if (!lli->lli_smd) {
+                        rc = llu_create_obj(conn, inode, NULL);
+                        if (rc)
+                                GOTO(out_close, rc);
+                } else {
+                        CERROR("warning: stripe already set on ino %lu\n",
+                               lli->lli_st_ino);
+                }
+                lsm = lli->lli_smd;
+        }
+
+        rc = llu_osc_open(conn, inode, lsm);
+        if (rc)
+                GOTO(out_close, rc);
+        RETURN(0);
+
+ out_close:
+//        ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
+        /* Until the MDS close above is wired in, at least undo
+         * llu_local_open(): otherwise the state leaks and the next open of
+         * this inode trips LASSERT(!lli->lli_file_data). */
+        free(lli->lli_file_data);
+        lli->lli_file_data = NULL;
+        return rc;
+}
+
+/*
+ * Open entry point for @pnode.  If the path node has no inode yet, the
+ * object is first created in the parent directory with @mode (forced to a
+ * regular file); the inode is then opened with llu_file_open().
+ * @flags is not consulted.
+ *
+ * Returns 0 on success or the error from llu_create()/llu_file_open().
+ */
+int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
+{
+        struct pnode_base *base = pnode->p_base;
+        struct inode *dir = pnode->p_parent->p_base->pb_ino;
+        /* FIXME later we must add the ldlm here */
+
+        LASSERT(dir);
+
+        /* libsysio forgot to guarantee mode is valid XXX */
+        mode |= S_IFREG;
+
+        if (base->pb_ino == NULL) {
+                int rc = llu_create(dir, base, mode);
+
+                if (rc)
+                        return rc;
+        }
+
+        LASSERT(base->pb_ino);
+        return llu_file_open(base->pb_ino);
+}
+
+
+/*
+ * Release the per-open state attached to @inode.
+ *
+ * The MDS close and replay bookkeeping inherited from the kernel client is
+ * still compiled out, so at present this only detaches and frees
+ * lli->lli_file_data; @mdc_conn is not used.  Returns 0 while the MDS close
+ * is disabled; once it is enabled the result is the close status forced
+ * negative.
+ */
+static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        /* req, flags and imp are only referenced by the disabled code. */
+        struct ptlrpc_request *req = NULL;
+        unsigned long flags;
+        struct obd_import *imp;
+        /* Must start at 0: the only assignment lives in the disabled code,
+         * and the value is returned below. */
+        int rc = 0;
+
+        /* FIXME add following code later FIXME */
+#if 0
+        /* Complete the open request and remove it from replay list */
+        rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, lli->lli_st_ino,
+                       inode->i_mode, &fd->fd_mds_och.och_fh, &req);
+        if (rc)
+                CERROR("inode %lu close failed: rc = %d\n",
+                                lli->lli_st_ino, rc);
+
+        imp = fd->fd_mds_och.och_req->rq_import;
+        LASSERT(imp != NULL);
+        spin_lock_irqsave(&imp->imp_lock, flags);
+
+        DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p",
+                 fd->fd_mds_och.och_req);
+
+        /* We held on to the request for replay until we saw a close for that
+         * file.  Now that we've closed it, it gets replayed on the basis of
+         * its transno only. */
+        fd->fd_mds_och.och_req->rq_replay = 0;
+
+        if (fd->fd_mds_och.och_req->rq_transno) {
+                /* This open created a file, so it needs replay as a
+                 * normal transaction now.  Our reference to it now
+                 * effectively owned by the imp_replay_list, and it'll
+                 * be committed just like other transno-having
+                 * requests from here on out. */
+
+                /* We now retain this close request, so that it is
+                 * replayed if the open is replayed.  We duplicate the
+                 * transno, so that we get freed at the right time,
+                 * and rely on the difference in xid to keep
+                 * everything ordered correctly.
+                 *
+                 * But! If this close was already given a transno
+                 * (because it caused real unlinking of an
+                 * open-unlinked file, f.e.), then we'll be ordered on
+                 * the basis of that and we don't need to do anything
+                 * magical here. */
+                if (!req->rq_transno) {
+                        req->rq_transno = fd->fd_mds_och.och_req->rq_transno;
+                        ptlrpc_retain_replayable_request(req, imp);
+                }
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+                /* Should we free_committed now? we always free before
+                 * replay, so it's probably a wash.  We could check to
+                 * see if the fd_req should already be committed, in
+                 * which case we can avoid the whole retain_replayable
+                 * dance. */
+        } else {
+                /* No transno means that we can just drop our ref. */
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+        }
+        ptlrpc_req_finished(fd->fd_mds_och.och_req);
+
+        /* Do this after the fd_req->rq_transno check, because we don't want
+         * to bounce off zero references. */
+        ptlrpc_req_finished(req);
+        fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC;
+#endif
+        lli->lli_file_data = NULL;
+        free(fd);
+
+        RETURN(-abs(rc));
+}
+
+/*
+ * Tear down the open state of @inode.
+ *
+ * Nothing is done (and 0 is returned) if the inode carries no per-open
+ * state.  Otherwise, for a regular file with stripe metadata the storage
+ * object is closed using the handle saved at open time, and then
+ * llu_mdc_close() releases the per-open state.
+ *
+ * Returns the object-close error if there was one, else the result of
+ * llu_mdc_close().
+ */
+static int llu_file_release(struct inode *inode)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct ll_file_data *open_state = lli->lli_file_data;
+        struct obdo oa;
+        int rc = 0, mdc_rc;
+
+        /* no process opened the file after an mcreate */
+        if (open_state == NULL)
+                RETURN(rc = 0);
+
+        /* we might not be able to get a valid handle on this file
+         * again so we really want to flush our write cache.. */
+        if (lsm && S_ISREG(inode->i_mode)) {
+                memset(&oa, 0, sizeof(oa));
+                oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID | OBD_MD_FLHANDLE;
+                oa.o_mode = S_IFREG;
+                oa.o_id = lsm->lsm_object_id;
+                /* The saved OST handle travels in the obdo's inline area. */
+                memcpy(&oa.o_inline, &open_state->fd_ost_och, FD_OSTDATA_SIZE);
+
+                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                if (rc)
+                        CERROR("inode %lu object close failed: rc = "
+                               "%d\n", lli->lli_st_ino, rc);
+        }
+
+        /* The first error wins; the MDS-side result only counts when the
+         * object close went fine. */
+        mdc_rc = llu_mdc_close(&sbi->ll_mdc_conn, inode);
+        if (rc == 0)
+                rc = mdc_rc;
+
+        RETURN(rc);
+}
+
+/* Close entry point: releases the open state of @inode through
+ * llu_file_release() and returns its result. */
+int llu_iop_close(struct inode *inode)
+{
+        return llu_file_release(inode);
+}
+
+/*
+ * Read entry point.  Builds an I/O context for @ioargs, performs the read
+ * with llu_file_read() before returning, and hands the context back
+ * through @ioctxp.
+ *
+ * Returns 0 without touching *ioctxp for an empty vector, -EINVAL for a
+ * negative vector length and -ENOMEM if no context could be allocated.
+ * A failed read still returns 0; the failure is recorded in the context
+ * (ioctx_cc holds the negative result and ioctx_errno a copy of it).
+ * NOTE(review): ioctx_errno receives the negative value as is; confirm the
+ * sign convention libsysio expects.
+ */
+int llu_iop_ipreadv(struct inode *ino,
+                    struct io_arguments *ioargs,
+                    struct ioctx **ioctxp)
+{
+        struct ioctx *ctx;
+
+        if (ioargs->ioarg_iovlen < 0)
+                return -EINVAL;
+        if (ioargs->ioarg_iovlen == 0)
+                return 0;
+
+        ctx = _sysio_ioctx_new(ino, ioargs);
+        if (ctx == NULL)
+                return -ENOMEM;
+
+        ctx->ioctx_cc = llu_file_read(ino, ctx->ioctx_iovec,
+                                      ctx->ioctx_iovlen, ctx->ioctx_offset);
+        if (ctx->ioctx_cc < 0)
+                ctx->ioctx_errno = ctx->ioctx_cc;
+
+        *ioctxp = ctx;
+        return 0;
+}
+
+/*
+ * Write entry point.  Builds an I/O context for @ioargs, performs the write
+ * with llu_file_write() before returning, and hands the context back
+ * through @ioctxp.
+ *
+ * Returns 0 without touching *ioctxp for an empty vector, -EINVAL for a
+ * negative vector length and -ENOMEM if no context could be allocated.
+ * A failed write still returns 0; the failure is recorded in the context
+ * (ioctx_cc holds the negative result and ioctx_errno a copy of it).
+ * NOTE(review): ioctx_errno receives the negative value as is; confirm the
+ * sign convention libsysio expects.
+ */
+int llu_iop_ipwritev(struct inode *ino,
+                     struct io_arguments *ioargs,
+                     struct ioctx **ioctxp)
+{
+        struct ioctx *ctx;
+
+        if (ioargs->ioarg_iovlen < 0)
+                return -EINVAL;
+        if (ioargs->ioarg_iovlen == 0)
+                return 0;
+
+        ctx = _sysio_ioctx_new(ino, ioargs);
+        if (ctx == NULL)
+                return -ENOMEM;
+
+        ctx->ioctx_cc = llu_file_write(ino, ctx->ioctx_iovec,
+                                       ctx->ioctx_iovlen, ctx->ioctx_offset);
+        if (ctx->ioctx_cc < 0)
+                ctx->ioctx_errno = ctx->ioctx_cc;
+
+        *ioctxp = ctx;
+        return 0;
+}
+
diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c
new file mode 100644 (file)
index 0000000..4fd83fd
--- /dev/null
@@ -0,0 +1,224 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <sysio.h>
+#include <fs.h>
+#include <mount.h>
+#include <inode.h>
+#include <file.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <portals/api-support.h> /* needed for ptpctl.h */
+#include <portals/ptlctl.h>    /* needed for parse_dump */
+
+#include "llite_lib.h"
+
+
+/* Process-wide state shared by the library. */
+ptl_handle_ni_t         tcpnal_ni;      /* filled in by PtlNIInit() in init_lib_portals() */
+struct task_struct *current;            /* allocated by init_current() */
+struct obd_class_user_state ocus;       /* passed to class_handle_ioctl() in lib_ioctl() */
+
+/* portals interfaces */
+/* Return the single network interface handle; @nal is ignored. */
+ptl_handle_ni_t *
+kportal_get_ni (int nal)
+{
+        return &tcpnal_ni;
+}
+
+/* Counterpart of kportal_get_ni(); there is nothing to release, so this
+ * is a no-op and @nal is ignored. */
+inline void
+kportal_put_ni (int nal)
+{
+        return;
+}
+
+struct ldlm_namespace;
+struct ldlm_res_id;
+struct obd_import;
+
+extern int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, int flags);
+extern int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only);
+extern int ldlm_replay_locks(struct obd_import *imp);
+
+/*
+ * Look up a symbol by name.  Recognized names are "tcpnal_ni",
+ * "ldlm_cli_cancel_unused", "ldlm_namespace_cleanup" and
+ * "ldlm_replay_locks"; the address of the matching object or function is
+ * returned.  Any other name yields NULL.
+ */
+void *inter_module_get(char *arg)
+{
+        if (strcmp(arg, "tcpnal_ni") == 0)
+                return &tcpnal_ni;
+
+        if (strcmp(arg, "ldlm_cli_cancel_unused") == 0)
+                return ldlm_cli_cancel_unused;
+
+        if (strcmp(arg, "ldlm_namespace_cleanup") == 0)
+                return ldlm_namespace_cleanup;
+
+        if (strcmp(arg, "ldlm_replay_locks") == 0)
+                return ldlm_replay_locks;
+
+        return NULL;
+}
+
+/*
+ * Allocate and initialize the global `current` task descriptor.
+ *
+ * @comm is copied into current->comm, truncated if necessary and always
+ * NUL-terminated.  The pid is taken from getpid(); fsuid, fsgid and the
+ * effective capabilities start at 0 and the pending set is cleared.
+ * Allocation failure is fatal (LASSERT), since the function has no way to
+ * report an error.
+ */
+void init_current(char *comm)
+{
+        current = malloc(sizeof(*current));
+        LASSERT(current != NULL);
+        current->fs = malloc(sizeof(*current->fs));
+        LASSERT(current->fs != NULL);
+
+        /* strncpy() does not terminate the destination when the source
+         * fills it completely, so terminate it explicitly. */
+        strncpy(current->comm, comm, sizeof(current->comm));
+        current->comm[sizeof(current->comm) - 1] = '\0';
+
+        current->pid = getpid();
+        current->fsuid = 0;
+        current->fsgid = 0;
+        current->cap_effective = 0;
+        memset(&current->pending, 0, sizeof(current->pending));
+}
+
+ptl_nid_t tcpnal_mynid;
+
+/*
+ * Bring up Portals and initialize the network interface into tcpnal_ni.
+ * On success, debugging is enabled on the interface with an all-ones mask
+ * and 0 is returned.  If PtlNIInit() fails, the error is logged, PtlFini()
+ * is called and the PtlNIInit() error code is returned.
+ */
+int init_lib_portals()
+{
+        int rc;
+
+        PtlInit();
+        rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni);
+        if (rc == 0) {
+                PtlNIDebug(tcpnal_ni, ~0);
+                return rc;
+        }
+
+        CERROR("ksocknal: PtlNIInit failed: error %d\n", rc);
+        PtlFini();
+        RETURN (rc);
+}
+
+extern int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, unsigned long arg);
+
+struct mount_option_s mount_option = {NULL, NULL};
+
+/* FIXME simple arg parser FIXME */
+/*
+ * Parse the mount option string carried in the ioctl data @arg.
+ *
+ * The string has the form "key=value[,key=value...]" with the keys "osc"
+ * and "mdc"; each value is duplicated into the corresponding field of the
+ * global mount_option.  Parsing stops at the first malformed or unknown
+ * entry, or on allocation failure; values stored before that point are
+ * kept.  The ioctl buffer is released on every path once it has been
+ * obtained.
+ */
+void parse_mount_options(void *arg)
+{
+        char *buf = NULL;
+        struct obd_ioctl_data *data;
+        char *ptr, *comma, *eq, **tgt, *v, *copy;
+        int len;
+
+        if (obd_ioctl_getdata(&buf, &len, arg)) {
+                CERROR("OBD ioctl: data error\n");
+                return;
+        }
+        data = (struct obd_ioctl_data *)buf;
+        ptr = data->ioc_inlbuf1;
+        /* Nothing to parse, and "%s" must not be given a NULL pointer. */
+        if (ptr == NULL)
+                goto out;
+        printf("mount option: %s\n", ptr);
+
+        while (ptr) {
+                eq = strchr(ptr, '=');
+                if (!eq)
+                        goto out;
+
+                /* Split "key=value" in place so the key can be compared. */
+                *eq = 0;
+                if (!strcmp("osc", ptr))
+                        tgt = &mount_option.osc_uuid;
+                else if (!strcmp("mdc", ptr))
+                        tgt = &mount_option.mdc_uuid;
+                else {
+                        printf("Unknown mount option %s\n", ptr);
+                        goto out;
+                }
+
+                /* Terminate the value at the next comma, if any, and
+                 * remember where the following entry starts. */
+                v = eq + 1;
+                comma = strchr(v, ',');
+                if (comma) {
+                        *comma = 0;
+                        ptr = comma + 1;
+                } else
+                        ptr = NULL;
+
+                copy = malloc(strlen(v)+1);
+                if (copy == NULL) {
+                        CERROR("out of memory copying mount option\n");
+                        goto out;
+                }
+                strcpy(copy, v);
+                *tgt = copy;
+        }
+
+ out:
+        /* Single exit so the ioctl buffer is never leaked. */
+        if (buf)
+                obd_ioctl_freedata(buf, len);
+}
+
+/*
+ * ioctl dispatcher handed to parse_dump().
+ *
+ * Requests for devices other than OBD_DEV_ID are ignored and return 0.
+ * OBD_IOC_MOUNTOPT is handled locally by parse_mount_options() and also
+ * returns 0.  Everything else is forwarded to class_handle_ioctl(), the
+ * result is printed, and its return code is passed back.
+ */
+int lib_ioctl(int dev_id, int opc, void * ptr)
+{
+        struct obd_ioctl_data *ioc = ptr;
+        int rc;
+
+        if (dev_id != OBD_DEV_ID)
+                return (0);
+
+        if (opc == OBD_IOC_MOUNTOPT) {
+                parse_mount_options(ptr);
+                return 0;
+        }
+
+        rc = class_handle_ioctl(&ocus, opc, (unsigned long)ptr);
+
+        /* you _may_ need to call obd_ioctl_unpack or some
+           other verification function if you want to use ioc
+           directly here */
+        printf ("processing ioctl cmd: %x buf len: %d, rc %d\n",
+                opc,  ioc->ioc_len, rc);
+
+        return rc;
+}
+
+/*
+ * One-time library initialization.
+ *
+ * @arg is a dotted-quad address string; its host-byte-order value becomes
+ * tcpnal_mynid.  The subsystems are then initialized in a fixed order,
+ * stopping at the first one that reports failure, after which the
+ * configuration dump in /tmp/DUMP_FILE is replayed through lib_ioctl().
+ *
+ * Returns -1 if any initializer or the dump replay fails, otherwise the
+ * result of registering the "llite" file system with libsysio.
+ */
+int lllib_init(char *arg)
+{
+        int failed;
+
+        tcpnal_mynid = ntohl(inet_addr(arg));
+        INIT_LIST_HEAD(&ocus.ocus_conns);
+
+        init_current("dummy");
+
+        /* Short-circuit evaluation keeps the original ordering and stops
+         * at the first failing initializer. */
+        failed = init_obdclass() ||
+                 init_lib_portals() ||
+                 ptlrpc_init() ||
+                 ldlm_init() ||
+                 mdc_init() ||
+                 lov_init() ||
+                 osc_init();
+        if (failed)
+                return -1;
+
+        if (parse_dump("/tmp/DUMP_FILE", lib_ioctl) != 0)
+                return -1;
+
+        return _sysio_fssw_register("llite", &llu_fssw_ops);
+}
+
+/* FIXME */
+/*
+ * Fill all 16 bytes of @uuid_out with values drawn from rand().
+ *
+ * The array parameter is really a pointer, so sizeof(uuid_out) would give
+ * the pointer size and leave part of the buffer unwritten; the length is
+ * therefore spelled out.  Writing byte by byte also avoids any alignment
+ * requirement on the caller's buffer.  rand() is not a strong source of
+ * randomness, hence the FIXME.
+ */
+void generate_random_uuid(unsigned char uuid_out[16])
+{
+        int i;
+
+        for (i = 0; i < 16; i++)
+                uuid_out[i] = (unsigned char)(rand() & 0xff);
+}
+
diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h
new file mode 100644 (file)
index 0000000..107e51f
--- /dev/null
@@ -0,0 +1,129 @@
+#ifndef __LLU_H_
+#define __LLU_H_
+
+#include <liblustre.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <portals/procbridge.h>
+#include <linux/lustre_lite.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/* Per-open state; attached to an inode as llu_inode_info.lli_file_data. */
+struct ll_file_data {
+        struct obd_client_handle fd_mds_och;    /* MDS-side open handle */
+        struct obd_client_handle fd_ost_och;    /* handle filled in by obd_open() */
+        __u32 fd_flags;
+};
+
+/* Per-filesystem state; reached through filesys.fs_private and through
+ * llu_inode_info.lli_sbi. */
+struct llu_sb_info
+{
+        struct obd_uuid         ll_sb_uuid;
+        struct lustre_handle    ll_mdc_conn;    /* connection used for MDC requests */
+        struct lustre_handle    ll_osc_conn;    /* connection used for object I/O */
+        obd_id                  ll_rootino;
+        int                     ll_flags;
+        struct list_head        ll_conn_chain;
+};
+
+/* Per-inode private state; stored in inode.i_private. */
+struct llu_inode_info {
+       struct llu_sb_info      *lli_sbi;       /* owning filesystem state */
+       struct ll_fid           lli_fid;
+        struct lov_stripe_md   *lli_smd;        /* stripe metadata; NULL until objects exist */
+        char                   *lli_symlink_name;
+        /*struct semaphore      lli_open_sem;*/
+        unsigned long          lli_flags;
+        struct list_head       lli_read_extents;
+
+       /* in libsysio we have no chance to store data in file,
+        * so place it here */
+       struct ll_file_data     *lli_file_data;
+
+       /* stat FIXME not 64 bit clean */
+       dev_t                   lli_st_dev;
+       ino_t                   lli_st_ino;
+       mode_t                  lli_st_mode;
+       nlink_t                 lli_st_nlink;
+       uid_t                   lli_st_uid;
+       gid_t                   lli_st_gid;
+       dev_t                   lli_st_rdev;
+       loff_t                  lli_st_size;
+       unsigned int            lli_st_blksize;
+       unsigned int            lli_st_blocks;
+       time_t                  lli_st_atime;
+       time_t                  lli_st_mtime;
+       time_t                  lli_st_ctime;
+
+       /* not for stat, change it later */
+       int                     lli_st_flags;
+       unsigned long           lli_st_generation;
+};
+
+/* Filesystem -> llite per-filesystem state (stored in fs_private). */
+static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs)
+{
+       return (struct llu_sb_info*)(fs->fs_private);
+}
+
+/* Inode -> llite per-inode state (stored in i_private). */
+static inline struct llu_inode_info *llu_i2info(struct inode *inode)
+{
+       return (struct llu_inode_info*)(inode->i_private);
+}
+
+/* Inode -> per-filesystem state, via the inode's private data. */
+static inline struct llu_sb_info *llu_i2sbi(struct inode *inode)
+{
+        return llu_i2info(inode)->lli_sbi;
+}
+
+/* Resolve the MDC connection of @sbi to its client_obd.  A connection that
+ * does not map to a device is treated as a bug (LBUG). */
+static inline struct client_obd *sbi2mdc(struct llu_sb_info *sbi)
+{
+       struct obd_device *mdc_dev;
+
+       mdc_dev = class_conn2obd(&sbi->ll_mdc_conn);
+       if (!mdc_dev)
+               LBUG();
+       return &mdc_dev->u.cli;
+}
+
+/* Inode -> the OSC connection of the filesystem it belongs to. */
+static inline struct lustre_handle *llu_i2obdconn(struct inode *inode)
+{
+        struct llu_sb_info *sbi = llu_i2info(inode)->lli_sbi;
+
+        return &sbi->ll_osc_conn;
+}
+
+
+/* Targets named on the mount option string ("mdc=...,osc=..."); each field
+ * is NULL until the corresponding option has been parsed. */
+struct mount_option_s
+{
+       char *mdc_uuid;
+       char *osc_uuid;
+};
+
+/* llite_lib.c */
+void generate_random_uuid(unsigned char uuid_out[16]);
+
+extern struct mount_option_s mount_option;
+
+/* super.c */
+void llu_update_inode(struct inode *inode, struct mds_body *body,
+                      struct lov_stripe_md *lmm);
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
+struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode);
+
+extern struct fssw_ops llu_fssw_ops;
+
+/* file.c */
+int llu_create(struct inode *dir, struct pnode_base *pnode, int mode);
+int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
+int llu_iop_close(struct inode *inode);
+int llu_iop_ipreadv(struct inode *ino,
+                    struct io_arguments *ioargs,
+                    struct ioctx **ioctxp);
+int llu_iop_ipwritev(struct inode *ino,
+                     struct io_arguments *ioargs,
+                     struct ioctx **ioctxp);
+
+/* rw.c */
+int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED);
+ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec,
+                      size_t iovlen, loff_t pos);
+ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec,
+                       size_t iovlen, loff_t pos);
+
+#endif
diff --git a/lustre/liblustre/lltest.c b/lustre/liblustre/lltest.c
new file mode 100644 (file)
index 0000000..8f38fe7
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ *    This Cplant(TM) source code is the property of Sandia National
+ *    Laboratories.
+ *
+ *    This Cplant(TM) source code is copyrighted by Sandia National
+ *    Laboratories.
+ *
+ *    The redistribution of this Cplant(TM) source code is subject to the
+ *    terms of the GNU Lesser General Public License
+ *    (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html)
+ *
+ *    Cplant(TM) Copyright 1998-2003 Sandia Corporation. 
+ *    Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
+ *    license for use of this work by or on behalf of the US Government.
+ *    Export of this program may require a license from the United States
+ *    Government.
+ */
+
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Questions or comments about this library should be sent to:
+ *
+ * Lee Ward
+ * Sandia National Laboratories, New Mexico
+ * P.O. Box 5800
+ * Albuquerque, NM 87185-1110
+ *
+ * lee@sandia.gov
+ */
+
+#define _BSD_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/queue.h>
+#include <sys/statvfs.h>
+
+#include <sysio.h>
+#include <mount.h>
+
+
+/*
+ * Simple read/write smoke test: mount the llite driver as the root file
+ * system, write a run of pages to /newfile01 and read them back.
+ *
+ * Usage: <program> <arg>   (<arg> is passed unchanged to lllib_init())
+ */
+
+extern int lllib_init(char *arg);
+
+char   *root_driver = "llite";
+char   *root_path = "/";
+unsigned mntflgs = 0;
+struct mount root_mount;
+
+extern int portal_debug;
+extern int portal_subsystem_debug;
+
+char* files[] = {"/dir1", "/dir1/file1", "/dir1/file2", "/dir1/dir2", "/dir1/dir2/file3"};
+
+int
+main(int argc, char * const argv[])
+{
+       struct stat statbuf;
+       int     err, i, fd, written, readed;
+       char pgbuf[4096], readbuf[4096];
+       int npages;
+
+       if (_sysio_init() != 0) {
+               perror("init sysio");
+               exit(1);
+       }
+       err = lllib_init(argv[1]);
+       if (err) {
+               perror("init llite driver");
+               exit(1);
+       }       
+
+       err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
+       if (err) {
+               errno = -err;
+               perror(root_driver);
+               exit(1);
+       }
+#if 0
+       for (i=0; i< sizeof(files)/sizeof(char*); i++) {
+               printf("******** stat %s *********\n", files[i]);
+               /* XXX ugly, only for testing */
+               err = fixme_lstat(files[i], &statbuf);
+               if (err)
+                       perror(root_driver);
+               printf("******** end stat %s: %d*********\n", files[i], err);
+       }
+#endif
+#if 1
+       portal_debug = 0;
+       portal_subsystem_debug = 0;
+       npages = 100;
+
+       fd = open("/newfile01", O_RDWR|O_CREAT|O_TRUNC, 00664);
+       printf("***************** open return %d ****************\n", fd);
+
+       printf("***************** begin write pages ****************\n");
+       for (i = 0; i < npages; i++ ) {
+               memset(pgbuf, ('A'+ i%10), 4096);
+               written = write(fd, pgbuf, 4096);
+               printf(">>> page %d: %d bytes written\n", i, written);
+       }
+
+       printf("***************** begin read pages ****************\n");
+       lseek(fd, 0, SEEK_SET);
+
+       for (i = 0; i < npages; i++ ) {
+               memset(readbuf, '8', 4096);
+               readed = read(fd, readbuf, 4096);
+               readbuf[10] = 0;
+               printf("<<< page %d: %d bytes (%s)\n", i, readed, readbuf);
+       }
+#endif
+       printf("sysio is about shutdown\n");
+       /*
+        * Clean up.
+        */
+       _sysio_shutdown();
+
+       printf("complete successfully\n");
+       return 0;
+}
diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c
new file mode 100644 (file)
index 0000000..da692b2
--- /dev/null
@@ -0,0 +1,531 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light read/write operations
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <assert.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <sysio.h>
+#include <fs.h>
+#include <mount.h>
+#include <inode.h>
+#include <file.h>
+
+#include "llite_lib.h"
+
+/*
+ * I/O completion query.  The context argument is ignored and the function
+ * unconditionally returns 1.
+ * NOTE(review): presumably 1 tells sysio the I/O has already completed,
+ * i.e. all I/O here is synchronous -- confirm against the sysio API.
+ */
+int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED)
+{
+        return 1;
+}
+
+/*
+ * Extent lock acquisition -- currently a stub.
+ *
+ * The compiled (#else) branch ignores every argument and returns ELDLM_OK
+ * without taking any lock.
+ *
+ * The disabled (#if 0) branch is the intended implementation: it grabs a
+ * lock and manually implements behaviour that makes it look like the OST
+ * is returning the file size with each lock acquisition.
+ */
+int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
+                   struct lov_stripe_md *lsm,
+                   int mode, struct ldlm_extent *extent,
+                   struct lustre_handle *lockh)
+{
+#if 0
+        struct ll_inode_info *lli = ll_i2info(inode);
+        int rc;
+        ENTRY;
+
+        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        /* always do a getattr for the first person to pop out of lock
+         * acquisition.. the DID_GETATTR flag and semaphore serialize
+         * this initial race.  we used to make a decision based on whether
+         * the lock was matched or acquired, but the matcher could win the
+         * waking race with the first issuer so that was no good..
+         */
+        if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags))
+                RETURN(ELDLM_OK);
+
+        down(&lli->lli_getattr_sem);
+
+        if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
+                rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+                if (rc == 0) {
+                        set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
+                } else {
+                        /* XXX can this fail? */
+                        ll_extent_unlock(fd, inode, lsm, mode, lockh);
+                }
+        }
+
+        up(&lli->lli_getattr_sem);
+        RETURN(rc);
+#else
+        /* locking is not implemented yet: report success unconditionally */
+        return ELDLM_OK;
+#endif
+}
+
+/*
+ * Extent lock release -- currently a stub.
+ *
+ * The compiled (#else) branch ignores every argument and returns 0.  The
+ * disabled (#if 0) branch would cancel the lock through obd_cancel()
+ * unless locking is switched off for the file or the superblock.
+ */
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                struct lov_stripe_md *lsm, int mode,
+                struct lustre_handle *lockh)
+{
+#if 0
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc;
+        ENTRY;
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+
+        RETURN(rc);
+#else
+        /* nothing was locked by llu_extent_lock(), so nothing to release */
+        return 0;
+#endif
+}
+
+/*
+ * Issue a single-page bulk read or write through obd_brw().
+ *
+ * cmd   - OBD_BRW_READ or OBD_BRW_WRITE (only used for the direction bit
+ *         in the debug message and passed on to obd_brw())
+ * inode - file the page belongs to; supplies the stripe metadata and the
+ *         OSC connection
+ * page  - page to transfer; its index determines the file offset
+ * flags - stored in the brw_page flag field
+ *
+ * Always transfers PAGE_SIZE bytes (the short-last-page logic is disabled).
+ * Returns -ENOMEM if no brw set could be allocated, otherwise the result
+ * of obd_brw() or, when that succeeded, of ll_brw_sync_wait().
+ */
+static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct obd_brw_set *set;
+        struct brw_page pg;
+        int rc;
+        ENTRY;
+
+        set = obd_brw_set_new();
+        if (set == NULL)
+                RETURN(-ENOMEM);
+
+        pg.pg = page;
+        /* byte offset of the page within the file */
+        pg.off = ((obd_off)page->index) << PAGE_SHIFT;
+
+        /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME */
+#if 0
+        if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > lli->lli_st_size))
+                pg.count = lli->lli_st_size % PAGE_SIZE;
+        else
+#endif
+                pg.count = PAGE_SIZE;
+
+        CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
+               cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, lli->lli_st_ino,
+               pg.off, pg.off);
+        /* cannot trigger while pg.count is hard-wired to PAGE_SIZE above */
+        if (pg.count == 0) {
+                LBUG();
+        }
+
+        pg.flag = flags;
+
+        /* the same routine is installed as the set callback and called
+         * directly below once obd_brw() has succeeded */
+        set->brw_callback = ll_brw_sync_wait;
+        rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL);
+        if (rc) {
+                /* -EIO is the one error that is not logged here */
+                if (rc != -EIO)
+                        CERROR("error from obd_brw: rc = %d\n", rc);
+        } else {
+                rc = ll_brw_sync_wait(set, CB_PHASE_START);
+                if (rc)
+                        CERROR("error from callback: rc = %d\n", rc);
+        }
+        /* drop our reference on the set on success and failure alike */
+        obd_brw_set_decref(set);
+
+        RETURN(rc);
+}
+
+/*
+ * Make `page` ready to receive new data in the byte range [from, to).
+ *
+ *  - full-page overwrite: nothing to do, old contents are irrelevant;
+ *  - page starts at or beyond the current file size: zero-fill it;
+ *  - otherwise: read the existing page contents through llu_brw().
+ *
+ * Returns 0 on success or the error from llu_brw().  The page-state checks
+ * of the kernel client (locked / up to date) are not ported.
+ */
+static int llu_prepare_write(struct inode *inode, struct page *page,
+                             unsigned from, unsigned to)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        obd_off page_start = ((obd_off)page->index) << PAGE_SHIFT;
+        int rc = 0;
+        ENTRY;
+
+        /* The whole page is about to be overwritten, so _don't_ fetch the
+         * old contents; it only becomes up to date at commit time. */
+        if (from == 0 && to == PAGE_SIZE)
+                RETURN(0);
+
+        /* A page that lies entirely past the known file size has no old
+         * data to preserve: hand back zeroes instead of reading. */
+        if (page_start >= lli->lli_st_size) {
+                void *addr = kmap(page);
+
+                memset(addr, 0, PAGE_SIZE);
+                kunmap(page);
+                GOTO(prepare_done, rc = 0);
+        }
+
+        /* Partial overwrite of an existing page: pull in the old data. */
+        rc = llu_brw(OBD_BRW_READ, inode, page, 0);
+
+        EXIT;
+
+ prepare_done:
+        return rc;
+}
+
+/*
+ * Write `page` out synchronously through llu_brw() and extend the cached
+ * file size if the write reached past it.
+ *
+ * from/to - byte range within the page that was modified; only `to` is
+ *           used, to compute the new end of file
+ *
+ * Returns 0 on success or the error from llu_brw(), in which case the
+ * cached size is left untouched.  The page-cache bookkeeping of the kernel
+ * client (marking the page up to date and dirty) is not ported.
+ */
+static int llu_commit_write(struct inode *inode, struct page *page,
+                            unsigned from, unsigned to)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        loff_t size;
+        int rc;
+        ENTRY;
+
+        rc = llu_brw(OBD_BRW_WRITE, inode, page, 0);
+        if (rc)
+                RETURN(rc);     /* pair the ENTRY above on the error path too */
+
+        /* this is matched by a hack in obdo_to_inode at the moment */
+        size = (((obd_off)page->index) << PAGE_SHIFT) + to;
+        if (size > lli->lli_st_size)
+                lli->lli_st_size = size;
+
+        RETURN(0);
+} /* llu_commit_write */
+
+/*
+ * Write `count` bytes from `buf` to the file at offset `pos`, one page at
+ * a time: grab a page, prepare it, copy the user data in and commit it.
+ *
+ * Returns the number of bytes written when at least one byte went out,
+ * otherwise 0 for an empty request or the error that stopped the first
+ * page (-EINVAL for a negative count/offset, -ENOMEM when no page could
+ * be obtained, or the status of prepare/commit).
+ *
+ * Inode locking, suid removal and timestamp updates of the kernel version
+ * are not ported.
+ */
+ssize_t
+llu_generic_file_write(struct inode *inode, const char *buf,
+                       size_t count, loff_t pos)
+{
+        ssize_t written = 0;
+        int     status = 0;
+
+        if ((ssize_t) count < 0)
+                return -EINVAL;
+        if (pos < 0)
+                return -EINVAL;
+
+        /* a zero-length request performs no page I/O at all */
+        while (count > 0) {
+                unsigned long index, offset;
+                struct page *page;
+                unsigned bytes;
+                char *kaddr;
+
+                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+                index = pos >> PAGE_CACHE_SHIFT;
+                bytes = PAGE_CACHE_SIZE - offset;
+                if (bytes > count)
+                        bytes = count;
+
+                page = __grab_cache_page(index);
+                if (!page) {
+                        status = -ENOMEM;
+                        break;
+                }
+
+                kaddr = kmap(page);
+                status = llu_prepare_write(inode, page, offset, offset + bytes);
+                if (status == 0) {
+                        memcpy(kaddr + offset, buf, bytes);
+                        status = llu_commit_write(inode, page, offset,
+                                                  offset + bytes);
+                }
+
+                /* single release point for success and failure */
+                kunmap(page);
+                page_cache_release(page);
+
+                if (status)
+                        break;
+
+                written += bytes;
+                count -= bytes;
+                pos += bytes;
+                buf += bytes;
+        }
+
+        /* a partial write wins over the error that ended it */
+        return written ? written : status;
+}
+
+/*
+ * Gather-write: write the `iovlen` segments of `iovec`, in array order, to
+ * consecutive file offsets starting at `pos`.
+ *
+ * Each non-empty segment is written under its own PW extent lock, which is
+ * released again before the next segment.  Processing stops at the first
+ * error or short write.
+ *
+ * Returns the total number of bytes written; if nothing was written,
+ * returns 0 or the negative error (-ENOLCK when the extent lock could not
+ * be obtained, otherwise the error from llu_generic_file_write()).
+ *
+ * TODO: O_APPEND handling and the dirty-page throttling of the kernel
+ * client are not ported.
+ */
+ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec,
+                       size_t iovlen, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data; /* XXX not ready don't use it now */
+        struct lustre_handle lockh = { 0, 0 };
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct ldlm_extent extent;
+        ldlm_error_t err;
+        ssize_t retval = 0;
+        size_t i;
+        ENTRY;
+
+        /* XXX consider other types later */
+        if (!S_ISREG(lli->lli_st_mode))
+                LBUG();
+
+        for (i = 0; i < iovlen; i++) {
+                const char *buf = iovec[i].iov_base;
+                size_t count = iovec[i].iov_len;
+                ssize_t rc;
+
+                /* empty segments contribute nothing */
+                if (count == 0)
+                        continue;
+
+                extent.start = pos;
+                extent.end = pos + count - 1;
+
+                err = llu_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh);
+                if (err != ELDLM_OK) {
+                        if (retval == 0)
+                                retval = -ENOLCK;
+                        break;
+                }
+
+                rc = llu_generic_file_write(inode, buf, count, pos);
+
+                /* XXX errors? */
+                ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
+
+                if (rc < 0) {
+                        /* report the error only if no data went out */
+                        if (retval == 0)
+                                retval = rc;
+                        break;
+                }
+
+                retval += rc;
+                pos += rc;
+
+                /* short write: later segments would land at wrong offsets */
+                if ((size_t)rc < count)
+                        break;
+        }
+
+        RETURN(retval);
+}
+
+/*
+ * Access-time update -- currently a no-op: the entire body is compiled out
+ * with #if 0.  The disabled code shows the two variants of the kernel
+ * client: push the new atime out via a setattr (USE_ATIME) or only update
+ * the in-memory inode.
+ */
+static void llu_update_atime(struct inode *inode)
+{
+#if 0
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+#ifdef USE_ATIME
+        struct iattr attr;
+
+        attr.ia_atime = LTIME_S(CURRENT_TIME);
+        attr.ia_valid = ATTR_ATIME;
+
+        if (lli->lli_st_atime == attr.ia_atime) return;
+        if (IS_RDONLY(inode)) return;
+        if (IS_NOATIME(inode)) return;
+
+        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
+        llu_inode_setattr(inode, &attr, 0);
+#else
+        /* update atime, but don't explicitly write it out just this change */
+        inode->i_atime = CURRENT_TIME;
+#endif
+#endif
+}
+
+/*
+ * Read up to `count` bytes at file offset `pos` into `buf`, one page at a
+ * time.  Every page is fetched with llu_brw() and the requested part is
+ * copied out; reading stops at the cached file size (lli_st_size).
+ *
+ * Returns the number of bytes copied when at least one byte was read,
+ * otherwise 0 (empty request or at/after end of file) or a negative error
+ * (-ENOMEM when no page could be obtained, or the error from llu_brw()).
+ * The return type is signed so that errors are not passed through size_t.
+ */
+static ssize_t llu_generic_file_read(struct inode *inode, char *buf,
+                                     size_t count, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        unsigned long index = pos >> PAGE_CACHE_SHIFT;
+        unsigned long offset = pos & ~PAGE_CACHE_MASK;
+        ssize_t nread = 0;
+        int error = 0;
+
+        /* a zero-length request performs no page I/O at all */
+        while (count > 0) {
+                struct page *page;
+                unsigned long end_index, nr;
+
+                end_index = lli->lli_st_size >> PAGE_CACHE_SHIFT;
+
+                if (index > end_index)
+                        break;
+                nr = PAGE_CACHE_SIZE;
+                if (index == end_index) {
+                        /* last page: only the bytes below EOF are valid */
+                        nr = lli->lli_st_size & ~PAGE_CACHE_MASK;
+                        if (nr <= offset)
+                                break;
+                }
+
+                nr = nr - offset;
+                if (nr > count)
+                        nr = count;
+
+                page = grab_cache_page(index);
+                if (!page) {
+                        error = -ENOMEM;
+                        break;
+                }
+
+                error = llu_brw(OBD_BRW_READ, inode, page, 0);
+                if (error) {
+                        page_cache_release(page);
+                        break;
+                }
+
+                memcpy(buf, (char *)kmap(page) + offset, nr);
+                kunmap(page);
+                page_cache_release(page);
+
+                /* advance the destination as well as the file position so
+                 * the next page does not overwrite what was just copied */
+                buf += nr;
+                nread += nr;
+                count -= nr;
+
+                offset += nr;
+                index += offset >> PAGE_CACHE_SHIFT;
+                offset &= ~PAGE_CACHE_MASK;
+        }
+
+        /* a partial read wins over the error that ended it */
+        return nread ? nread : error;
+}
+
+/*
+ * Scatter-read: fill the `iovlen` segments of `iovec`, in array order,
+ * from consecutive file offsets starting at `pos`.
+ *
+ * Each non-empty segment is read under its own PR extent lock, which is
+ * released again before the next segment.  Processing stops at the first
+ * error or short read (end of file).
+ *
+ * Returns the total number of bytes read; if nothing was read, returns 0
+ * or the negative error (-ENOLCK when the extent lock could not be
+ * obtained, otherwise the error from llu_generic_file_read()).
+ *
+ * TODO: the read-extent tracking of the kernel client (lli_read_extents)
+ * is not ported.
+ */
+ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec,
+                       size_t iovlen, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct lustre_handle lockh = { 0, 0 };
+        struct ldlm_extent extent;
+        ldlm_error_t err;
+        ssize_t retval = 0;
+        size_t i;
+        ENTRY;
+
+        for (i = 0; i < iovlen; i++) {
+                char *buf = iovec[i].iov_base;
+                size_t count = iovec[i].iov_len;
+                ssize_t rc;
+
+                /* an empty segment reads nothing but must not abort the
+                 * remaining segments */
+                if (count == 0)
+                        continue;
+
+                extent.start = pos;
+                extent.end = pos + count - 1;
+
+                err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh);
+                if (err != ELDLM_OK) {
+                        if (retval == 0)
+                                retval = -ENOLCK;
+                        break;
+                }
+
+                CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
+                       lli->lli_st_ino, count, pos);
+                rc = llu_generic_file_read(inode, buf, count, pos);
+
+                /* XXX errors? */
+                ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
+
+                if (rc < 0) {
+                        /* report the error only if no data was delivered */
+                        if (retval == 0)
+                                retval = rc;
+                        break;
+                }
+
+                retval += rc;
+                pos += rc;
+
+                /* short read means end of file: nothing more to fetch */
+                if ((size_t)rc < count)
+                        break;
+        }
+
+        if (retval > 0)
+                llu_update_atime(inode);
+
+        RETURN(retval);
+}
+
diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c
new file mode 100644 (file)
index 0000000..de74554
--- /dev/null
@@ -0,0 +1,580 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+
+#include <sysio.h>
+#include <fs.h>
+#include <mount.h>
+#include <inode.h>
+#include <file.h>
+
+#include "llite_lib.h"
+
+/*
+ * Placeholder for the filesystem "gone" operation; does nothing yet.
+ * NOTE(review): presumably meant to release per-filesystem state when
+ * sysio drops the file system -- confirm against the sysio fs ops.
+ */
+static void llu_fsop_gone(struct filesys *fs)
+{
+        /* FIXME: nothing is released here yet */
+}
+
+static struct inode_ops llu_inode_ops;
+
+/*
+ * Copy the attributes flagged in body->valid from an MDS reply body into
+ * the inode's private llu info, and attach the stripe metadata if given.
+ *
+ * `lsm` must be non-NULL exactly when OBD_MD_FLEASIZE is set in the body.
+ * If the inode already carries stripe metadata, the new one is asserted
+ * to be byte-identical over sizeof(*lsm); otherwise `lsm` is stored.
+ */
+void llu_update_inode(struct inode *inode, struct mds_body *body,
+                      struct lov_stripe_md *lsm)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+        if (lsm != NULL) {
+                if (lli->lli_smd != NULL) {
+                        LASSERT (!memcmp (lli->lli_smd, lsm,
+                                          sizeof (*lsm)));
+                } else {
+                        lli->lli_smd = lsm;
+                }
+        }
+
+        /* identity: mirrored into both the stat fields and the fid */
+        if (body->valid & OBD_MD_FLID) {
+                lli->lli_st_ino = body->ino;
+                lli->lli_fid.id = body->ino;
+        }
+        if (body->valid & OBD_MD_FLGENER) {
+                lli->lli_st_generation = body->generation;
+                lli->lli_fid.generation = body->generation;
+        }
+
+        /* timestamps */
+        if (body->valid & OBD_MD_FLATIME)
+                LTIME_S(lli->lli_st_atime) = body->atime;
+        if (body->valid & OBD_MD_FLMTIME)
+                LTIME_S(lli->lli_st_mtime) = body->mtime;
+        if (body->valid & OBD_MD_FLCTIME)
+                LTIME_S(lli->lli_st_ctime) = body->ctime;
+
+        /* permission bits and file type live in the same word but are
+         * updated independently of each other */
+        if (body->valid & OBD_MD_FLMODE)
+                lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT);
+        if (body->valid & OBD_MD_FLTYPE) {
+                lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT);
+                lli->lli_fid.f_type = body->mode & S_IFMT;
+        }
+
+        /* ownership and remaining attributes */
+        if (body->valid & OBD_MD_FLUID)
+                lli->lli_st_uid = body->uid;
+        if (body->valid & OBD_MD_FLGID)
+                lli->lli_st_gid = body->gid;
+        if (body->valid & OBD_MD_FLFLAGS)
+                lli->lli_st_flags = body->flags;
+        if (body->valid & OBD_MD_FLNLINK)
+                lli->lli_st_nlink = body->nlink;
+        if (body->valid & OBD_MD_FLRDEV)
+                lli->lli_st_rdev = body->rdev;
+        if (body->valid & OBD_MD_FLSIZE)
+                lli->lli_st_size = body->size;
+        if (body->valid & OBD_MD_FLBLOCKS)
+                lli->lli_st_blocks = body->blocks;
+}
+
+/*
+ * Copy attributes from an obdo into the inode's private llu info.
+ *
+ * Only fields that are both requested in `valid` and marked valid in the
+ * obdo itself (src->o_valid) are copied.  ctime is only ever moved
+ * forward; every other field is overwritten unconditionally.
+ */
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+        struct llu_inode_info *info = llu_i2info(dst);
+        obd_flag mask = valid & src->o_valid;
+
+        /* timestamps */
+        if (mask & OBD_MD_FLATIME)
+                LTIME_S(info->lli_st_atime) = src->o_atime;
+        if (mask & OBD_MD_FLMTIME)
+                LTIME_S(info->lli_st_mtime) = src->o_mtime;
+        if (mask & OBD_MD_FLCTIME) {
+                if (src->o_ctime > LTIME_S(info->lli_st_ctime))
+                        LTIME_S(info->lli_st_ctime) = src->o_ctime;
+        }
+
+        /* size and space accounting */
+        if (mask & OBD_MD_FLSIZE)
+                info->lli_st_size = src->o_size;
+        if (mask & OBD_MD_FLBLOCKS)
+                info->lli_st_blocks = src->o_blocks;
+        if (mask & OBD_MD_FLBLKSZ)
+                info->lli_st_blksize = src->o_blksize;
+
+        /* file type and permission bits are replaced separately */
+        if (mask & OBD_MD_FLTYPE)
+                info->lli_st_mode = (info->lli_st_mode & ~S_IFMT) |
+                                    (src->o_mode & S_IFMT);
+        if (mask & OBD_MD_FLMODE)
+                info->lli_st_mode = (info->lli_st_mode & S_IFMT) |
+                                    (src->o_mode & ~S_IFMT);
+
+        /* ownership and miscellaneous attributes */
+        if (mask & OBD_MD_FLUID)
+                info->lli_st_uid = src->o_uid;
+        if (mask & OBD_MD_FLGID)
+                info->lli_st_gid = src->o_gid;
+        if (mask & OBD_MD_FLFLAGS)
+                info->lli_st_flags = src->o_flags;
+        if (mask & OBD_MD_FLNLINK)
+                info->lli_st_nlink = src->o_nlink;
+        if (mask & OBD_MD_FLGENER)
+                info->lli_st_generation = src->o_generation;
+        if (mask & OBD_MD_FLRDEV)
+                info->lli_st_rdev = src->o_rdev;
+}
+
+/*
+ * Fill an obdo from the inode's private llu info.
+ *
+ * Every attribute requested in `valid` is copied into `dst`, and the
+ * requested bits -- except OBD_MD_FLID, which this function never fills
+ * in -- are OR-ed into dst->o_valid.
+ */
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
+{
+        struct llu_inode_info *info = llu_i2info(src);
+
+        /* timestamps */
+        if (valid & OBD_MD_FLATIME)
+                dst->o_atime = LTIME_S(info->lli_st_atime);
+        if (valid & OBD_MD_FLMTIME)
+                dst->o_mtime = LTIME_S(info->lli_st_mtime);
+        if (valid & OBD_MD_FLCTIME)
+                dst->o_ctime = LTIME_S(info->lli_st_ctime);
+
+        /* size and space accounting */
+        if (valid & OBD_MD_FLSIZE)
+                dst->o_size = info->lli_st_size;
+        if (valid & OBD_MD_FLBLOCKS)
+                dst->o_blocks = info->lli_st_blocks;
+        if (valid & OBD_MD_FLBLKSZ)
+                dst->o_blksize = info->lli_st_blksize;
+
+        /* file type and permission bits are replaced separately */
+        if (valid & OBD_MD_FLTYPE)
+                dst->o_mode = (dst->o_mode & ~S_IFMT) |
+                              (info->lli_st_mode & S_IFMT);
+        if (valid & OBD_MD_FLMODE)
+                dst->o_mode = (dst->o_mode & S_IFMT) |
+                              (info->lli_st_mode & ~S_IFMT);
+
+        /* ownership and miscellaneous attributes */
+        if (valid & OBD_MD_FLUID)
+                dst->o_uid = info->lli_st_uid;
+        if (valid & OBD_MD_FLGID)
+                dst->o_gid = info->lli_st_gid;
+        if (valid & OBD_MD_FLFLAGS)
+                dst->o_flags = info->lli_st_flags;
+        if (valid & OBD_MD_FLNLINK)
+                dst->o_nlink = info->lli_st_nlink;
+        if (valid & OBD_MD_FLGENER)
+                dst->o_generation = info->lli_st_generation;
+        if (valid & OBD_MD_FLRDEV)
+                dst->o_rdev = (__u32)(info->lli_st_rdev);
+
+        dst->o_valid |= (valid & ~OBD_MD_FLID);
+}
+
+/*
+ * Fetch size, block count, mtime and ctime of the object described by
+ * `lsm` through obd_getattr() and store them in the inode.
+ *
+ * ostdata - optional FD_OSTDATA_SIZE bytes copied into the request's
+ *           inline data (and flagged with OBD_MD_FLHANDLE); may be NULL
+ *
+ * Returns 0 on success or the error from obd_getattr(), in which case the
+ * inode is left untouched.
+ */
+int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
+                      char *ostdata)
+{
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        struct obdo attrs;
+        int rc;
+        ENTRY;
+
+        LASSERT(lsm);
+        LASSERT(sbi);
+
+        /* build the request: a regular-file object identified by its id */
+        memset(&attrs, 0, sizeof(attrs));
+        attrs.o_id = lsm->lsm_object_id;
+        attrs.o_mode = S_IFREG;
+        attrs.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+                        OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+
+        if (ostdata) {
+                memcpy(&attrs.o_inline, ostdata, FD_OSTDATA_SIZE);
+                attrs.o_valid |= OBD_MD_FLHANDLE;
+        }
+
+        rc = obd_getattr(&sbi->ll_osc_conn, &attrs, lsm);
+        if (rc == 0)
+                obdo_to_inode(inode, &attrs, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                                             OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        RETURN(rc);
+}
+
+struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode)
+{
+       struct inode *inode;
+        struct llu_inode_info *lli;
+
+        OBD_ALLOC(lli, sizeof(*lli));
+        if (!lli)
+                return NULL;
+
+        /* initialize lli here */
+        lli->lli_sbi = llu_fs2sbi(fs);
+        lli->lli_smd = NULL;
+        lli->lli_symlink_name = NULL;
+        lli->lli_flags = 0;
+        INIT_LIST_HEAD(&lli->lli_read_extents);
+        lli->lli_file_data = NULL;
+
+        /* could file_identifier be 0 ? FIXME */
+       inode = _sysio_i_new(fs, ino, NULL,
+#ifndef AUTOMOUNT_FILE_NAME
+                            mode & S_IFMT,
+#else
+                            mode,      /* all of the bits! */
+#endif
+                             0,
+                            &llu_inode_ops, lli);
+
+       if (!inode)
+               OBD_FREE(lli, sizeof(*lli));
+
+        return inode;
+}
+
+/*
+ * Look up the last component of `pnode` on the MDS and instantiate an
+ * inode for it.
+ *
+ * The mount root is answered locally: its existing inode is referenced
+ * and returned.  For every other name a getattr-by-name request is sent
+ * to the MDS; for regular files the stripe EA in the reply is unpacked
+ * and the object attributes are fetched with llu_inode_getattr().
+ *
+ * On success returns 0 and stores the inode in *inop.  On failure returns
+ * a negative error and leaves *inop untouched:
+ *   -EINVAL  empty name
+ *   -ENOMEM  name copy or inode allocation failed
+ *   -ENOENT  the MDS request failed
+ *   -EPROTO  reply claims an EA but carries none
+ *   other    error from obd_unpackmd() or llu_inode_getattr()
+ *
+ * The name copy, the request and a half-built inode are released on every
+ * exit path.
+ */
+static int llu_iop_lookup(struct pnode *pnode,
+                          struct inode **inop,
+                          struct intent *intnt __IS_UNUSED,
+                          const char *path __IS_UNUSED)
+{
+        struct pnode_base *pb_dir = pnode->p_parent->p_base;
+        struct ptlrpc_request *request = NULL;
+        struct llu_sb_info *sbi = llu_i2sbi(pb_dir->pb_ino);
+        struct ll_fid *fid = &llu_i2info(pb_dir->pb_ino)->lli_fid;
+        struct qstr *name = &pnode->p_base->pb_name;
+        struct mds_body *body;
+        struct inode *inode = NULL;
+        unsigned long valid;
+        char *pname;
+        int rc, easize;
+        struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
+
+        /* the mount root inode have no name, so don't call
+         * remote in this case. but probably we need revalidate
+         * it here? FIXME */
+        if (pnode->p_mount->mnt_root == pnode) {
+                struct inode *i = pnode->p_base->pb_ino;
+                I_REF(i);
+                *inop = i;
+                return 0;
+        }
+
+        if (!name->len)
+                return -EINVAL;
+
+        /* mdc_getattr_name require NULL-terminated name */
+        pname = malloc(name->len + 1);
+        if (!pname)
+                return -ENOMEM;
+        memcpy(pname, name->name, name->len);
+        pname[name->len] = 0;
+
+        valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE;
+
+        /* FIXME before getattr_name, we don't know whether
+         * the inode we are finding is regular or not, so here
+         * we blindly require server feed in EA data */
+        easize = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+        valid |= OBD_MD_FLEASIZE;
+
+        rc = mdc_getattr_name(&sbi->ll_mdc_conn, fid,
+                              pname, name->len + 1,
+                              valid, easize, &request);
+        if (rc < 0) {
+                CERROR("mdc_getattr_name: %d\n", rc);
+                rc = -ENOENT;
+                goto out;
+        }
+
+        /* validate the reply body before anything dereferences it */
+        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
+        LASSERT (body != NULL);
+        LASSERT_REPSWABBED (request, 0);
+        lic.lic_body = body;
+
+        inode = llu_new_inode(pnode->p_mount->mnt_fs, body->ino, body->mode);
+        if (inode == NULL) {
+                rc = -ENOMEM;
+                goto out;
+        }
+
+        if (S_ISREG(body->mode) && (body->valid & OBD_MD_FLEASIZE)) {
+                struct lov_mds_md    *lmm;
+                int                   lmm_size;
+                int                   lsm_size;
+
+                lmm_size = body->eadatasize;
+                if (lmm_size == 0) {
+                        CERROR ("OBD_MD_FLEASIZE set but eadatasize 0\n");
+                        rc = -EPROTO;
+                        goto out;
+                }
+                lmm = lustre_msg_buf(request->rq_repmsg, 0 + 1, lmm_size);
+                LASSERT(lmm != NULL);
+                LASSERT_REPSWABBED (request, 0 + 1);
+
+                lsm_size = obd_unpackmd (&sbi->ll_osc_conn,
+                                         &lic.lic_lsm, lmm, lmm_size);
+                if (lsm_size < 0) {
+                        CERROR ("Error %d unpacking eadata\n", lsm_size);
+                        rc = lsm_size;
+                        goto out;
+                }
+                LASSERT (lsm_size >= sizeof (*lic.lic_lsm));
+        } else {
+                lic.lic_lsm = NULL;
+        }
+
+        llu_update_inode(inode, body, lic.lic_lsm);
+
+        if (llu_i2info(inode)->lli_smd) {
+                rc = llu_inode_getattr(inode, llu_i2info(inode)->lli_smd, NULL);
+                if (rc)
+                        goto out;
+        }
+
+        /* success: hand the inode over to the caller */
+        *inop = inode;
+        inode = NULL;
+
+out:
+        /* still set here only when a step after its creation failed */
+        if (inode != NULL)
+                _sysio_i_gone(inode);
+        ptlrpc_req_finished(request);
+        free(pname);
+
+        return rc;
+}
+
+/*
+ * Fill the stat buffer @b from the attributes stored in the
+ * llu_inode_info of @ino.  @pno is not consulted.  Always returns 0.
+ */
+static int llu_iop_getattr(struct pnode *pno,
+                           struct inode *ino,
+                           struct intnl_stat *b)
+{
+        struct llu_inode_info *info = llu_i2info(ino);
+
+        /* identity */
+        b->st_dev     = info->lli_st_dev;
+        b->st_ino     = info->lli_st_ino;
+        b->st_rdev    = info->lli_st_rdev;
+
+        /* type, permissions and ownership */
+        b->st_mode    = info->lli_st_mode;
+        b->st_nlink   = info->lli_st_nlink;
+        b->st_uid     = info->lli_st_uid;
+        b->st_gid     = info->lli_st_gid;
+
+        /* size information */
+        b->st_size    = info->lli_st_size;
+        b->st_blksize = info->lli_st_blksize;
+        b->st_blocks  = info->lli_st_blocks;
+
+        /* timestamps */
+        b->st_atime   = info->lli_st_atime;
+        b->st_mtime   = info->lli_st_mtime;
+        b->st_ctime   = info->lli_st_ctime;
+
+        return 0;
+}
+
+/*
+ * Cancel the unused locks held on the resource named by the inode
+ * number and generation stored in @lli, in the namespace of the obd
+ * device behind @conn.  @flags is handed to ldlm_cli_cancel_unused()
+ * unchanged, and its result is returned.
+ */
+int llu_mdc_cancel_unused(struct lustre_handle *conn,
+                          struct llu_inode_info *lli,
+                          int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct ldlm_res_id resource = {
+                .name = { lli->lli_st_ino, lli->lli_st_generation }
+        };
+        int rc;
+        ENTRY;
+
+        rc = ldlm_cli_cancel_unused(obd->obd_namespace, &resource, flags);
+        RETURN(rc);
+}
+
+/*
+ * Release what the Lustre-private part of @inode holds: cancel its
+ * unused MDC locks, and -- when lli_smd is set -- cancel the unused
+ * OSC locks and free lli_smd; finally free the stored symlink name.
+ * Failures of the cancel calls are only logged.
+ */
+static void llu_clear_inode(struct inode *inode)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        int rc;
+        ENTRY;
+
+        CDEBUG(D_INODE, "clear inode: %lu\n", lli->lli_st_ino);
+
+        rc = llu_mdc_cancel_unused(&sbi->ll_mdc_conn, lli,
+                                   LDLM_FL_NO_CALLBACK);
+        if (rc < 0) {
+                /* XXX FIXME do something dramatic */
+                CERROR("ll_mdc_cancel_unused: %d\n", rc);
+        }
+
+        if (lli->lli_smd != NULL) {
+                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
+                if (rc < 0) {
+                        /* XXX FIXME do something dramatic */
+                        CERROR("obd_cancel_unused: %d\n", rc);
+                }
+
+                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
+        }
+
+        if (lli->lli_symlink_name != NULL) {
+                OBD_FREE(lli->lli_symlink_name,
+                         strlen(lli->lli_symlink_name) + 1);
+                lli->lli_symlink_name = NULL;
+        }
+
+        EXIT;
+}
+
+/*
+ * Installed as inop_gone in llu_inode_ops: tear down the Lustre state
+ * of @inode with llu_clear_inode(), then free its llu_inode_info.
+ */
+void llu_iop_gone(struct inode *inode)
+{
+        /* Fetch the private info before teardown; it is freed last. */
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+        llu_clear_inode(inode);
+
+        OBD_FREE(lli, sizeof(*lli));
+}
+
+/* File-system operations handed to _sysio_fs_new() at mount time. */
+struct filesys_ops llu_filesys_ops = {
+        .fsop_gone = llu_fsop_gone,
+};
+
+
+/* Inode operations attached to every inode made by llu_new_inode(). */
+static struct inode_ops llu_inode_ops = {
+        .inop_lookup   = llu_iop_lookup,
+        .inop_getattr  = llu_iop_getattr,
+        .inop_open     = llu_iop_open,
+        .inop_close    = llu_iop_close,
+        .inop_ipreadv  = llu_iop_ipreadv,
+        .inop_ipwritev = llu_iop_ipwritev,
+        .inop_iodone   = llu_iop_iodone,
+        .inop_gone     = llu_iop_gone,
+};
+
+
+/*
+ * Mount entry point (the sole member of llu_fssw_ops).
+ *
+ * Allocates the llu_sb_info, creates the filesys record, connects to
+ * the MDC and OSC devices named by mount_option.mdc_uuid and
+ * mount_option.osc_uuid, fetches the root fid and its attributes,
+ * builds the root inode and its base path node, and finally hands
+ * everything to _sysio_do_mount().
+ *
+ * @source   not used by this function
+ * @flags    passed to _sysio_fs_new() and _sysio_do_mount()
+ * @data     unused
+ * @tocover  not used by this function
+ * @mntp     out: mount record filled in by _sysio_do_mount()
+ *
+ * Returns 0 on success or a negative errno.  On failure the
+ * connections made so far are dropped and the llu_sb_info is freed.
+ */
+static int
+llu_fsswop_mount(const char *source,
+                 unsigned flags,
+                 const void *data __IS_UNUSED,
+                 struct pnode *tocover,
+                 struct mount **mntp)
+{
+        struct filesys *fs;
+        struct inode *root;
+        struct pnode_base *rootpb;
+        static struct qstr noname = { NULL, 0, 0 };
+        struct ll_fid rootfid;
+
+        struct llu_sb_info *sbi;
+        struct ptlrpc_request *request = NULL;
+        struct mds_body *root_body;
+        struct obd_uuid param_uuid;
+        class_uuid_t uuid;
+        struct obd_device *obd;
+        char *osc=mount_option.osc_uuid;
+        char *mdc=mount_option.mdc_uuid;
+        int err = -EINVAL;
+
+        ENTRY;
+
+        OBD_ALLOC(sbi, sizeof(*sbi));
+        if (!sbi)
+                RETURN(-ENOMEM);
+
+        INIT_LIST_HEAD(&sbi->ll_conn_chain);
+        generate_random_uuid(uuid);
+        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
+
+        /* NOTE(review): fs is not released on the error paths below;
+         * confirm whether the filesys record must be dropped there. */
+        fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
+        if (!fs) {
+                err = -ENOMEM;
+                goto out_free;
+        }
+
+        /* strncpy() does not terminate the destination when the source
+         * fills it, so copy one byte less and terminate explicitly. */
+        strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid) - 1);
+        param_uuid.uuid[sizeof(param_uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&param_uuid);
+        if (!obd) {
+                CERROR("MDC %s: not setup or attached\n", mdc);
+                err = -EINVAL;
+                goto out_free;
+        }
+
+        /* setup mdc */
+        /* FIXME need recover stuff */
+        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
+        if (err) {
+                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
+                goto out_free;
+        }
+
+        /* setup osc */
+        strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid) - 1);
+        param_uuid.uuid[sizeof(param_uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&param_uuid);
+        if (!obd) {
+                CERROR("OSC %s: not setup or attached\n", osc);
+                err = -EINVAL;
+                goto out_mdc;
+        }
+
+        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
+        if (err) {
+                CERROR("cannot connect to %s: rc = %d\n", osc, err);
+                goto out_mdc;
+        }
+
+        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
+        if (err) {
+                CERROR("cannot mds_connect: rc = %d\n", err);
+                goto out_osc;
+        }
+        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
+        sbi->ll_rootino = rootfid.id;
+
+/* XXX do we need this??
+        memset(&osfs, 0, sizeof(osfs));
+        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+*/
+        /* fetch attr of root inode */
+        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
+                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
+        if (err) {
+                CERROR("mdc_getattr failed for root: rc = %d\n", err);
+                goto out_request;
+        }
+
+        root_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*root_body));
+        if (!root_body) {
+                /* Don't dereference a reply that has no usable body. */
+                CERROR("mdc_getattr reply for root has no body\n");
+                err = -EPROTO;
+                goto out_request;
+        }
+        LASSERT(sbi->ll_rootino != 0);
+
+        root = llu_new_inode(fs, root_body->ino, root_body->mode);
+        if (!root) {
+                err = -ENOMEM;
+                goto out_request;
+        }
+
+        llu_update_inode(root, root_body, NULL);
+
+        /*
+         * Generate base path-node for root.
+         */
+        rootpb = _sysio_pb_new(&noname, NULL, root);
+        if (!rootpb) {
+                err = -ENOMEM;
+                goto out_inode;
+        }
+
+        err = _sysio_do_mount(fs, rootpb, flags, NULL, mntp);
+        if (err) {
+                _sysio_pb_gone(rootpb);
+                goto out_inode;
+        }
+
+        ptlrpc_req_finished(request);
+        request = NULL;
+
+        printf("************************************************\n");
+        printf("*          Mount successfully!!!!!!!           *\n");
+        printf("************************************************\n");
+
+        RETURN(0);
+
+out_inode:
+        _sysio_i_gone(root);
+out_request:
+        ptlrpc_req_finished(request);
+out_osc:
+        obd_disconnect(&sbi->ll_osc_conn);
+out_mdc:
+        obd_disconnect(&sbi->ll_mdc_conn);
+out_free:
+        OBD_FREE(sbi, sizeof(*sbi));
+        RETURN(err);
+}
+
+/*
+ * File-system switch entry for liblustre.  The initializer is
+ * positional: llu_fsswop_mount fills the first member of
+ * struct fssw_ops.
+ */
+struct fssw_ops llu_fssw_ops = {
+        llu_fsswop_mount
+};
+