From 66e1f2cafe9125f42b4ff2c7d2bcd408efbd4e73 Mon Sep 17 00:00:00 2001 From: pschwan Date: Sat, 12 Apr 2003 20:06:58 +0000 Subject: [PATCH] - merge b_devel into b_proto (incl b_swab changes) - fix leak-all-bulk-pages bug - Mike fixed a bug where a failed ptlrpc_queue_wait send buf would fail, but the user would still have to wait for the timeout to pass before being able to interrupt --- lustre/include/ioctl.h | 64 +++++ lustre/liblustre/file.c | 551 ++++++++++++++++++++++++++++++++++++++++ lustre/liblustre/llite_lib.c | 224 +++++++++++++++++ lustre/liblustre/llite_lib.h | 129 ++++++++++ lustre/liblustre/lltest.c | 147 +++++++++++ lustre/liblustre/rw.c | 531 +++++++++++++++++++++++++++++++++++++++ lustre/liblustre/super.c | 580 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 2226 insertions(+) create mode 100644 lustre/include/ioctl.h create mode 100644 lustre/liblustre/file.c create mode 100644 lustre/liblustre/llite_lib.c create mode 100644 lustre/liblustre/llite_lib.h create mode 100644 lustre/liblustre/lltest.c create mode 100644 lustre/liblustre/rw.c create mode 100644 lustre/liblustre/super.c diff --git a/lustre/include/ioctl.h b/lustre/include/ioctl.h new file mode 100644 index 0000000..a4ec8a5 --- /dev/null +++ b/lustre/include/ioctl.h @@ -0,0 +1,64 @@ +#ifndef _ASMI386_IOCTL_H +#define _ASMI386_IOCTL_H + +/* ioctl command encoding: 32 bits total, command in lower 16 bits, + * size of the parameter structure in the lower 14 bits of the + * upper 16 bits. + * Encoding the size of the parameter structure in the ioctl request + * The highest 2 bits are reserved for indicating the ``access mode''. + * NOTE: This limits the max parameter size to 16kB -1 ! + */ + +/* + * The following is for compatibility across the various Linux + * platforms. The i386 ioctl numbering scheme doesn't really enforce + * a type field. 
De facto, however, the top 8 bits of the lower 16 + * bits are indeed used as a type field, so we might just as well make + * this explicit here. Please be sure to use the decoding macros + * below from now on. + */ +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 14 +#define _IOC_DIRBITS 2 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits. + */ +#define _IOC_NONE 0U +#define _IOC_WRITE 1U +#define _IOC_READ 2U + +#define _IOC(dir,type,nr,size) (((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode ioctl numbers.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* ...and for the drivers/sound files... 
*/ + +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif /* _ASMI386_IOCTL_H */ diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c new file mode 100644 index 0000000..d656918 --- /dev/null +++ b/lustre/liblustre/file.c @@ -0,0 +1,551 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "llite_lib.h" + +void llu_prepare_mdc_op_data(struct mdc_op_data *data, + struct inode *i1, + struct inode *i2, + const char *name, + int namelen, + int mode) +{ + struct llu_inode_info *lli1, *lli2; + + LASSERT(i1); + + lli1 = llu_i2info(i1); + data->ino1 = lli1->lli_st_ino; + data->gen1 = lli1->lli_st_generation; + data->typ1 = lli1->lli_st_mode & S_IFMT; + data->gid1 = lli1->lli_st_gid; + + if (i2) { + lli2 = llu_i2info(i2); + data->ino2 = lli2->lli_st_ino; + data->gen2 = lli2->lli_st_generation; + data->typ2 = lli2->lli_st_mode & S_IFMT; + data->gid2 = lli2->lli_st_gid; + } else + data->ino2 = 0; + + data->name = name; + data->namelen = namelen; + data->mode = mode; +} + +static struct inode *llu_create_node(struct inode *dir, const char *name, + int namelen, const void *data, int datalen, + int mode, __u64 extra, + struct lookup_intent *it) +{ + struct inode *inode; + struct ptlrpc_request *request = NULL; + struct mds_body *body; + time_t time = 123456;//time(NULL); + struct llu_sb_info *sbi = llu_i2sbi(dir); + + if (it && it->it_disposition) { + LBUG(); +#if 0 + ll_invalidate_inode_pages(dir); +#endif + request = it->it_data; + body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*body)); + } else { + struct mdc_op_data op_data; + struct llu_inode_info *lli_dir = llu_i2info(dir); + int gid = current->fsgid; + int rc; + + if (lli_dir->lli_st_mode & S_ISGID) { + gid = lli_dir->lli_st_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } + + llu_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0); + rc = mdc_create(&sbi->ll_mdc_conn, &op_data, + data, datalen, mode, current->fsuid, gid, + time, extra, &request); + if (rc) { + inode = (struct inode*)rc; + goto out; + } + body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); + } + + inode = llu_new_inode(dir->i_fs, body->ino, 
body->mode); + if (!inode) { + /* FIXME more cleanup needed? */ + goto out; + } + + llu_update_inode(inode, body, NULL); + + if (it && it->it_disposition) { + /* We asked for a lock on the directory, but were + * granted a lock on the inode. Since we finally have + * an inode pointer, stuff it in the lock. */ +#if 0 + ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle, + inode); +#endif + } + + out: + ptlrpc_req_finished(request); + return inode; +} + +int llu_create(struct inode *dir, struct pnode_base *pnode, int mode) +{ + struct inode *inode; +#if 0 + int rc = 0; + + CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n", + dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it)); + + it = dentry->d_it; + + rc = ll_it_open_error(IT_OPEN_CREATE, it); + if (rc) { + LL_GET_INTENT(dentry, it); + ptlrpc_req_finished(it->it_data); + RETURN(rc); + } +#endif + inode = llu_create_node(dir, pnode->pb_name.name, pnode->pb_name.len, + NULL, 0, mode, 0, NULL); + + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + pnode->pb_ino = inode; + + return 0; +} + +static int llu_create_obj(struct lustre_handle *conn, struct inode *inode, + struct lov_stripe_md *lsm) +{ + struct ptlrpc_request *req = NULL; + struct llu_inode_info *lli = llu_i2info(inode); + struct lov_mds_md *lmm = NULL; + struct obdo *oa; + struct iattr iattr; + struct mdc_op_data op_data; + int rc, err, lmm_size = 0;; + ENTRY; + + oa = obdo_alloc(); + if (!oa) + RETURN(-ENOMEM); + + oa->o_mode = S_IFREG | 0600; + oa->o_id = lli->lli_st_ino; + /* Keep these 0 for now, because chown/chgrp does not change the + * ownership on the OST, and we don't want to allow BA OST NFS + * users to access these objects by mistake. 
+ */ + oa->o_uid = 0; + oa->o_gid = 0; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | + OBD_MD_FLUID | OBD_MD_FLGID; + + rc = obd_create(conn, oa, &lsm, NULL); + if (rc) { + CERROR("error creating objects for inode %lu: rc = %d\n", + lli->lli_st_ino, rc); + if (rc > 0) { + CERROR("obd_create returned invalid rc %d\n", rc); + rc = -EIO; + } + GOTO(out_oa, rc); + } + + LASSERT(lsm && lsm->lsm_object_id); + rc = obd_packmd(conn, &lmm, lsm); + if (rc < 0) + GOTO(out_destroy, rc); + + lmm_size = rc; + + /* Save the stripe MD with this file on the MDS */ + memset(&iattr, 0, sizeof(iattr)); + iattr.ia_valid = ATTR_FROM_OPEN; + + llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); + + rc = mdc_setattr(&llu_i2sbi(inode)->ll_mdc_conn, &op_data, + &iattr, lmm, lmm_size, &req); + ptlrpc_req_finished(req); + + obd_free_diskmd(conn, &lmm); + + /* If we couldn't complete mdc_open() and store the stripe MD on the + * MDS, we need to destroy the objects now or they will be leaked. + */ + if (rc) { + CERROR("error: storing stripe MD for %lu: rc %d\n", + lli->lli_st_ino, rc); + GOTO(out_destroy, rc); + } + lli->lli_smd = lsm; + + EXIT; +out_oa: + obdo_free(oa); + return rc; + +out_destroy: + obdo_from_inode(oa, inode, OBD_MD_FLTYPE); + oa->o_id = lsm->lsm_object_id; + oa->o_valid |= OBD_MD_FLID; + err = obd_destroy(conn, oa, lsm, NULL); + obd_free_memmd(conn, &lsm); + if (err) { + CERROR("error uncreating inode %lu objects: rc %d\n", + lli->lli_st_ino, err); + } + goto out_oa; +} + +/* FIXME currently no "it" passed in */ +static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) +{ + struct ll_file_data *fd; +#if 0 + struct ptlrpc_request *req = it->it_data; + struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1); + ENTRY; +#endif + LASSERT(!lli->lli_file_data); + + fd = malloc(sizeof(struct ll_file_data)); + /* We can't handle this well without reorganizing ll_file_open and + * ll_mdc_close, so don't even try right now. 
*/ + LASSERT(fd != NULL); + + memset(fd, 0, sizeof(*fd)); +#if 0 + memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); + fd->fd_mds_och.och_req = it->it_data; +#endif + lli->lli_file_data = fd; + + RETURN(0); +} + +static int llu_osc_open(struct lustre_handle *conn, struct inode *inode, + struct lov_stripe_md *lsm) +{ + struct ll_file_data *fd = llu_i2info(inode)->lli_file_data; + struct obdo *oa; + int rc; + ENTRY; + + oa = obdo_alloc(); + if (!oa) + RETURN(-ENOMEM); + oa->o_id = lsm->lsm_object_id; + oa->o_mode = S_IFREG; + oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och); + if (rc) + GOTO(out, rc); + +// file->f_flags &= ~O_LOV_DELAY_CREATE; + obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | + OBD_MD_FLCTIME); + + EXIT; +out: + obdo_free(oa); + return rc; +} + +static int llu_file_open(struct inode *inode) +{ +#if 0 + struct llu_sb_info *sbi = llu_i2sbi(inode); +#endif + struct llu_inode_info *lli = llu_i2info(inode); + struct lustre_handle *conn = llu_i2obdconn(inode); + struct lookup_intent *it = NULL; /* no intent available yet: LL_GET_INTENT below is compiled out */ + struct lov_stripe_md *lsm; + int rc = 0; + +#if 0 + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); + LL_GET_INTENT(file->f_dentry, it); + rc = ll_it_open_error(IT_OPEN_OPEN, it); + if (rc) + RETURN(rc); +#endif + rc = llu_local_open(lli, it); + if (rc) + LBUG(); +#if 0 + mdc_set_open_replay_data(&((struct ll_file_data *) + file->private_data)->fd_mds_och); +#endif + lsm = lli->lli_smd; + if (lsm == NULL) { +#if 0 + if (file->f_flags & O_LOV_DELAY_CREATE) { + CDEBUG(D_INODE, "delaying object creation\n"); + RETURN(0); + } +#endif + if (!lli->lli_smd) { + rc = llu_create_obj(conn, inode, NULL); + if (rc) + GOTO(out_close, rc); + } else { + CERROR("warning: stripe already set on ino %lu\n", + lli->lli_st_ino); + } + lsm = lli->lli_smd; + } + + rc = llu_osc_open(conn, inode, lsm); + if (rc) + GOTO(out_close, rc); + RETURN(0); + + 
out_close: +// ll_mdc_close(&sbi->ll_mdc_conn, inode, file); + return rc; +} + +int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) +{ + struct inode *dir = pnode->p_parent->p_base->pb_ino; + int rc; + /* FIXME later we must add the ldlm here */ + + LASSERT(dir); + + /* libsysio forgot to guarantee mode is valid XXX */ + mode |= S_IFREG; + + if (!pnode->p_base->pb_ino) { + rc = llu_create(dir, pnode->p_base, mode); + if (rc) + return rc; + } + + LASSERT(pnode->p_base->pb_ino); + return llu_file_open(pnode->p_base->pb_ino); +} + + +static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + struct ptlrpc_request *req = NULL; + unsigned long flags; + struct obd_import *imp; + int rc = 0; /* stays 0 while the mdc_close() call below is compiled out */ + + /* FIXME add following code later FIXME */ +#if 0 + /* Complete the open request and remove it from replay list */ + rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, lli->lli_st_ino, + inode->i_mode, &fd->fd_mds_och.och_fh, &req); + if (rc) + CERROR("inode %lu close failed: rc = %d\n", + lli->lli_st_ino, rc); + + imp = fd->fd_mds_och.och_req->rq_import; + LASSERT(imp != NULL); + spin_lock_irqsave(&imp->imp_lock, flags); + + DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p", + fd->fd_mds_och.och_req); + + /* We held on to the request for replay until we saw a close for that + * file. Now that we've closed it, it gets replayed on the basis of + * its transno only. */ + fd->fd_mds_och.och_req->rq_replay = 0; + + if (fd->fd_mds_och.och_req->rq_transno) { + /* This open created a file, so it needs replay as a + * normal transaction now. Our reference to it now + * effectively owned by the imp_replay_list, and it'll + * be committed just like other transno-having + * requests from here on out. */ + + /* We now retain this close request, so that it is + * replayed if the open is replayed. 
We duplicate the + * transno, so that we get freed at the right time, + * and rely on the difference in xid to keep + * everything ordered correctly. + * + * But! If this close was already given a transno + * (because it caused real unlinking of an + * open-unlinked file, f.e.), then we'll be ordered on + * the basis of that and we don't need to do anything + * magical here. */ + if (!req->rq_transno) { + req->rq_transno = fd->fd_mds_och.och_req->rq_transno; + ptlrpc_retain_replayable_request(req, imp); + } + spin_unlock_irqrestore(&imp->imp_lock, flags); + + /* Should we free_committed now? we always free before + * replay, so it's probably a wash. We could check to + * see if the fd_req should already be committed, in + * which case we can avoid the whole retain_replayable + * dance. */ + } else { + /* No transno means that we can just drop our ref. */ + spin_unlock_irqrestore(&imp->imp_lock, flags); + } + ptlrpc_req_finished(fd->fd_mds_och.och_req); + + /* Do this after the fd_req->rq_transno check, because we don't want + * to bounce off zero references. */ + ptlrpc_req_finished(req); + fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC; +#endif + lli->lli_file_data = NULL; + free(fd); + + RETURN(-abs(rc)); +} + +static int llu_file_release(struct inode *inode) +{ + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct llu_inode_info *lli = llu_i2info(inode); + struct lov_stripe_md *lsm = lli->lli_smd; + struct ll_file_data *fd; + struct obdo oa; + int rc = 0, rc2; + + fd = lli->lli_file_data; + if (!fd) /* no process opened the file after an mcreate */ + RETURN(rc = 0); + + /* we might not be able to get a valid handle on this file + * again so we really want to flush our write cache.. 
*/ + if (S_ISREG(inode->i_mode) && lsm) { + memset(&oa, 0, sizeof(oa)); + oa.o_id = lsm->lsm_object_id; + oa.o_mode = S_IFREG; + oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; + + memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); + oa.o_valid |= OBD_MD_FLHANDLE; + + rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); + if (rc) + CERROR("inode %lu object close failed: rc = " + "%d\n", lli->lli_st_ino, rc); + } + + rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode); + if (rc2 && !rc) + rc = rc2; + + RETURN(rc); +} + +int llu_iop_close(struct inode *inode) +{ + return llu_file_release(inode); +} + +int llu_iop_ipreadv(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp) +{ + struct ioctx *ioctx; + + if (!ioargs->ioarg_iovlen) + return 0; + if (ioargs->ioarg_iovlen < 0) + return -EINVAL; + + ioctx = _sysio_ioctx_new(ino, ioargs); + if (!ioctx) + return -ENOMEM; + + ioctx->ioctx_cc = llu_file_read(ino, + ioctx->ioctx_iovec, + ioctx->ioctx_iovlen, + ioctx->ioctx_offset); + if (ioctx->ioctx_cc < 0) + ioctx->ioctx_errno = ioctx->ioctx_cc; + + *ioctxp = ioctx; + return 0; +} + +int llu_iop_ipwritev(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp) +{ + struct ioctx *ioctx; + + if (!ioargs->ioarg_iovlen) + return 0; + if (ioargs->ioarg_iovlen < 0) + return -EINVAL; + + ioctx = _sysio_ioctx_new(ino, ioargs); + if (!ioctx) + return -ENOMEM; + + ioctx->ioctx_cc = llu_file_write(ino, + ioctx->ioctx_iovec, + ioctx->ioctx_iovlen, + ioctx->ioctx_offset); + if (ioctx->ioctx_cc < 0) + ioctx->ioctx_errno = ioctx->ioctx_cc; + + *ioctxp = ioctx; + return 0; +} + diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c new file mode 100644 index 0000000..4fd83fd --- /dev/null +++ b/lustre/liblustre/llite_lib.c @@ -0,0 +1,224 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LLITE + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include /* needed for ptpctl.h */ +#include /* needed for parse_dump */ + +#include "llite_lib.h" + + +ptl_handle_ni_t tcpnal_ni; +struct task_struct *current; +struct obd_class_user_state ocus; + +/* portals interfaces */ +ptl_handle_ni_t * +kportal_get_ni (int nal) +{ + return &tcpnal_ni; +} + +inline void +kportal_put_ni (int nal) +{ + return; +} + +struct ldlm_namespace; +struct ldlm_res_id; +struct obd_import; + +extern int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, int flags); +extern int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only); +extern int ldlm_replay_locks(struct obd_import *imp); + +void *inter_module_get(char *arg) +{ + if (!strcmp(arg, "tcpnal_ni")) + return &tcpnal_ni; + else if (!strcmp(arg, "ldlm_cli_cancel_unused")) + return ldlm_cli_cancel_unused; + else if (!strcmp(arg, "ldlm_namespace_cleanup")) + return ldlm_namespace_cleanup; + else if (!strcmp(arg, "ldlm_replay_locks")) + return ldlm_replay_locks; + else + return NULL; +} + +void init_current(char *comm) +{ + current = malloc(sizeof(*current)); + current->fs = 
malloc(sizeof(*current->fs)); + strncpy(current->comm, comm, sizeof(current->comm)); + current->pid = getpid(); + current->fsuid = 0; + current->fsgid = 0; + current->cap_effective = 0; + memset(¤t->pending, 0, sizeof(current->pending)); +} + +ptl_nid_t tcpnal_mynid; + +int init_lib_portals() +{ + int rc; + + PtlInit(); + rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); + if (rc != 0) { + CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); + PtlFini(); + RETURN (rc); + } + PtlNIDebug(tcpnal_ni, ~0); + return rc; +} + +extern int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, unsigned long arg); + +struct mount_option_s mount_option = {NULL, NULL}; + +/* FIXME simple arg parser FIXME */ +void parse_mount_options(void *arg) +{ + char *buf = NULL; + struct obd_ioctl_data *data; + char *ptr, *comma, *eq, **tgt, *v; + int len; + + if (obd_ioctl_getdata(&buf, &len, arg)) { + CERROR("OBD ioctl: data error\n"); + return; + } + data = (struct obd_ioctl_data *)buf; + ptr = data->ioc_inlbuf1; + printf("mount option: %s\n", ptr); + + while (ptr) { + eq = strchr(ptr, '='); + if (!eq) + return; + + *eq = 0; + if (!strcmp("osc", ptr)) + tgt = &mount_option.osc_uuid; + else if (!strcmp("mdc", ptr)) + tgt = &mount_option.mdc_uuid; + else { + printf("Unknown mount option %s\n", ptr); + return; + } + + v = eq + 1; + comma = strchr(v, ','); + if (comma) { + *comma = 0; + ptr = comma + 1; + } else + ptr = NULL; + + *tgt = malloc(strlen(v)+1); + strcpy(*tgt, v); + } + + if (buf) + obd_ioctl_freedata(buf, len); +} + +int lib_ioctl(int dev_id, int opc, void * ptr) +{ + int rc; + + if (dev_id == OBD_DEV_ID) { + struct obd_ioctl_data *ioc = ptr; + + if (opc == OBD_IOC_MOUNTOPT) { + parse_mount_options(ptr); + return 0; + } + + rc = class_handle_ioctl(&ocus, opc, (unsigned long)ptr); + + /* you _may_ need to call obd_ioctl_unpack or some + other verification function if you want to use ioc + directly here */ + printf ("processing ioctl cmd: %x buf len: 
%d, rc %d\n", + opc, ioc->ioc_len, rc); + + if (rc) + return rc; + } + return (0); +} + +int lllib_init(char *arg) +{ + tcpnal_mynid = ntohl(inet_addr(arg)); + INIT_LIST_HEAD(&ocus.ocus_conns); + + init_current("dummy"); + if (init_obdclass() || + init_lib_portals() || + ptlrpc_init() || + ldlm_init() || + mdc_init() || + lov_init() || + osc_init()) + return -1; + + if (parse_dump("/tmp/DUMP_FILE", lib_ioctl)) + return -1; + + return _sysio_fssw_register("llite", &llu_fssw_ops); +} + +/* FIXME rand()-based filler for all 16 bytes; uuid_out decays to a pointer, so its sizeof must not be used as the length */ +void generate_random_uuid(unsigned char uuid_out[16]) +{ + int *arr = (int*)uuid_out; + int i; + + for (i = 0; i < (int)(16 / sizeof(int)); i++) + arr[i] = rand(); +} + diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h new file mode 100644 index 0000000..107e51f --- /dev/null +++ b/lustre/liblustre/llite_lib.h @@ -0,0 +1,129 @@ +#ifndef __LLU_H_ +#define __LLU_H_ + +#include +#include +#include +#include +#include + +#include +#include + +struct ll_file_data { + struct obd_client_handle fd_mds_och; + struct obd_client_handle fd_ost_och; + __u32 fd_flags; +}; + +struct llu_sb_info +{ + struct obd_uuid ll_sb_uuid; + struct lustre_handle ll_mdc_conn; + struct lustre_handle ll_osc_conn; + obd_id ll_rootino; + int ll_flags; + struct list_head ll_conn_chain; +}; + +struct llu_inode_info { + struct llu_sb_info *lli_sbi; + struct ll_fid lli_fid; + struct lov_stripe_md *lli_smd; + char *lli_symlink_name; + /*struct semaphore lli_open_sem;*/ + unsigned long lli_flags; + struct list_head lli_read_extents; + + /* in libsysio we have no chance to store data in file, + * so place it here */ + struct ll_file_data *lli_file_data; + + /* stat FIXME not 64 bit clean */ + dev_t lli_st_dev; + ino_t lli_st_ino; + mode_t lli_st_mode; + nlink_t lli_st_nlink; + uid_t lli_st_uid; + gid_t lli_st_gid; + dev_t lli_st_rdev; + loff_t lli_st_size; + unsigned int lli_st_blksize; + unsigned int lli_st_blocks; + time_t lli_st_atime; + time_t lli_st_mtime; + time_t lli_st_ctime; 
+ + /* not for stat, change it later */ + int lli_st_flags; + unsigned long lli_st_generation; +}; + +static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs) +{ + return (struct llu_sb_info*)(fs->fs_private); +} + +static inline struct llu_inode_info *llu_i2info(struct inode *inode) +{ + return (struct llu_inode_info*)(inode->i_private); +} + +static inline struct llu_sb_info *llu_i2sbi(struct inode *inode) +{ + return llu_i2info(inode)->lli_sbi; +} + +static inline struct client_obd *sbi2mdc(struct llu_sb_info *sbi) +{ + struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); + if (obd == NULL) + LBUG(); + return &obd->u.cli; +} + +static inline struct lustre_handle *llu_i2obdconn(struct inode *inode) +{ + return &(llu_i2info(inode)->lli_sbi->ll_osc_conn); +} + + +struct mount_option_s +{ + char *mdc_uuid; + char *osc_uuid; +}; + +/* llite_lib.c */ +void generate_random_uuid(unsigned char uuid_out[16]); + +extern struct mount_option_s mount_option; + +/* super.c */ +void llu_update_inode(struct inode *inode, struct mds_body *body, + struct lov_stripe_md *lmm); +void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); +void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); +struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode); + +extern struct fssw_ops llu_fssw_ops; + +/* file.c */ +int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); +int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); +int llu_iop_close(struct inode *inode); +int llu_iop_ipreadv(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp); +int llu_iop_ipwritev(struct inode *ino, + struct io_arguments *ioargs, + struct ioctx **ioctxp); + +/* rw.c */ +int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED); +ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec, + size_t iovlen, loff_t pos); +ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec, + size_t 
iovlen, loff_t pos); + +#endif diff --git a/lustre/liblustre/lltest.c b/lustre/liblustre/lltest.c new file mode 100644 index 0000000..8f38fe7 --- /dev/null +++ b/lustre/liblustre/lltest.c @@ -0,0 +1,147 @@ +/* + * This Cplant(TM) source code is the property of Sandia National + * Laboratories. + * + * This Cplant(TM) source code is copyrighted by Sandia National + * Laboratories. + * + * The redistribution of this Cplant(TM) source code is subject to the + * terms of the GNU Lesser General Public License + * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) + * + * Cplant(TM) Copyright 1998-2003 Sandia Corporation. + * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive + * license for use of this work by or on behalf of the US Government. + * Export of this program may require a license from the United States + * Government. + */ + +/* + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Questions or comments about this library should be sent to: + * + * Lee Ward + * Sandia National Laboratories, New Mexico + * P.O. 
Box 5800 + * Albuquerque, NM 87185-1110 + * + * lee@sandia.gov + */ + +#define _BSD_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +/* + * Get stats of file and file system. + * + * Usage: test_stats [-a] [-r ] [-m ] [ ...] + */ + +extern int lllib_init(char *arg); + +char *root_driver = "llite"; +char *root_path = "/"; +unsigned mntflgs = 0; +struct mount root_mount; + +extern int portal_debug; +extern int portal_subsystem_debug; + +char* files[] = {"/dir1", "/dir1/file1", "/dir1/file2", "/dir1/dir2", "/dir1/dir2/file3"}; + +int +main(int argc, char * const argv[]) +{ + struct stat statbuf; + int err, i, fd, written, readed; + char pgbuf[4096], readbuf[4096]; + int npages; + + if (_sysio_init() != 0) { + perror("init sysio"); + exit(1); + } + err = lllib_init(argv[1]); + if (err) { + perror("init llite driver"); + exit(1); + } + + err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL); + if (err) { + errno = -err; + perror(root_driver); + exit(1); + } +#if 0 + for (i=0; i< sizeof(files)/sizeof(char*); i++) { + printf("******** stat %s *********\n", files[i]); + /* XXX ugly, only for testing */ + err = fixme_lstat(files[i], &statbuf); + if (err) + perror(root_driver); + printf("******** end stat %s: %d*********\n", files[i], err); + } +#endif +#if 1 + portal_debug = 0; + portal_subsystem_debug = 0; + npages = 100; + + fd = open("/newfile01", O_RDWR|O_CREAT|O_TRUNC, 00664); + printf("***************** open return %d ****************\n", fd); + + printf("***************** begin write pages ****************\n"); + for (i = 0; i < npages; i++ ) { + memset(pgbuf, ('A'+ i%10), 4096); + written = write(fd, pgbuf, 4096); + printf(">>> page %d: %d bytes written\n", i, written); + } + + printf("***************** begin read pages ****************\n"); + lseek(fd, 0, SEEK_SET); + + for (i = 0; i < npages; i++ ) { + memset(readbuf, '8', 4096); + readed = read(fd, readbuf, 
4096); + readbuf[10] = 0; + printf("<<< page %d: %d bytes (%s)\n", i, readed, readbuf); + } +#endif + printf("sysio is about shutdown\n"); + /* + * Clean up. + */ + _sysio_shutdown(); + + printf("complete successfully\n"); + return 0; +} diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c new file mode 100644 index 0000000..da692b2 --- /dev/null +++ b/lustre/liblustre/rw.c @@ -0,0 +1,531 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+/* NOTE(review): the header names on the #include lines below are missing from
+ * this copy of the patch; restore them from the upstream file before
+ * building. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "llite_lib.h"
+
+/*
+ * inop_iodone hook.  Always reports the I/O context as complete (returns 1);
+ * the context itself is not examined.
+ */
+int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED)
+{
+        return 1;
+}
+
+/*
+ * this grabs a lock and manually implements behaviour that makes it look
+ * like the OST is returning the file size with each lock acquisition
+ *
+ * The real implementation is still compiled out below; for now this is a
+ * stub that takes no lock and always returns ELDLM_OK.
+ */
+int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
+                    struct lov_stripe_md *lsm,
+                    int mode, struct ldlm_extent *extent,
+                    struct lustre_handle *lockh)
+{
+#if 0
+        struct ll_inode_info *lli = ll_i2info(inode);
+        int rc;
+        ENTRY;
+
+        rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh);
+        if (rc != ELDLM_OK)
+                RETURN(rc);
+
+        /* always do a getattr for the first person to pop out of lock
+         * acquisition.. the DID_GETATTR flag and semaphore serialize
+         * this initial race.  we used to make a decision based on whether
+         * the lock was matched or acquired, but the matcher could win the
+         * waking race with the first issuer so that was no good..
+         */
+        if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags))
+                RETURN(ELDLM_OK);
+
+        down(&lli->lli_getattr_sem);
+
+        if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
+                rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+                if (rc == 0) {
+                        set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
+                } else {
+                        /* XXX can this fail? */
+                        ll_extent_unlock(fd, inode, lsm, mode, lockh);
+                }
+        }
+
+        up(&lli->lli_getattr_sem);
+        RETURN(rc);
+#else
+        return ELDLM_OK;
+#endif
+}
+
+/*
+ * Counterpart of llu_extent_lock().  The real implementation is compiled
+ * out; for now this is a stub that releases nothing and returns 0.
+ */
+int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode,
+                     struct lov_stripe_md *lsm, int mode,
+                     struct lustre_handle *lockh)
+{
+#if 0
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        int rc;
+        ENTRY;
+
+        /* XXX phil: can we do this?  won't it screw the file size up? */
+        if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) ||
+            (sbi->ll_flags & LL_SBI_NOLCK))
+                RETURN(0);
+
+        rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
+
+        RETURN(rc);
+#else
+        return 0;
+#endif
+}
+
+/*
+ * Synchronously transfer one whole page between memory and the object
+ * store.
+ *
+ * cmd   - OBD_BRW_READ or OBD_BRW_WRITE
+ * inode - file the page belongs to; supplies the stripe md and connection
+ * page  - page to transfer; its index selects the file offset
+ * flags - stored in the brw_page flag field
+ *
+ * Returns 0 on success, -ENOMEM if no brw set could be allocated, or the
+ * error reported by obd_brw() / the completion callback.
+ */
+static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        struct obd_brw_set *set;
+        struct brw_page pg;
+        int rc;
+        ENTRY;
+
+        set = obd_brw_set_new();
+        if (set == NULL)
+                RETURN(-ENOMEM);
+
+        pg.pg = page;
+        pg.off = ((obd_off)page->index) << PAGE_SHIFT;
+
+        /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME */
+#if 0
+        if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > lli->lli_st_size))
+                pg.count = lli->lli_st_size % PAGE_SIZE;
+        else
+#endif
+                pg.count = PAGE_SIZE;
+
+        CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
+               cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, lli->lli_st_ino,
+               pg.off, pg.off);
+        if (pg.count == 0) {
+                LBUG();
+        }
+
+        pg.flag = flags;
+
+        set->brw_callback = ll_brw_sync_wait;
+        rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL);
+        if (rc) {
+                if (rc != -EIO)
+                        CERROR("error from obd_brw: rc = %d\n", rc);
+        } else {
+                /* the transfer was queued; wait for it to finish */
+                rc = ll_brw_sync_wait(set, CB_PHASE_START);
+                if (rc)
+                        CERROR("error from callback: rc = %d\n", rc);
+        }
+        obd_brw_set_decref(set);
+
+        RETURN(rc);
+}
+
+/*
+ * Get a page ready to receive bytes [from, to).
+ *
+ * A full-page overwrite needs no preparation.  A page that starts at or
+ * beyond the current file size is zero-filled.  Otherwise the existing
+ * page contents are read from the object store so that the bytes outside
+ * [from, to) are preserved.
+ *
+ * Returns 0 on success or the error from llu_brw().
+ */
+static int llu_prepare_write(struct inode *inode, struct page *page,
+                             unsigned from, unsigned to)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
+        int rc;
+        ENTRY;
+
+#if 0
+        if (!PageLocked(page))
+                LBUG();
+
+        if (PageUptodate(page))
+                RETURN(0);
+
+        //POISON(addr + from, 0xca, to - from);
+#endif
+        /* We're completely overwriting an existing page, so _don't_ set it up
+         * to date until commit_write */
+        if (from == 0 && to == PAGE_SIZE)
+                RETURN(0);
+
+        /* If we are writing to a new page there is no need to read old data.
+         * The extent locking and getattr procedures in ll_file_write have
+         * guaranteed that i_size is stable enough for our zeroing needs */
+        if (lli->lli_st_size <= offset) {
+                memset(kmap(page), 0, PAGE_SIZE);
+                kunmap(page);
+                RETURN(0);
+        }
+
+        rc = llu_brw(OBD_BRW_READ, inode, page, 0);
+        RETURN(rc);
+}
+
+/*
+ * Push a page that now contains new data in [from, to) to the object store
+ * and grow the cached file size if the write extended the file.
+ *
+ * Returns 0 on success or the error from llu_brw(); the cached size is
+ * left untouched on error.
+ */
+static int llu_commit_write(struct inode *inode, struct page *page,
+                            unsigned from, unsigned to)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        loff_t size;
+        int rc;
+        ENTRY;
+#if 0
+        LASSERT(inode == file->f_dentry->d_inode);
+        LASSERT(PageLocked(page));
+
+        CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
+               inode, page, from, to, page->index);
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,from=%d,to=%d\n",
+               inode->i_ino, from, to);
+        /* to match full page case in prepare_write */
+        SetPageUptodate(page);
+        /* mark the page dirty, put it on mapping->dirty,
+         * mark the inode PAGES_DIRTY, put it on sb->dirty */
+        set_page_dirty(page);
+#endif
+        rc = llu_brw(OBD_BRW_WRITE, inode, page, 0);
+        if (rc)
+                RETURN(rc);
+
+        /* this is matched by a hack in obdo_to_inode at the moment */
+        size = (((obd_off)page->index) << PAGE_SHIFT) + to;
+        if (size > lli->lli_st_size)
+                lli->lli_st_size = size;
+
+        RETURN(0);
+} /* llu_commit_write */
+
+/*
+ * Write count bytes from buf to the file at offset pos, one page at a time
+ * (prepare page, copy, commit page).
+ *
+ * Returns the number of bytes written if any were written, otherwise 0 for
+ * a zero-length request or a negative errno (-EINVAL for a negative count
+ * or offset, -ENOMEM if no page could be obtained, or the error from the
+ * prepare/commit step).  A zero-length request performs no I/O.
+ */
+ssize_t
+llu_generic_file_write(struct inode *inode, const char *buf,
+                       size_t count, loff_t pos)
+{
+        ssize_t written = 0;
+        long status = 0;
+
+        if ((ssize_t) count < 0)
+                return -EINVAL;
+#if 0
+        down(&inode->i_sem);
+#endif
+        if (pos < 0)
+                return -EINVAL;
+
+#if 0
+        remove_suid(inode);
+        update_inode_times(inode);
+#endif
+        while (count) {
+                unsigned long index, offset;
+                struct page *page;
+                unsigned bytes;
+                char *kaddr;
+
+                /*
+                 * Try to find the page in the cache. If it isn't there,
+                 * allocate a free page.
+                 */
+                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+                index = pos >> PAGE_CACHE_SHIFT;
+                bytes = PAGE_CACHE_SIZE - offset;
+                if (bytes > count) {
+                        bytes = count;
+                }
+
+                page = __grab_cache_page(index);
+                if (!page) {
+                        status = -ENOMEM;
+                        break;
+                }
+
+                kaddr = kmap(page);
+                status = llu_prepare_write(inode, page, offset, offset+bytes);
+                if (status) {
+                        kunmap(page);
+                        page_cache_release(page);
+                        break;
+                }
+
+                memcpy(kaddr+offset, buf, bytes);
+
+                status = llu_commit_write(inode, page, offset, offset+bytes);
+                if (!status)
+                        status = bytes;
+
+                kunmap(page);
+                page_cache_release(page);
+
+                if (status < 0)
+                        break;
+
+                written += status;
+                count -= status;
+                pos += status;
+                buf += status;
+        }
+
+#if 0
+        up(&inode->i_sem);
+#endif
+        /* a partial write reports its progress rather than the error */
+        return written ? written : status;
+}
+
+/*
+ * Gather-write: write the iovlen segments of iovec, in array order, to
+ * consecutive file offsets starting at pos.
+ *
+ * Each non-empty segment is written under its own PW extent lock.  The
+ * call stops at the first error or short write.
+ *
+ * Returns the total number of bytes written; if nothing was written,
+ * returns 0 or the negative errno that stopped the call (-ENOLCK if the
+ * extent lock could not be obtained).
+ */
+ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec,
+                       size_t iovlen, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data; /* XXX not ready don't use it now */
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        ssize_t retval = 0;
+        size_t i;
+        ENTRY;
+
+        /* XXX consider other types later */
+        if (!S_ISREG(lli->lli_st_mode))
+                LBUG();
+#if 0
+        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,size="LPSZ",offset=%Ld\n",
+               inode->i_ino, count, *ppos);
+
+        /*
+         * sleep doing some writeback work of this mount's dirty data
+         * if the VM thinks we're low on memory.. other dirtying code
+         * paths should think about doing this, too, but they should be
+         * careful not to hold locked pages while they do so.  like
+         * ll_prepare_write.  *cough*
+         */
+        ll_check_dirty(inode->i_sb);
+#endif
+        for (i = 0; i < iovlen; i++) {
+                struct lustre_handle lockh = { 0, 0 };
+                struct ldlm_extent extent;
+                const char *buf = iovec[i].iov_base;
+                size_t count = iovec[i].iov_len;
+                ldlm_error_t err;
+                ssize_t rc;
+
+                /* POSIX, but surprised the VFS doesn't check this already */
+                if (count == 0)
+                        continue;
+
+#if 0
+                if (!S_ISBLK(lli->lli_st_mode) && file->f_flags & O_APPEND) {
+                        extent.start = 0;
+                        extent.end = OBD_OBJECT_EOF;
+                } else  {
+                        extent.start = *ppos;
+                        extent.end = *ppos + count - 1;
+                }
+#else
+                extent.start = pos;
+                extent.end = pos + count - 1;
+#endif
+
+                err = llu_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh);
+                if (err != ELDLM_OK) {
+                        if (retval == 0)
+                                retval = -ENOLCK;
+                        break;
+                }
+
+#if 0
+                if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+                        *ppos = inode->i_size;
+
+                CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
+                       inode->i_ino, count, *ppos);
+#endif
+                rc = llu_generic_file_write(inode, buf, count, pos);
+
+                /* XXX errors? */
+                ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh);
+
+                if (rc < 0) {
+                        /* report the error only if nothing was written */
+                        if (retval == 0)
+                                retval = rc;
+                        break;
+                }
+
+                retval += rc;
+                pos += rc;
+
+                /* short write: later segments must not be written */
+                if ((size_t)rc < count)
+                        break;
+        }
+
+        RETURN(retval);
+}
+
+/*
+ * Record an access to the inode.  The implementation is compiled out, so
+ * this currently does nothing.
+ */
+static void llu_update_atime(struct inode *inode)
+{
+#if 0
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+#ifdef USE_ATIME
+        struct iattr attr;
+
+        attr.ia_atime = LTIME_S(CURRENT_TIME);
+        attr.ia_valid = ATTR_ATIME;
+
+        if (lli->lli_st_atime == attr.ia_atime) return;
+        if (IS_RDONLY(inode)) return;
+        if (IS_NOATIME(inode)) return;
+
+        /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */
+        llu_inode_setattr(inode, &attr, 0);
+#else
+        /* update atime, but don't explicitly write it out just this change */
+        inode->i_atime = CURRENT_TIME;
+#endif
+#endif
+}
+
+/*
+ * Read up to count bytes from the file at offset pos into buf, one page at
+ * a time, stopping at the cached end of file.
+ *
+ * Returns the number of bytes read if any were read (0 at end of file),
+ * otherwise the negative errno that stopped the read (-ENOMEM if no page
+ * could be obtained, or the error from llu_brw()).
+ */
+static ssize_t llu_generic_file_read(struct inode *inode, char *buf,
+                                     size_t count, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        unsigned long index, offset;
+        int error = 0;
+        ssize_t readed = 0;
+
+        index = pos >> PAGE_CACHE_SHIFT;
+        offset = pos & ~PAGE_CACHE_MASK;
+
+        while (count) {
+                struct page *page;
+                unsigned long end_index, nr;
+                char *kaddr;
+
+                end_index = lli->lli_st_size >> PAGE_CACHE_SHIFT;
+
+                if (index > end_index)
+                        break;
+                nr = PAGE_CACHE_SIZE;
+                if (index == end_index) {
+                        /* last page: only the bytes below i_size are valid */
+                        nr = lli->lli_st_size & ~PAGE_CACHE_MASK;
+                        if (nr <= offset)
+                                break;
+                }
+
+                nr = nr - offset;
+                if (nr > count)
+                        nr = count;
+
+                page = grab_cache_page(index);
+                if (!page) {
+                        error = -ENOMEM;
+                        break;
+                }
+
+                error = llu_brw(OBD_BRW_READ, inode, page, 0);
+                if (error) {
+                        page_cache_release(page);
+                        break;
+                }
+
+                kaddr = kmap(page);
+                memcpy(buf, kaddr + offset, nr);
+                kunmap(page);
+                page_cache_release(page);
+
+                /* advance the destination as well as the file position */
+                buf += nr;
+                offset += nr;
+                index += offset >> PAGE_CACHE_SHIFT;
+                offset &= ~PAGE_CACHE_MASK;
+                readed += nr;
+                count -= nr;
+        }
+
+        /* a partial read reports its progress rather than the error */
+        return readed ? readed : error;
+}
+
+/*
+ * Scatter-read: fill the iovlen segments of iovec, in array order, from
+ * consecutive file offsets starting at pos.
+ *
+ * Each non-empty segment is read under its own PR extent lock.  The call
+ * stops at the first error or short read (end of file).
+ *
+ * Returns the total number of bytes read; if nothing was read, returns 0
+ * or the negative errno that stopped the call (-ENOLCK if the extent lock
+ * could not be obtained).
+ */
+ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec,
+                      size_t iovlen, loff_t pos)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        ssize_t retval = 0;
+        size_t i;
+        ENTRY;
+
+        for (i = 0; i < iovlen; i++) {
+                struct lustre_handle lockh = { 0, 0 };
+#if 0
+                struct ll_read_extent rextent;
+#else
+                struct ldlm_extent extent;
+#endif
+                char *buf = iovec[i].iov_base;
+                size_t count = iovec[i].iov_len;
+                ldlm_error_t err;
+                ssize_t rc;
+
+                /* an empty segment contributes nothing; move on to the next */
+                if (count == 0)
+                        continue;
+
+#if 0
+                rextent.re_extent.start = pos;
+                rextent.re_extent.end = pos + count - 1;
+#else
+                extent.start = pos;
+                extent.end = pos + count - 1;
+#endif
+                err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh);
+                if (err != ELDLM_OK) {
+                        if (retval == 0)
+                                retval = -ENOLCK;
+                        break;
+                }
+#if 0
+                rextent.re_task = current;
+                spin_lock(&lli->lli_read_extent_lock);
+                list_add(&rextent.re_lli_item, &lli->lli_read_extents);
+                spin_unlock(&lli->lli_read_extent_lock);
+#endif
+                CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
+                       lli->lli_st_ino, count, pos);
+                rc = llu_generic_file_read(inode, buf, count, pos);
+#if 0
+                spin_lock(&lli->lli_read_extent_lock);
+                list_del(&rextent.re_lli_item);
+                spin_unlock(&lli->lli_read_extent_lock);
+#endif
+                /* XXX errors? */
+                ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh);
+
+                if (rc < 0) {
+                        /* report the error only if nothing was read */
+                        if (retval == 0)
+                                retval = rc;
+                        break;
+                }
+
+                retval += rc;
+                pos += rc;
+
+                /* short read means end of file: leave later segments alone */
+                if ((size_t)rc < count)
+                        break;
+        }
+
+        if (retval > 0)
+                llu_update_atime(inode);
+
+        RETURN(retval);
+}
+
diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c
new file mode 100644
index 0000000..de74554
--- /dev/null
+++ b/lustre/liblustre/super.c
@@ -0,0 +1,580 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LLITE
+
+/* NOTE(review): the header names on the #include lines below are missing from
+ * this copy of the patch; restore them from the upstream file before
+ * building. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include "llite_lib.h"
+
+/*
+ * fsop_gone hook, called when the filesystem record goes away.
+ * Not implemented yet: nothing is released here.
+ */
+static void llu_fsop_gone(struct filesys *fs)
+{
+        /* FIXME */
+}
+
+/* defined near the end of this file; needed by llu_new_inode() */
+static struct inode_ops llu_inode_ops;
+
+/*
+ * Copy the attributes flagged in body->valid from an MDS reply body into
+ * the inode's private info, and attach the stripe md if one was supplied.
+ *
+ * lsm must be non-NULL exactly when body->valid has OBD_MD_FLEASIZE set.
+ * If the inode already has a stripe md, lsm must be identical to it.
+ */
+void llu_update_inode(struct inode *inode, struct mds_body *body,
+                      struct lov_stripe_md *lsm)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+        LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
+        if (lsm != NULL) {
+                if (lli->lli_smd == NULL)
+                        lli->lli_smd = lsm;
+                else
+                        LASSERT (!memcmp (lli->lli_smd, lsm,
+                                          sizeof (*lsm)));
+        }
+
+        if (body->valid & OBD_MD_FLID)
+                lli->lli_st_ino = body->ino;
+        if (body->valid & OBD_MD_FLATIME)
+                LTIME_S(lli->lli_st_atime) = body->atime;
+        if (body->valid & OBD_MD_FLMTIME)
+                LTIME_S(lli->lli_st_mtime) = body->mtime;
+        if (body->valid & OBD_MD_FLCTIME)
+                LTIME_S(lli->lli_st_ctime) = body->ctime;
+        /* FLMODE replaces the permission bits, FLTYPE the file-type bits */
+        if (body->valid & OBD_MD_FLMODE)
+                lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT);
+        if (body->valid & OBD_MD_FLTYPE)
+                lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT);
+        if (body->valid & OBD_MD_FLUID)
+                lli->lli_st_uid = body->uid;
+        if (body->valid & OBD_MD_FLGID)
+                lli->lli_st_gid = body->gid;
+        if (body->valid & OBD_MD_FLFLAGS)
+                lli->lli_st_flags = body->flags;
+        if (body->valid & OBD_MD_FLNLINK)
+                lli->lli_st_nlink = body->nlink;
+        if (body->valid & OBD_MD_FLGENER)
+                lli->lli_st_generation = body->generation;
+        if (body->valid & OBD_MD_FLRDEV)
+                lli->lli_st_rdev = body->rdev;
+        if (body->valid & OBD_MD_FLSIZE)
+                lli->lli_st_size = body->size;
+        if (body->valid & OBD_MD_FLBLOCKS)
+                lli->lli_st_blocks = body->blocks;
+
+        /* fillin fid */
+        if (body->valid & OBD_MD_FLID)
+                lli->lli_fid.id = body->ino;
+        if (body->valid & OBD_MD_FLGENER)
+                lli->lli_fid.generation = body->generation;
+        if (body->valid & OBD_MD_FLTYPE)
+                lli->lli_fid.f_type = body->mode & S_IFMT;
+}
+
+/*
+ * Copy attributes from an obdo into the inode's private info.  Only the
+ * fields selected by both valid and src->o_valid are copied; ctime is
+ * only ever moved forward.
+ */
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+        struct llu_inode_info *lli = llu_i2info(dst);
+
+        valid &= src->o_valid;
+
+        if (valid & OBD_MD_FLATIME)
+                LTIME_S(lli->lli_st_atime) = src->o_atime;
+        if (valid & OBD_MD_FLMTIME)
+                LTIME_S(lli->lli_st_mtime) = src->o_mtime;
+        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime))
+                LTIME_S(lli->lli_st_ctime) = src->o_ctime;
+        if (valid & OBD_MD_FLSIZE)
+                lli->lli_st_size = src->o_size;
+        if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
+                lli->lli_st_blocks = src->o_blocks;
+        if (valid & OBD_MD_FLBLKSZ)
+                lli->lli_st_blksize = src->o_blksize;
+        if (valid & OBD_MD_FLTYPE)
+                lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
+        if (valid & OBD_MD_FLMODE)
+                lli->lli_st_mode = (lli->lli_st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
+        if (valid & OBD_MD_FLUID)
+                lli->lli_st_uid = src->o_uid;
+        if (valid & OBD_MD_FLGID)
+                lli->lli_st_gid = src->o_gid;
+        if (valid & OBD_MD_FLFLAGS)
+                lli->lli_st_flags = src->o_flags;
+        if (valid & OBD_MD_FLNLINK)
+                lli->lli_st_nlink = src->o_nlink;
+        if (valid & OBD_MD_FLGENER)
+                lli->lli_st_generation = src->o_generation;
+        if (valid & OBD_MD_FLRDEV)
+                lli->lli_st_rdev = src->o_rdev;
+}
+
+/*
+ * Copy the attributes selected by valid from the inode's private info into
+ * an obdo, and add those flags (except OBD_MD_FLID, which is never filled
+ * in here) to dst->o_valid.
+ */
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
+{
+        struct llu_inode_info *lli = llu_i2info(src);
+
+        if (valid & OBD_MD_FLATIME)
+                dst->o_atime = LTIME_S(lli->lli_st_atime);
+        if (valid & OBD_MD_FLMTIME)
+                dst->o_mtime = LTIME_S(lli->lli_st_mtime);
+        if (valid & OBD_MD_FLCTIME)
+                dst->o_ctime = LTIME_S(lli->lli_st_ctime);
+        if (valid & OBD_MD_FLSIZE)
+                dst->o_size = lli->lli_st_size;
+        if (valid & OBD_MD_FLBLOCKS)   /* allocation of space */
+                dst->o_blocks = lli->lli_st_blocks;
+        if (valid & OBD_MD_FLBLKSZ)
+                dst->o_blksize = lli->lli_st_blksize;
+        if (valid & OBD_MD_FLTYPE)
+                dst->o_mode = (dst->o_mode & ~S_IFMT) | (lli->lli_st_mode & S_IFMT);
+        if (valid & OBD_MD_FLMODE)
+                dst->o_mode = (dst->o_mode & S_IFMT) | (lli->lli_st_mode & ~S_IFMT);
+        if (valid & OBD_MD_FLUID)
+                dst->o_uid = lli->lli_st_uid;
+        if (valid & OBD_MD_FLGID)
+                dst->o_gid = lli->lli_st_gid;
+        if (valid & OBD_MD_FLFLAGS)
+                dst->o_flags = lli->lli_st_flags;
+        if (valid & OBD_MD_FLNLINK)
+                dst->o_nlink = lli->lli_st_nlink;
+        if (valid & OBD_MD_FLGENER)
+                dst->o_generation = lli->lli_st_generation;
+        if (valid & OBD_MD_FLRDEV)
+                dst->o_rdev = (__u32)(lli->lli_st_rdev);
+
+        dst->o_valid |= (valid & ~OBD_MD_FLID);
+}
+
+/*
+ * Fetch size, blocks, mtime and ctime for the inode's object through the
+ * OSC connection and store them in the inode.
+ *
+ * lsm     - stripe md naming the object; must not be NULL
+ * ostdata - optional FD_OSTDATA_SIZE bytes of open-handle data to send
+ *           along with the request, or NULL
+ *
+ * Returns 0 on success or the error from obd_getattr().
+ */
+int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
+                      char *ostdata)
+{
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        struct obdo oa;
+        int rc;
+        ENTRY;
+
+        LASSERT(lsm);
+        LASSERT(sbi);
+
+        memset(&oa, 0, sizeof oa);
+        oa.o_id = lsm->lsm_object_id;
+        oa.o_mode = S_IFREG;
+        oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+                OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+
+        if (ostdata != NULL) {
+                memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
+                oa.o_valid |= OBD_MD_FLHANDLE;
+        }
+
+        rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
+        if (rc)
+                RETURN(rc);
+
+        obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+                           OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+        RETURN(0);
+}
+
+/*
+ * Allocate the private inode info and register a new sysio inode for it.
+ *
+ * Returns the new inode, or NULL if either allocation fails (the private
+ * info is freed again if the sysio inode could not be created).
+ */
+struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode)
+{
+        struct inode *inode;
+        struct llu_inode_info *lli;
+
+        OBD_ALLOC(lli, sizeof(*lli));
+        if (!lli)
+                return NULL;
+
+        /* initialize lli here */
+        lli->lli_sbi = llu_fs2sbi(fs);
+        lli->lli_smd = NULL;
+        lli->lli_symlink_name = NULL;
+        lli->lli_flags = 0;
+        INIT_LIST_HEAD(&lli->lli_read_extents);
+        lli->lli_file_data = NULL;
+
+        /* could file_identifier be 0 ? FIXME */
+        inode = _sysio_i_new(fs, ino, NULL,
+#ifndef AUTOMOUNT_FILE_NAME
+                             mode & S_IFMT,
+#else
+                             mode,      /* all of the bits! */
+#endif
+                             0,
+                             &llu_inode_ops, lli);
+
+        if (!inode)
+                OBD_FREE(lli, sizeof(*lli));
+
+        return inode;
+}
+
+/*
+ * inop_lookup hook: resolve pnode's name in its parent directory.
+ *
+ * For the mount root the existing inode is referenced and returned without
+ * contacting the server.  Otherwise the attributes (and, for regular
+ * files, the striping EA) are fetched from the MDS by name, a new inode is
+ * built from the reply, and its object attributes are fetched from the
+ * OST.
+ *
+ * On success stores the inode in *inop and returns 0.  On failure returns
+ * a negative errno and leaves *inop untouched; everything allocated along
+ * the way is released.
+ */
+static int llu_iop_lookup(struct pnode *pnode,
+                          struct inode **inop,
+                          struct intent *intnt __IS_UNUSED,
+                          const char *path __IS_UNUSED)
+{
+        struct pnode_base *pb_dir = pnode->p_parent->p_base;
+        struct ptlrpc_request *request = NULL;
+        struct llu_sb_info *sbi = llu_i2sbi(pb_dir->pb_ino);
+        struct ll_fid *fid = &llu_i2info(pb_dir->pb_ino)->lli_fid;
+        struct qstr *name = &pnode->p_base->pb_name;
+        struct lov_stripe_md *lsm = NULL;
+        struct mds_body *body;
+        struct inode *inode;
+        unsigned long valid;
+        char *pname;
+        int rc, easize;
+
+        /* the mount root inode have no name, so don't call
+         * remote in this case. but probably we need revalidate
+         * it here? FIXME */
+        if (pnode->p_mount->mnt_root == pnode) {
+                struct inode *i = pnode->p_base->pb_ino;
+                I_REF(i);
+                *inop = i;
+                return 0;
+        }
+
+        if (!name->len)
+                return -EINVAL;
+
+        /* mdc_getattr_name require NULL-terminated name */
+        pname = malloc(name->len + 1);
+        if (!pname)
+                return -ENOMEM;
+        memcpy(pname, name->name, name->len);
+        pname[name->len] = 0;
+
+        valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE;
+
+        /* FIXME before getattr_name, we don't know whether
+         * the inode we are finding is regular or not, so here
+         * we blindly require server feed in EA data */
+        easize = obd_size_diskmd(&sbi->ll_osc_conn, NULL);
+        valid |= OBD_MD_FLEASIZE;
+
+        rc = mdc_getattr_name(&sbi->ll_mdc_conn, fid,
+                              pname, name->len + 1,
+                              valid, easize, &request);
+        if (rc < 0) {
+                CERROR("mdc_getattr_name: %d\n", rc);
+                rc = -ENOENT;
+                goto out;
+        }
+
+        /* validate the reply body before anything reads from it */
+        body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
+        LASSERT (body != NULL);
+        LASSERT_REPSWABBED (request, 0);
+
+        /* unpack the striping EA first, so that a malformed reply is
+         * rejected before any inode has been created */
+        if (S_ISREG(body->mode) &&
+            body->valid & OBD_MD_FLEASIZE) {
+                struct lov_mds_md *lmm;
+                int lmm_size;
+                int lsm_size;
+
+                lmm_size = body->eadatasize;
+                if (lmm_size == 0) {
+                        CERROR ("OBD_MD_FLEASIZE set but eadatasize 0\n");
+                        rc = -EPROTO;
+                        goto out;
+                }
+                lmm = lustre_msg_buf(request->rq_repmsg, 0 + 1, lmm_size);
+                LASSERT(lmm != NULL);
+                LASSERT_REPSWABBED (request, 0 + 1);
+
+                lsm_size = obd_unpackmd (&sbi->ll_osc_conn,
+                                         &lsm, lmm, lmm_size);
+                if (lsm_size < 0) {
+                        CERROR ("Error %d unpacking eadata\n", lsm_size);
+                        rc = lsm_size;
+                        goto out;
+                }
+                LASSERT (lsm_size >= sizeof (*lsm));
+        }
+
+        inode = llu_new_inode(pnode->p_mount->mnt_fs, body->ino, body->mode);
+        if (!inode) {
+                /* the stripe md has no owner yet, so release it here */
+                if (lsm != NULL)
+                        obd_free_memmd(&sbi->ll_osc_conn, &lsm);
+                rc = -ENOMEM;
+                goto out;
+        }
+
+        /* from here on the inode owns lsm */
+        llu_update_inode(inode, body, lsm);
+
+        if (llu_i2info(inode)->lli_smd) {
+                rc = llu_inode_getattr(inode, llu_i2info(inode)->lli_smd, NULL);
+                if (rc) {
+                        _sysio_i_gone(inode);
+                        goto out;
+                }
+        }
+
+        *inop = inode;
+
+out:
+        ptlrpc_req_finished(request);
+        free(pname);
+
+        return rc;
+}
+
+/*
+ * inop_getattr hook: fill the stat buffer from the cached attributes held
+ * in the inode's private info.  No server round trip is made.  Always
+ * returns 0.
+ */
+static int llu_iop_getattr(struct pnode *pno,
+                           struct inode *ino,
+                           struct intnl_stat *b)
+{
+        struct llu_inode_info *lli = llu_i2info(ino);
+
+        b->st_dev = lli->lli_st_dev;
+        b->st_ino = lli->lli_st_ino;
+        b->st_mode = lli->lli_st_mode;
+        b->st_nlink = lli->lli_st_nlink;
+        b->st_uid = lli->lli_st_uid;
+        b->st_gid = lli->lli_st_gid;
+        b->st_rdev = lli->lli_st_rdev;
+        b->st_size = lli->lli_st_size;
+        b->st_blksize = lli->lli_st_blksize;
+        b->st_blocks = lli->lli_st_blocks;
+        b->st_atime = lli->lli_st_atime;
+        b->st_mtime = lli->lli_st_mtime;
+        b->st_ctime = lli->lli_st_ctime;
+
+        return 0;
+}
+
+/*
+ * Cancel the unused locks held on the resource named by the inode's
+ * (ino, generation) pair, in the namespace of the obd device behind conn.
+ * Returns the result of ldlm_cli_cancel_unused().
+ */
+int llu_mdc_cancel_unused(struct lustre_handle *conn,
+                          struct llu_inode_info *lli,
+                          int flags)
+{
+        struct ldlm_res_id res_id =
+                { .name = {lli->lli_st_ino, lli->lli_st_generation} };
+        struct obd_device *obddev = class_conn2obd(conn);
+        ENTRY;
+        RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags));
+}
+
+/*
+ * Release what the inode's private info holds: unused MDC and OSC locks,
+ * the stripe md and the cached symlink name.  Lock-cancel failures are
+ * logged and otherwise ignored.  The private info itself is not freed.
+ */
+static void llu_clear_inode(struct inode *inode)
+{
+        struct llu_sb_info *sbi = llu_i2sbi(inode);
+        struct llu_inode_info *lli = llu_i2info(inode);
+        int rc;
+        ENTRY;
+
+        CDEBUG(D_INODE, "clear inode: %lu\n", lli->lli_st_ino);
+        rc = llu_mdc_cancel_unused(&sbi->ll_mdc_conn, lli,
+                                   LDLM_FL_NO_CALLBACK);
+        if (rc < 0) {
+                CERROR("ll_mdc_cancel_unused: %d\n", rc);
+                /* XXX FIXME do something dramatic */
+        }
+
+        if (lli->lli_smd) {
+                rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
+                if (rc < 0) {
+                        CERROR("obd_cancel_unused: %d\n", rc);
+                        /* XXX FIXME do something dramatic */
+                }
+
+                obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
+        }
+
+        if (lli->lli_symlink_name) {
+                OBD_FREE(lli->lli_symlink_name,
+                         strlen(lli->lli_symlink_name) + 1);
+                lli->lli_symlink_name = NULL;
+        }
+
+        EXIT;
+}
+
+/*
+ * inop_gone hook: release everything the inode's private info holds, then
+ * free the private info itself.
+ */
+void llu_iop_gone(struct inode *inode)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+
+        llu_clear_inode(inode);
+
+        OBD_FREE(lli, sizeof(*lli));
+}
+
+/* filesystem-level operations handed to _sysio_fs_new() */
+struct filesys_ops llu_filesys_ops =
+{
+        .fsop_gone = llu_fsop_gone,
+};
+
+
+/* inode operations attached to every inode made by llu_new_inode() */
+static struct inode_ops llu_inode_ops = {
+        .inop_lookup    = llu_iop_lookup,
+        .inop_getattr   = llu_iop_getattr,
+        .inop_open      = llu_iop_open,
+        .inop_close     = llu_iop_close,
+        .inop_ipreadv   = llu_iop_ipreadv,
+        .inop_ipwritev  = llu_iop_ipwritev,
+        .inop_iodone    = llu_iop_iodone,
+        .inop_gone      = llu_iop_gone,
+};
+
+
+/*
+ * Mount entry point for the filesystem switch.
+ *
+ * Connects to the MDC and OSC devices named by the global mount_option,
+ * fetches the root fid and root attributes from the MDS, builds the root
+ * inode and path-node base, and hands them to _sysio_do_mount().  The
+ * source and tocover arguments are not used.
+ *
+ * Returns 0 with *mntp filled in by _sysio_do_mount() on success, or a
+ * negative errno after undoing the connections made so far.
+ */
+static int
+llu_fsswop_mount(const char *source,
+                 unsigned flags,
+                 const void *data __IS_UNUSED,
+                 struct pnode *tocover,
+                 struct mount **mntp)
+{
+        struct filesys *fs;
+        struct inode *root;
+        struct pnode_base *rootpb;
+        static struct qstr noname = { NULL, 0, 0 };
+        struct ll_fid rootfid;
+
+        struct llu_sb_info *sbi;
+        struct ptlrpc_request *request = NULL;
+        struct mds_body *root_body;
+        struct obd_uuid param_uuid;
+        class_uuid_t uuid;
+        struct obd_device *obd;
+        char *osc=mount_option.osc_uuid;
+        char *mdc=mount_option.mdc_uuid;
+        int err = -EINVAL;
+
+        ENTRY;
+
+        OBD_ALLOC(sbi, sizeof(*sbi));
+        if (!sbi)
+                RETURN(-ENOMEM);
+
+        INIT_LIST_HEAD(&sbi->ll_conn_chain);
+        generate_random_uuid(uuid);
+        class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
+
+        /* NOTE(review): once fs exists it is never released on the error
+         * paths below, and it keeps pointing at the sbi freed at out_free.
+         * Presumably libsysio has a call for dropping it -- confirm and use
+         * it here. */
+        fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
+        if (!fs) {
+                err = -ENOMEM;
+                goto out_free;
+        }
+
+        /* strncpy does not terminate the result when the source fills the
+         * buffer, so do it by hand; strncpy is kept because it zero-pads
+         * the rest of the uuid */
+        strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid) - 1);
+        param_uuid.uuid[sizeof(param_uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&param_uuid);
+        if (!obd) {
+                CERROR("MDC %s: not setup or attached\n", mdc);
+                err = -EINVAL;
+                goto out_free;
+        }
+
+        /* setup mdc */
+        /* FIXME need recover stuff */
+        err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid);
+        if (err) {
+                CERROR("cannot connect to %s: rc = %d\n", mdc, err);
+                goto out_free;
+        }
+
+        /* setup osc */
+        strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid) - 1);
+        param_uuid.uuid[sizeof(param_uuid.uuid) - 1] = '\0';
+        obd = class_uuid2obd(&param_uuid);
+        if (!obd) {
+                CERROR("OSC %s: not setup or attached\n", osc);
+                err = -EINVAL;
+                goto out_mdc;
+        }
+
+        err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid);
+        if (err) {
+                CERROR("cannot connect to %s: rc = %d\n", osc, err);
+                goto out_mdc;
+        }
+
+        err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid);
+        if (err) {
+                CERROR("cannot mds_connect: rc = %d\n", err);
+                goto out_osc;
+        }
+        CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
+        sbi->ll_rootino = rootfid.id;
+
+/* XXX do we need this??
+        memset(&osfs, 0, sizeof(osfs));
+        rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+*/
+        /* fetch attr of root inode */
+        err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid,
+                          OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
+        if (err) {
+                CERROR("mdc_getattr failed for root: rc = %d\n", err);
+                goto out_request;
+        }
+
+        root_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*root_body));
+        LASSERT(root_body != NULL);
+        LASSERT(sbi->ll_rootino != 0);
+
+        root = llu_new_inode(fs, root_body->ino, root_body->mode);
+        if (!root) {
+                err = -ENOMEM;
+                goto out_request;
+        }
+
+        llu_update_inode(root, root_body, NULL);
+
+        /*
+         * Generate base path-node for root.
+         */
+        rootpb = _sysio_pb_new(&noname, NULL, root);
+        if (!rootpb) {
+                err = -ENOMEM;
+                goto out_inode;
+        }
+
+        err = _sysio_do_mount(fs, rootpb, flags, NULL, mntp);
+        if (err) {
+                _sysio_pb_gone(rootpb);
+                goto out_inode;
+        }
+
+        ptlrpc_req_finished(request);
+        request = NULL;
+
+        printf("************************************************\n");
+        printf("* Mount successfully!!!!!!! *\n");
+        printf("************************************************\n");
+
+        RETURN(0);
+
+        /* unwind in reverse order of acquisition */
+out_inode:
+        _sysio_i_gone(root);
+out_request:
+        ptlrpc_req_finished(request);
+out_osc:
+        obd_disconnect(&sbi->ll_osc_conn);
+out_mdc:
+        obd_disconnect(&sbi->ll_mdc_conn);
+out_free:
+        OBD_FREE(sbi, sizeof(*sbi));
+        RETURN(err);
+}
+
+/* filesystem switch entry; its only member is the mount operation */
+struct fssw_ops llu_fssw_ops = {
+        llu_fsswop_mount
+};
+
-- 
1.8.3.1