* This code is issued under the GNU General Public License.
* See the file COPYING in this distribution
*
- * Copryright (C) 1996 Peter J. Braam <braam@stelias.com>
- * Copryright (C) 1999 Stelias Computing Inc. <braam@stelias.com>
- * Copryright (C) 1999 Seagate Technology Inc.
- * Copryright (C) 2001 Mountain View Data, Inc.
* Copyright (C) 2002 Cluster File Systems, Inc.
- *
*/
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/locks.h>
-#include <linux/unistd.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-
-#define DEBUG_SUBSYSTEM S_LLIGHT
+#define DEBUG_SUBSYSTEM S_LLITE
-#include <linux/lustre_light.h>
+#include <linux/module.h>
+#include <linux/lustre_lite.h>
+#include <linux/lustre_ha.h>
+#include <linux/lustre_dlm.h>
kmem_cache_t *ll_file_data_slab;
extern struct address_space_operations ll_aops;
extern struct address_space_operations ll_dir_aops;
struct super_operations ll_super_operations;
+extern int ll_recover(struct ptlrpc_client *);
+extern int ll_commitcbd_setup(struct ll_sb_info *);
+extern int ll_commitcbd_cleanup(struct ll_sb_info *);
+
static char *ll_read_opt(const char *opt, char *data)
{
char *value;
char *retval;
ENTRY;
- CDEBUG(D_INFO, "option: %s, data %s\n", opt, data);
- if ( strncmp(opt, data, strlen(opt)) ) {
- EXIT;
- return NULL;
- }
- if ( (value = strchr(data, '=')) == NULL ) {
- EXIT;
- return NULL;
- }
+ CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data);
+ if ( strncmp(opt, data, strlen(opt)) )
+ RETURN(NULL);
+ if ( (value = strchr(data, '=')) == NULL )
+ RETURN(NULL);
value++;
OBD_ALLOC(retval, strlen(value) + 1);
if ( !retval ) {
CERROR("out of memory!\n");
- return NULL;
+ RETURN(NULL);
}
memcpy(retval, value, strlen(value)+1);
CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval);
- EXIT;
- return retval;
+ RETURN(retval);
}
-static void ll_options(char *options, char **dev, char **vers)
+static void ll_options(char *options, char **ost, char **mds)
{
char *this_char;
ENTRY;
for (this_char = strtok (options, ",");
this_char != NULL;
this_char = strtok (NULL, ",")) {
- CDEBUG(D_INFO, "this_char %s\n", this_char);
- if ( (!*dev && (*dev = ll_read_opt("device", this_char)))||
- (!*vers && (*vers = ll_read_opt("version", this_char))) )
+ CDEBUG(D_SUPER, "this_char %s\n", this_char);
+ if ( (!*ost && (*ost = ll_read_opt("ost", this_char)))||
+ (!*mds && (*mds = ll_read_opt("mds", this_char))) )
continue;
}
EXIT;
}
+#ifndef log2
+#define log2(n) ffz(~(n))
+#endif
+
static struct super_block * ll_read_super(struct super_block *sb,
void *data, int silent)
{
struct inode *root = 0;
+ struct obd_device *obd;
struct ll_sb_info *sbi;
- char *device = NULL;
- char *version = NULL;
- int devno;
+ char *ost = NULL;
+ char *mds = NULL;
int err;
+ struct ll_fid rootfid;
+ struct statfs sfs;
+ __u64 last_committed, last_rcvd;
+ __u32 last_xid;
struct ptlrpc_request *request = NULL;
+ struct ll_inode_md md;
ENTRY;
MOD_INC_USE_COUNT;
RETURN(NULL);
}
- memset(sbi, 0, sizeof(*sbi));
sb->u.generic_sbp = sbi;
- ll_options(data, &device, &version);
+ ll_options(data, &ost, &mds);
- if ( !device ) {
- CERROR("no device\n");
+ if (!ost) {
+ CERROR("no ost\n");
GOTO(out_free, sb = NULL);
}
- devno = simple_strtoul(device, NULL, 0);
- if ( devno >= MAX_OBD_DEVICES ) {
- CERROR("device of %s too high\n", device);
+ if (!mds) {
+ CERROR("no mds\n");
GOTO(out_free, sb = NULL);
}
- sbi->ll_conn.oc_dev = &obd_dev[devno];
- err = obd_connect(&sbi->ll_conn);
- if ( err ) {
- CERROR("cannot connect to %s\n", device);
+ obd = class_uuid2obd(mds);
+ if (!obd) {
+ CERROR("MDS %s: not setup or attached\n", mds);
GOTO(out_free, sb = NULL);
}
- /* the first parameter should become an mds device no */
- err = ptlrpc_connect_client(-1, "mds",
- MDS_REQUEST_PORTAL,
- MDC_REPLY_PORTAL,
- mds_pack_req,
- mds_unpack_rep,
- &sbi->ll_mds_client);
+#if 0
+ err = connmgr_connect(ptlrpc_connmgr, sbi->ll_mds_conn);
+ if (err) {
+ CERROR("cannot connect to MDS: rc = %d\n", err);
+ GOTO(out_rpc, sb = NULL);
+ }
+#endif
+
+ err = obd_connect(&sbi->ll_mdc_conn, obd);
+ if (err) {
+ CERROR("cannot connect to %s: rc = %d\n", mds, err);
+ GOTO(out_free, sb = NULL);
+ }
+ sbi2mdc(sbi)->mdc_conn->c_level = LUSTRE_CONN_FULL;
+
+ obd = class_uuid2obd(ost);
+ if (!obd) {
+ CERROR("OST %s: not setup or attached\n", ost);
+ GOTO(out_mdc, sb = NULL);
+ }
+ err = obd_connect(&sbi->ll_osc_conn, obd);
+ if (err) {
+ CERROR("cannot connect to %s: rc = %d\n", ost, err);
+ GOTO(out_mdc, sb = NULL);
+ }
+ /* XXX: need to store the last_* values somewhere */
+ err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed,
+ &last_rcvd, &last_xid, &request);
if (err) {
- CERROR("cannot find MDS\n");
- GOTO(out_disc, sb = NULL);
+ CERROR("cannot mds_connect: rc = %d\n", err);
+ GOTO(out_request, sb = NULL);
}
- sbi->ll_super = sb;
- sbi->ll_rootino = 2;
+ CDEBUG(D_SUPER, "rootfid %Ld\n", (unsigned long long)rootfid.id);
+ sbi->ll_rootino = rootfid.id;
- sb->s_maxbytes = 1LL << 36;
- sb->s_blocksize = PAGE_SIZE;
- sb->s_blocksize_bits = (unsigned char)PAGE_SHIFT;
+ memset(&sfs, 0, sizeof(sfs));
+ err = mdc_statfs(&sbi->ll_mdc_conn, &sfs, &request);
+ sb->s_blocksize = sfs.f_bsize;
+ sb->s_blocksize_bits = log2(sfs.f_bsize);
sb->s_magic = LL_SUPER_MAGIC;
+ sb->s_maxbytes = (1ULL << (32 + 9)) - sfs.f_bsize;
+ ptlrpc_req_finished(request);
+
sb->s_op = &ll_super_operations;
/* make root inode */
- err = mdc_getattr(&sbi->ll_mds_client, sbi->ll_rootino, S_IFDIR,
- OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, &request);
+ err = mdc_getattr(&sbi->ll_mdc_conn, sbi->ll_rootino, S_IFDIR,
+ OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
if (err) {
- CERROR("mdc_getattr failed for root %d\n", err);
- GOTO(out_req, sb = NULL);
+ CERROR("mdc_getattr failed for root: rc = %d\n", err);
+ GOTO(out_request, sb = NULL);
}
- root = iget4(sb, sbi->ll_rootino, NULL, request->rq_rep.mds);
+ /* initialize committed transaction callback daemon */
+ spin_lock_init(&sbi->ll_commitcbd_lock);
+ init_waitqueue_head(&sbi->ll_commitcbd_waitq);
+ init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq);
+ sbi->ll_commitcbd_flags = 0;
+ err = ll_commitcbd_setup(sbi);
+ if (err) {
+ CERROR("failed to start commit callback daemon: rc = %d\n",err);
+ GOTO(out_request, sb = NULL);
+ }
+
+ md.body = lustre_msg_buf(request->rq_repmsg, 0);
+ md.md = NULL;
+ root = iget4(sb, sbi->ll_rootino, NULL, &md);
+
if (root) {
sb->s_root = d_alloc_root(root);
} else {
- CERROR("lustre_light: bad iget4 for root\n");
- GOTO(out_req, sb = NULL);
+ CERROR("lustre_lite: bad iget4 for root\n");
+ GOTO(out_cdb, sb = NULL);
}
-out_req:
ptlrpc_free_req(request);
- if (!sb) {
-out_disc:
- obd_disconnect(&sbi->ll_conn);
-out_free:
- MOD_DEC_USE_COUNT;
- OBD_FREE(sbi, sizeof(*sbi));
- }
- OBD_FREE(device, strlen(device) + 1);
- OBD_FREE(version, strlen(version) + 1);
+
+out_dev:
+ if (mds)
+ OBD_FREE(mds, strlen(mds) + 1);
+ if (ost)
+ OBD_FREE(ost, strlen(ost) + 1);
RETURN(sb);
+
+out_cdb:
+ ll_commitcbd_cleanup(sbi);
+out_request:
+ ptlrpc_free_req(request);
+ obd_disconnect(&sbi->ll_osc_conn);
+out_mdc:
+ obd_disconnect(&sbi->ll_mdc_conn);
+out_free:
+ OBD_FREE(sbi, sizeof(*sbi));
+
+ MOD_DEC_USE_COUNT;
+ goto out_dev;
} /* ll_read_super */
static void ll_put_super(struct super_block *sb)
{
- struct ll_sb_info *sbi = sb->u.generic_sbp;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
ENTRY;
- obd_disconnect(&sbi->ll_conn);
- OBD_FREE(sb->u.generic_sbp, sizeof(*sbi));
+ ll_commitcbd_cleanup(sbi);
+ obd_disconnect(&sbi->ll_osc_conn);
+ obd_disconnect(&sbi->ll_mdc_conn);
+ OBD_FREE(sbi, sizeof(*sbi));
+
MOD_DEC_USE_COUNT;
EXIT;
} /* ll_put_super */
+static void ll_clear_inode(struct inode *inode)
+{
+ if (atomic_read(&inode->i_count) == 0) {
+ struct lov_stripe_md *md = ll_i2info(inode)->lli_smd;
+ if (md) {
+ OBD_FREE(md, md->lmd_size);
+ ll_i2info(inode)->lli_smd = NULL;
+ }
+ if (ll_i2info(inode)->lli_symlink_name) {
+ OBD_FREE(ll_i2info(inode)->lli_symlink_name,
+ strlen(ll_i2info(inode)->lli_symlink_name)+ 1);
+ ll_i2info(inode)->lli_symlink_name = NULL;
+ }
+ }
+}
-extern inline struct obdo * ll_oa_from_inode(struct inode *inode, int valid);
static void ll_delete_inode(struct inode *inode)
{
if (S_ISREG(inode->i_mode)) {
- int err;
- struct obdo *oa;
- oa = ll_oa_from_inode(inode, OBD_MD_FLNOTOBD);
- if (!oa) {
- CERROR("no memory\n");
+ int err;
+ struct obdo oa;
+ struct lov_stripe_md *md = ll_i2info(inode)->lli_smd;
+
+ if (!md)
+ GOTO(out, -EINVAL);
+
+ oa.o_id = md->lmd_object_id;
+ oa.o_easize = md->lmd_size;
+ if (oa.o_id == 0) {
+ CERROR("This really happens\n");
+ /* No obdo was ever created */
+ GOTO(out, 0);
}
- err = obd_destroy(ll_i2obdconn(inode), oa);
- CDEBUG(D_INODE, "obd destroy of %Ld error %d\n",
- oa->o_id, err);
- obdo_free(oa);
+ err = obd_destroy(ll_i2obdconn(inode), &oa, md);
+ CDEBUG(D_SUPER, "obd destroy of %Ld error %d\n",
+ md->lmd_object_id, err);
}
-
- clear_inode(inode);
+out:
+ clear_inode(inode);
}
-/* like inode_setattr, but doesn't mark the inode dirty */
+/* like inode_setattr, but doesn't mark the inode dirty */
static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc)
{
unsigned int ia_valid = attr->ia_valid;
int error = 0;
- if ((ia_valid & ATTR_SIZE) && trunc ) {
+ if ((ia_valid & ATTR_SIZE) && trunc) {
error = vmtruncate(inode, attr->ia_size);
if (error)
goto out;
- } else if (ia_valid & ATTR_SIZE) {
+ } else if (ia_valid & ATTR_SIZE)
inode->i_size = attr->ia_size;
- }
if (ia_valid & ATTR_UID)
inode->i_uid = attr->ia_uid;
int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc)
{
- struct ptlrpc_request *request;
+ struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(inode);
int err;
/* change incore inode */
ll_attr2inode(inode, attr, do_trunc);
- err = mdc_setattr(&sbi->ll_mds_client, inode, attr, &request);
+ err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
if (err)
CERROR("mdc_setattr fails (%d)\n", err);
- ptlrpc_free_req(request);
+ ptlrpc_req_finished(request);
- EXIT;
- return err;
+ RETURN(err);
}
int ll_setattr(struct dentry *de, struct iattr *attr)
{
+ int rc = inode_change_ok(de->d_inode, attr);
+
+ if (rc)
+ return rc;
+
return ll_inode_setattr(de->d_inode, attr, 1);
}
-static int ll_statfs(struct super_block *sb, struct statfs *buf)
+static int ll_statfs(struct super_block *sb, struct statfs *sfs)
{
- struct statfs tmp;
- int err;
-
+ struct ptlrpc_request *request = NULL;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ int rc;
ENTRY;
- err = obd_statfs(ID(sb), &tmp);
- if ( err ) {
- CERROR("obd_statfs fails (%d)\n", err);
- return err;
+ memset(sfs, 0, sizeof(*sfs));
+ rc = mdc_statfs(&sbi->ll_mdc_conn, sfs, &request);
+ ptlrpc_req_finished(request);
+ if (rc)
+ CERROR("obd_statfs fails: rc = %d\n", rc);
+ else
+ CDEBUG(D_SUPER, "statfs shows blocks %ld/%ld objects %ld/%ld\n",
+ sfs->f_bavail, sfs->f_blocks, sfs->f_files,sfs->f_ffree);
+
+ /* temporary until mds_statfs returns statfs info for all OSTs */
+ if (!rc) {
+ struct statfs obd_sfs;
+
+ rc = obd_statfs(&sbi->ll_osc_conn, &obd_sfs);
+ if (rc) {
+ CERROR("obd_statfs fails: rc = %d\n", rc);
+ GOTO(out, rc);
+ }
+ CDEBUG(D_SUPER, "obd_statfs returns blocks %ld/%ld, "
+ "objects %ld/%ld\n",
+ obd_sfs.f_bavail, obd_sfs.f_blocks,
+ obd_sfs.f_ffree, obd_sfs.f_files);
+
+ sfs->f_bfree = obd_sfs.f_bfree;
+ sfs->f_bavail = obd_sfs.f_bavail;
+ sfs->f_blocks = obd_sfs.f_blocks;
+ if (obd_sfs.f_ffree < sfs->f_ffree)
+ sfs->f_ffree = obd_sfs.f_ffree;
}
- memcpy(buf, &tmp, sizeof(*buf));
- CDEBUG(D_SUPER, "statfs returns avail %ld\n", tmp.f_bavail);
- EXIT;
- return err;
+out:
+ RETURN(rc);
}
-static void inline ll_to_inode(struct inode *dst, struct mds_rep *rep)
+inline int ll_stripe_md_size(struct super_block *sb)
{
- struct ll_inode_info *ii =
- (struct ll_inode_info *) &dst->u.generic_ip;
+ struct mdc_obd *mdc = sbi2mdc(ll_s2sbi(sb));
+ return mdc->mdc_max_mdsize;
+}
- /* core attributes first */
- if ( rep->valid & OBD_MD_FLID )
- dst->i_ino = rep->ino;
- if ( rep->valid & OBD_MD_FLATIME )
- dst->i_atime = rep->atime;
- if ( rep->valid & OBD_MD_FLMTIME )
- dst->i_mtime = rep->mtime;
- if ( rep->valid & OBD_MD_FLCTIME )
- dst->i_ctime = rep->ctime;
- if ( rep->valid & OBD_MD_FLSIZE )
- dst->i_size = rep->size;
- if ( rep->valid & OBD_MD_FLMODE )
- dst->i_mode = rep->mode;
- if ( rep->valid & OBD_MD_FLUID )
- dst->i_uid = rep->uid;
- if ( rep->valid & OBD_MD_FLGID )
- dst->i_gid = rep->gid;
- if ( rep->valid & OBD_MD_FLFLAGS )
- dst->i_flags = rep->flags;
- if ( rep->valid & OBD_MD_FLNLINK )
- dst->i_nlink = rep->nlink;
- if ( rep->valid & OBD_MD_FLGENER )
- dst->i_generation = rep->generation;
-
- /* this will become more elaborate for striping etc */
- if (rep->valid & OBD_MD_FLOBJID)
- ii->lli_objid = rep->objid;
-#if 0
+static void ll_to_inode(struct inode *dst, struct ll_inode_md *md)
+{
+ struct mds_body *body = md->body;
+ struct ll_inode_info *ii = ll_i2info(dst);
- if (obdo_has_inline(oa)) {
- if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
- S_ISFIFO(inode->i_mode)) {
- obd_rdev rdev = *((obd_rdev *)oa->o_inline);
- CDEBUG(D_INODE,
- "copying device %x from obdo to inode\n", rdev);
- init_special_inode(inode, inode->i_mode, rdev);
- } else {
- CDEBUG(D_INFO, "copying inline from obdo to inode\n");
- memcpy(oinfo->lli_inline, oa->o_inline, OBD_INLINESZ);
+ /* core attributes first */
+ if (body->valid & OBD_MD_FLID)
+ dst->i_ino = body->ino;
+ if (body->valid & OBD_MD_FLATIME)
+ dst->i_atime = body->atime;
+ if (body->valid & OBD_MD_FLMTIME)
+ dst->i_mtime = body->mtime;
+ if (body->valid & OBD_MD_FLCTIME)
+ dst->i_ctime = body->ctime;
+ if (body->valid & OBD_MD_FLSIZE)
+ dst->i_size = body->size;
+ if (body->valid & OBD_MD_FLMODE)
+ dst->i_mode = body->mode;
+ if (body->valid & OBD_MD_FLUID)
+ dst->i_uid = body->uid;
+ if (body->valid & OBD_MD_FLGID)
+ dst->i_gid = body->gid;
+ if (body->valid & OBD_MD_FLFLAGS)
+ dst->i_flags = body->flags;
+ if (body->valid & OBD_MD_FLNLINK)
+ dst->i_nlink = body->nlink;
+ if (body->valid & OBD_MD_FLGENER)
+ dst->i_generation = body->generation;
+ if (body->valid & OBD_MD_FLRDEV)
+ dst->i_rdev = body->extra;
+ if (md && md->md && md->md->lmd_stripe_count) {
+ struct lov_stripe_md *smd = md->md;
+ int size = ll_stripe_md_size(dst->i_sb);
+ if (md->md->lmd_size != size) {
+ CERROR("Striping metadata size error %ld\n",
+ dst->i_ino);
+ LBUG();
+ }
+ OBD_ALLOC(ii->lli_smd, size);
+ if (!ii->lli_smd){
+ CERROR("No memory for %d\n", size);
+ LBUG();
}
- oinfo->lli_flags |= OBD_FL_INLINEDATA;
+ memcpy(ii->lli_smd, smd, size);
}
-#endif
} /* ll_to_inode */
-static inline void ll_read_inode2(struct inode *inode, void *opaque)
+static void ll_read_inode2(struct inode *inode, void *opaque)
{
- struct mds_rep *rep = opaque;
-
+ struct ll_inode_md *md = opaque;
+
ENTRY;
- ll_to_inode(inode, rep);
+ ll_to_inode(inode, md);
/* OIDEBUG(inode); */
EXIT;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &ll_dir_inode_operations;
- inode->i_fop = &ll_dir_operations;
+ inode->i_fop = &ll_dir_operations;
inode->i_mapping->a_ops = &ll_dir_aops;
EXIT;
} else if (S_ISLNK(inode->i_mode)) {
} else {
init_special_inode(inode, inode->i_mode,
((int *)ll_i2info(inode)->lli_inline)[0]);
+ EXIT;
}
- EXIT;
return;
}
struct super_operations ll_super_operations =
{
read_inode2: ll_read_inode2,
+ clear_inode: ll_clear_inode,
delete_inode: ll_delete_inode,
put_super: ll_put_super,
- // statfs: ll_statfs
+ statfs: ll_statfs
};
-struct file_system_type lustre_light_fs_type = {
- "lustre_light", 0, ll_read_super, NULL
+struct file_system_type lustre_lite_fs_type = {
+ "lustre_lite", 0, ll_read_super, NULL
};
-static int __init init_lustre_light(void)
+static int __init init_lustre_lite(void)
{
- printk(KERN_INFO "Lustre Light 0.0.1, braam@clusterfs.com\n");
+ printk(KERN_INFO "Lustre Lite 0.0.1, info@clusterfs.com\n");
ll_file_data_slab = kmem_cache_create("ll_file_data",
sizeof(struct ll_file_data), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (ll_file_data_slab == NULL)
return -ENOMEM;
-
- return register_filesystem(&lustre_light_fs_type);
+ return register_filesystem(&lustre_lite_fs_type);
}
-static void __exit exit_lustre_light(void)
+static void __exit exit_lustre_lite(void)
{
- unregister_filesystem(&lustre_light_fs_type);
+ unregister_filesystem(&lustre_lite_fs_type);
kmem_cache_destroy(ll_file_data_slab);
}
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Lustre Light Client File System v1.0");
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Lite Client File System v1.0");
MODULE_LICENSE("GPL");
-module_init(init_lustre_light);
-module_exit(exit_lustre_light);
+module_init(init_lustre_lite);
+module_exit(exit_lustre_lite);