lustre*.tar.gz
cscope.files
cscope.out
+autom4te-2.53.cache
-TBA
+2003-01-06 Andreas Dilger <adilger@clusterfs.com>
+ * version v0_5_19
* bug fixes
- Fully reactivate OST imports after reconnection (512, others)
- Make sure client sees our -ENOTCONN from mds_handle (513 - partial)
- fix paths in lconf, which would load incorrect modules (451, 507)
- fix confusing lconf 'host not found' error message (386)
- fix lock order deadlock on OST (O/R i_sem before journal ops, 478)
+ - fix race condition in mdc_blocking_ast() for inode access (526)
+ - fix lov_unpackmd() unpacking wrong number of stripes (537)
+ - fix lov_set_osc_active() marking wrong OSC inactive (440)
+ - fix bad lstripe lov_unpackmd() assertion (fix layering too) (527)
+ - fix multiple writes of stripe MD to MDS (358, maybe 519)
+ - fix lstripe in several ways (kernel side) (527)
+ - fix request leak in ldlm_cli_enqueue (262)
+ - incorrect OSC was marked inactive after OST failure
+ - call mds_fs_cleanup before unmounting filesystem (524)
+ - fix races between taking ns_lock and ldlm_lock_change_resource
+ - fix races updating LOV export open file list
+ - fix lov_enqueue error path, avoid decref-ing bad lock handle
+ - fix recovery NULL deref in ldlm_cli_cancel_unused
+ - fix some DLM races by using new hash table for lock handles (419)
+ - permit the client to specify desired inodes, at replay
+ - duplicate requests when we queue them for replay reintegration
+ - fix last_rcvd offset calculation
+ - sync after each recovered transaction, so we always make progress
+ - never, not always, ERESTART requests without transnos
+ - store the lov_desc in the MDS, so we don't depend on getlovinfo to
+ set it
+ - skip replay if the MDS says that the client is already connected
+ - don't check for a recovery-enabled export to match lctl's UUID
+ - don't INC_USE_COUNT for phantom exports
+ - don't crash when cleaning up phantom exports (567)
+ - don't double-finish or set replay data for errored mdc_open requests
+ - abort requests when they time out, so we don't get old replies
+ - send/receive replies for AST messages again
+ - if the client says that it doesn't have the lock, cancel it on the
+ server
+        - if we time out during I/O, don't try to cancel an in-use lock;
+          instead mark it as destroyed; it will be cleaned up when decref is called
+ - fix module use counts (22, 581)
+ * protocol changes
+ - ASTs now expect a reply (server cancels lock on error reply)
2002-12-02 Andreas Dilger <adilger@clusterfs.com>
* version v0_5_18
endif
# NOTE: keep extN before mds and obdfilter
-SUBDIRS = $(DIRS24) obdclass utils ptlrpc ldlm lib obdfilter mdc osc ost llite
-SUBDIRS+= obdecho lov tests doc scripts
+SUBDIRS = $(DIRS24) obdclass utils ptlrpc ldlm lib obdfilter mdc osc ost llite
+SUBDIRS+= obdecho lov cobd ptlbd tests doc scripts conf
DIST_SUBDIRS = $(SUBDIRS)
EXTRA_DIST = BUGS FDL Rules include archdep.m4
include $(top_srcdir)/Rules
rpms: dist Makefile
- rpm -ta $(distdir).tar.gz
+ rpmbuild -ta $(distdir).tar.gz
--- /dev/null
+.deps
+Makefile
+Makefile.in
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# Clear DEFS for this directory.
+DEFS=
+
+# NOTE(review): MODULE and modulefs_DATA are presumably consumed by the
+# shared Rules file included below (module name / installed object) --
+# confirm against $(top_srcdir)/Rules.
+MODULE = cobd
+modulefs_DATA = cobd.o
+EXTRA_PROGRAMS = cobd
+# LINX is empty here; it is appended to the source list below.
+LINX=
+
+# Sources that make up the cobd module.
+cobd_SOURCES = cache_obd.c lproc_cache.c $(LINX)
+
+include $(top_srcdir)/Rules
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+#define DEBUG_SUBSYSTEM S_COBD
+
+#include <linux/obd_support.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_net.h>
+#include <linux/lustre_idl.h>
+#include <linux/obd_class.h>
+#include <linux/obd_cache.h>
+
+extern struct lprocfs_vars status_var_nm_1[];
+extern struct lprocfs_vars status_class_var[];
+
+/*
+ * Attach hook: register the per-device status entries (status_var_nm_1)
+ * with lprocfs, passing the device itself as the callback data.
+ * @len and @data are not used.  Returns the lprocfs_reg_obd() result.
+ */
+static int
+cobd_attach (struct obd_device *dev, obd_count len, void *data)
+{
+        int rc;
+
+        rc = lprocfs_reg_obd (dev, status_var_nm_1, dev);
+        return rc;
+}
+
+/*
+ * Detach hook: remove the lprocfs entries registered for this device.
+ * Returns the lprocfs_dereg_obd() result.
+ */
+static int
+cobd_detach (struct obd_device *dev)
+{
+        int rc;
+
+        rc = lprocfs_dereg_obd (dev);
+        return rc;
+}
+
+/*
+ * Setup hook: connect the caching OBD to its two underlying devices.
+ *
+ * @buf is a struct obd_ioctl_data; ioc_inlbuf1 carries the UUID of the
+ * target device and ioc_inlbuf2 the UUID of the cache device.
+ *
+ * Returns 0 on success, -EINVAL if either UUID is missing or does not
+ * resolve to a device, or the error from obd_connect().  If the cache
+ * connection fails, the already-made target connection is dropped again.
+ */
+static int
+cobd_setup (struct obd_device *dev, obd_count len, void *buf)
+{
+        struct obd_ioctl_data *ioc = buf;
+        struct lustre_handle  *target_conn = &dev->u.cobd.cobd_target;
+        struct lustre_handle  *cache_conn = &dev->u.cobd.cobd_cache;
+        struct obd_device     *target_dev;
+        struct obd_device     *cache_dev;
+        int                    err;
+
+        if (ioc->ioc_inlbuf1 == NULL || ioc->ioc_inlbuf2 == NULL)
+                return -EINVAL;
+
+        target_dev = class_uuid2obd (ioc->ioc_inlbuf1);
+        cache_dev = class_uuid2obd (ioc->ioc_inlbuf2);
+        if (target_dev == NULL || cache_dev == NULL)
+                return -EINVAL;
+
+        /* No attached/setup checks here: obd_connect() is expected to do
+         * them, and the state could change underneath us anyway. */
+
+        err = obd_connect (target_conn, target_dev, NULL, NULL, NULL);
+        if (err != 0)
+                return err;
+
+        err = obd_connect (cache_conn, cache_dev, NULL, NULL, NULL);
+        if (err != 0)
+                obd_disconnect (target_conn);   /* undo the first connect */
+
+        return err;
+}
+
+/*
+ * Cleanup hook: drop the connections made by cobd_setup().
+ *
+ * Refuses with -EBUSY while the device still has exports.  Otherwise
+ * disconnects the cache and then the target; a failure of either
+ * disconnect is only logged and the function still returns 0.
+ */
+static int
+cobd_cleanup (struct obd_device *dev)
+{
+        struct lustre_handle *cache_conn = &dev->u.cobd.cobd_cache;
+        struct lustre_handle *target_conn = &dev->u.cobd.cobd_target;
+        int                   err;
+
+        if (!list_empty (&dev->obd_exports))
+                return -EBUSY;
+
+        err = obd_disconnect (cache_conn);
+        if (err != 0)
+                CERROR ("error %d disconnecting cache\n", err);
+
+        err = obd_disconnect (target_conn);
+        if (err != 0)
+                CERROR ("error %d disconnecting target\n", err);
+
+        return 0;
+}
+
+/*
+ * Connect a client to the caching OBD by handing off to class_connect().
+ * @recovd and @recover are accepted for the obd_ops signature but are
+ * not used.  Returns the class_connect() result.
+ */
+static int
+cobd_connect (struct lustre_handle *conn, struct obd_device *obd,
+              obd_uuid_t cluuid, struct recovd_obd *recovd,
+              ptlrpc_recovery_cb_t recover)
+{
+        int rc = class_connect (conn, obd, cluuid);
+
+        /* Only report failures: logging "rc 0" at error level on every
+         * successful connect just pollutes the console. */
+        if (rc != 0)
+                CERROR ("rc %d\n", rc);
+        return (rc);
+}
+
+/*
+ * Disconnect a client from the caching OBD via class_disconnect().
+ * Returns the class_disconnect() result.
+ */
+static int
+cobd_disconnect (struct lustre_handle *conn)
+{
+        int rc = class_disconnect (conn);
+
+        /* Only report failures: logging "rc 0" at error level on every
+         * successful disconnect just pollutes the console. */
+        if (rc != 0)
+                CERROR ("rc %d\n", rc);
+        return (rc);
+}
+
+/*
+ * get_info: resolve @conn to its device and forward the query unchanged
+ * to the target connection.  Returns -EINVAL if @conn does not map to a
+ * device, otherwise the obd_get_info() result.
+ */
+static int
+cobd_get_info(struct lustre_handle *conn, obd_count keylen,
+              void *key, obd_count *vallen, void **val)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL) {
+                /* intercept cache utilisation info? */
+                return obd_get_info(&obd->u.cobd.cobd_target,
+                                    keylen, key, vallen, val);
+        }
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/*
+ * statfs: forwarded to the target connection.  Returns -EINVAL if @conn
+ * does not map to a device, otherwise the obd_statfs() result.
+ */
+static int
+cobd_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL)
+                return obd_statfs(&obd->u.cobd.cobd_target, osfs);
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/*
+ * getattr: forwarded to the target connection.  Returns -EINVAL if @conn
+ * does not map to a device, otherwise the obd_getattr() result.
+ */
+static int
+cobd_getattr(struct lustre_handle *conn, struct obdo *oa,
+             struct lov_stripe_md *lsm)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL)
+                return obd_getattr(&obd->u.cobd.cobd_target, oa, lsm);
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/*
+ * open: forwarded to the target connection.  Returns -EINVAL if @conn
+ * does not map to a device, otherwise the obd_open() result.
+ */
+static int
+cobd_open(struct lustre_handle *conn, struct obdo *oa,
+          struct lov_stripe_md *lsm)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL)
+                return obd_open(&obd->u.cobd.cobd_target, oa, lsm);
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/*
+ * close: forwarded to the target connection.  Returns -EINVAL if @conn
+ * does not map to a device, otherwise the obd_close() result.
+ */
+static int
+cobd_close(struct lustre_handle *conn, struct obdo *oa,
+           struct lov_stripe_md *lsm)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL)
+                return obd_close(&obd->u.cobd.cobd_target, oa, lsm);
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/*
+ * preprw: prepare a bulk transfer on the target connection.
+ *
+ * Any request with OBD_BRW_WRITE set in @cmd is refused with
+ * -EOPNOTSUPP; everything else is passed through to obd_preprw().
+ * Returns -EINVAL if @conn does not map to a device.
+ */
+static int
+cobd_preprw(int cmd, struct lustre_handle *conn,
+            int objcount, struct obd_ioobj *obj,
+            int niocount, struct niobuf_remote *nb,
+            struct niobuf_local *res, void **desc_private)
+{
+        struct obd_device    *obd = class_conn2obd(conn);
+        struct lustre_handle *target;
+
+        if (obd == NULL) {
+                CERROR("invalid client "LPX64"\n", conn->addr);
+                return -EINVAL;
+        }
+
+        if (cmd & OBD_BRW_WRITE)
+                return -EOPNOTSUPP;
+
+        target = &obd->u.cobd.cobd_target;
+        return obd_preprw(cmd, target, objcount, obj, niocount, nb,
+                          res, desc_private);
+}
+
+/*
+ * commitrw: complete a bulk transfer on the target connection.
+ *
+ * Any request with OBD_BRW_WRITE set in @cmd is refused with
+ * -EOPNOTSUPP; everything else is passed through to obd_commitrw().
+ * Returns -EINVAL if @conn does not map to a device.
+ */
+static int
+cobd_commitrw(int cmd, struct lustre_handle *conn,
+              int objcount, struct obd_ioobj *obj,
+              int niocount, struct niobuf_local *local,
+              void *desc_private)
+{
+        struct obd_device    *obd = class_conn2obd(conn);
+        struct lustre_handle *target;
+
+        if (obd == NULL) {
+                CERROR("invalid client "LPX64"\n", conn->addr);
+                return -EINVAL;
+        }
+
+        if (cmd & OBD_BRW_WRITE)
+                return -EOPNOTSUPP;
+
+        target = &obd->u.cobd.cobd_target;
+        return obd_commitrw(cmd, target, objcount, obj, niocount, local,
+                            desc_private);
+}
+
+/*
+ * brw: bulk read on the target connection.
+ *
+ * Any request with OBD_BRW_WRITE set in @cmd is refused with
+ * -EOPNOTSUPP; everything else is passed through to obd_brw().
+ * Returns -EINVAL if @conn does not map to a device.
+ *
+ * NOTE(review): this is installed as cobd_ops.o_brw, i.e. reached through
+ * a function pointer, so the "inline" qualifier looks redundant.
+ */
+static inline int
+cobd_brw(int cmd, struct lustre_handle *conn,
+         struct lov_stripe_md *lsm, obd_count oa_bufs,
+         struct brw_page *pga, struct obd_brw_set *set)
+{
+        struct obd_device    *obd = class_conn2obd(conn);
+        struct lustre_handle *target;
+
+        if (obd == NULL) {
+                CERROR("invalid client "LPX64"\n", conn->addr);
+                return -EINVAL;
+        }
+
+        if (cmd & OBD_BRW_WRITE)
+                return -EOPNOTSUPP;
+
+        target = &obd->u.cobd.cobd_target;
+        return obd_brw(cmd, target, lsm, oa_bufs, pga, set);
+}
+
+/*
+ * iocontrol: forwarded unchanged to the target connection.  Returns
+ * -EINVAL if @conn does not map to a device, otherwise the
+ * obd_iocontrol() result.
+ */
+static int
+cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
+               void *karg, void *uarg)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+
+        if (obd != NULL) {
+                /* intercept? */
+                return obd_iocontrol(cmd, &obd->u.cobd.cobd_target,
+                                     len, karg, uarg);
+        }
+
+        CERROR("invalid client "LPX64"\n", conn->addr);
+        return -EINVAL;
+}
+
+/* Method table registered for the caching OBD type by cobd_init(). */
+static struct obd_ops cobd_ops = {
+        .o_owner      = THIS_MODULE,
+
+        /* device lifecycle */
+        .o_attach     = cobd_attach,
+        .o_detach     = cobd_detach,
+        .o_setup      = cobd_setup,
+        .o_cleanup    = cobd_cleanup,
+
+        /* client connections */
+        .o_connect    = cobd_connect,
+        .o_disconnect = cobd_disconnect,
+
+        /* queries */
+        .o_get_info   = cobd_get_info,
+        .o_statfs     = cobd_statfs,
+
+        /* object operations, all routed to the target connection */
+        .o_getattr    = cobd_getattr,
+        .o_open       = cobd_open,
+        .o_close      = cobd_close,
+        .o_preprw     = cobd_preprw,
+        .o_commitrw   = cobd_commitrw,
+        .o_brw        = cobd_brw,
+        .o_iocontrol  = cobd_iocontrol,
+};
+
+/*
+ * Module entry point: announce the driver and register the caching OBD
+ * type (cobd_ops plus the class-level status entries) under
+ * OBD_CACHE_DEVICENAME.  Returns the class_register_type() result.
+ */
+static int __init
+cobd_init (void)
+{
+        printk (KERN_INFO "Lustre Caching OBD driver\n");
+
+        return class_register_type (&cobd_ops, status_class_var,
+                                    OBD_CACHE_DEVICENAME);
+}
+
+/* Module exit point: unregister the type registered by cobd_init(). */
+static void __exit
+cobd_exit (void)
+{
+        class_unregister_type (OBD_CACHE_DEVICENAME);
+        return;
+}
+
+MODULE_AUTHOR("Cluster Filesystems Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Caching OBD driver");
+MODULE_LICENSE("GPL");
+
+module_init(cobd_init);
+module_exit(cobd_exit);
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/lustre_lite.h>
+#include <linux/lprocfs_status.h>
+
+/*
+ * Common STATUS namespace
+ */
+
+/*
+ * /proc read handler: write the device's own UUID, followed by a
+ * newline, into @page (at most @count bytes).  @data is the
+ * struct obd_device.  Returns the snprintf() result.
+ */
+static int rd_uuid (char *page, char **start, off_t off, int count,
+                    int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        int                len;
+
+        len = snprintf(page, count, "%s\n", obd->obd_uuid);
+        return len;
+}
+
+/*
+ * Shared body of rd_target() and rd_cache(): print the UUID of the
+ * device that @conn is connected to.
+ *
+ * If @dev does not have OBD_SET_UP set, "not set up" is printed instead.
+ * Otherwise @conn is resolved to its export (asserted non-NULL) and the
+ * UUID of the export's device is printed.  Returns the snprintf() result.
+ */
+static int cobd_rd_conn_uuid (struct obd_device *dev,
+                              struct lustre_handle *conn,
+                              char *page, int count)
+{
+        struct obd_export *exp;
+
+        if ((dev->obd_flags & OBD_SET_UP) == 0)
+                return (snprintf (page, count, "not set up\n"));
+
+        exp = class_conn2export (conn);
+        LASSERT (exp != NULL);
+        return (snprintf (page, count, "%s\n", exp->exp_obd->obd_uuid));
+}
+
+/*
+ * /proc read handler for the UUID of the device behind the cobd's
+ * target connection.  @data is the caching struct obd_device.
+ */
+static int rd_target (char *page, char **start, off_t off, int count,
+                      int *eof, void *data)
+{
+        struct obd_device *dev = (struct obd_device*)data;
+
+        return (cobd_rd_conn_uuid (dev, &dev->u.cobd.cobd_target,
+                                   page, count));
+}
+
+/*
+ * /proc read handler for the UUID of the device behind the cobd's
+ * cache connection.  @data is the caching struct obd_device.
+ */
+static int rd_cache(char *page, char **start, off_t off, int count,
+                    int *eof, void *data)
+{
+        struct obd_device *dev = (struct obd_device*)data;
+
+        return (cobd_rd_conn_uuid (dev, &dev->u.cobd.cobd_cache,
+                                   page, count));
+}
+
+/*
+ * Per-device status entries; cache_obd.c hands this table to
+ * lprocfs_reg_obd() at attach time.  The list ends with a zeroed entry.
+ * NOTE(review): fields are presumably {path, read handler, write handler,
+ * data} -- confirm against struct lprocfs_vars.
+ */
+struct lprocfs_vars status_var_nm_1[] = {
+        {"status/uuid", rd_uuid, 0, 0},
+        {"status/target_uuid", rd_target, 0, 0},
+        {"status/cache_uuid", rd_cache, 0, 0},
+        {0}
+};
+
+/*
+ * /proc read handler: write the type's reference count (typ_refcnt),
+ * followed by a newline, into @page (at most @count bytes).  @data is
+ * the struct obd_type.  Returns the snprintf() result.
+ */
+int rd_numrefs(char *page, char **start, off_t off, int count,
+               int *eof, void *data)
+{
+        struct obd_type *type = data;
+        int              len;
+
+        len = snprintf(page, count, "%d\n", type->typ_refcnt);
+        return len;
+}
+
+/*
+ * Type-level status entries; cache_obd.c passes this table to
+ * class_register_type().  The list ends with a zeroed entry.
+ */
+struct lprocfs_vars status_class_var[] = {
+        {"status/num_refs", rd_numrefs, 0, 0},
+        {0}
+};
--- /dev/null
+Makefile
+Makefile.in
--- /dev/null
+# Copyright (C) 2001 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+# slapd-lustre.conf and lustre.schema are plain configuration/data files,
+# not programs, so install them through the DATA primary (non-executable)
+# rather than SCRIPTS.  DATA files are not distributed automatically,
+# hence they stay listed in EXTRA_DIST.
+EXTRA_DIST = lustre2ldif.xsl lustre.dtd lustre.schema slapd-lustre.conf
+ldapconfdir = $(sysconfdir)/openldap
+ldapschemadir = $(sysconfdir)/openldap/schema
+ldapconf_DATA = slapd-lustre.conf
+ldapschema_DATA = lustre.schema
+
+include $(top_srcdir)/Rules
+
--- /dev/null
+<!-- Lustre Management DTD -->
+
+<!-- basic entities -->
+<!ENTITY % object.content "(#PCDATA)">
+<!ENTITY % object.attr "
+ name CDATA #REQUIRED
+ uuid CDATA #REQUIRED">
+
+<!ENTITY % objref.content "(#PCDATA)">
+<!ENTITY % objref.attr "uuidref CDATA #REQUIRED">
+
+<!-- main elements -->
+<!ELEMENT lustre (node | profile | mountpoint | ldlm | echoclient |
+ mds | obd | ost | lov | lovconfig)*>
+
+<!ELEMENT node (network | profile_ref)*>
+<!ATTLIST node %object.attr;
+ router CDATA #IMPLIED>
+
+<!ELEMENT network (nid | port | route_tbl | send_mem | recv_mem)*>
+<!ATTLIST network %object.attr;
+                  nettype (tcp | elan | gm) 'tcp'>
+
+<!ELEMENT route_tbl (route)*>
+<!ATTLIST route_tbl %object.attr;>
+<!ELEMENT route %object.content;>
+<!ATTLIST route type (elan | tcp | gm) #REQUIRED
+ gw CDATA #REQUIRED
+ lo CDATA #REQUIRED
+ hi CDATA #IMPLIED >
+
+<!ELEMENT profile (ldlm_ref | network_ref | obd_ref | ost_ref |
+ echoclient_ref | mdsdev_ref | lov_ref |
+ lovconfig_ref| mountpoint_ref)*>
+<!ATTLIST profile %object.attr;>
+
+<!ELEMENT mountpoint (path | fileset | mds_ref | obd_ref)*>
+<!ATTLIST mountpoint %object.attr;>
+
+<!ELEMENT echoclient (obd_ref)>
+<!ATTLIST echoclient %object.attr;>
+
+<!ELEMENT ldlm EMPTY>
+<!ATTLIST ldlm %object.attr;>
+
+<!ELEMENT obd (fstype | devpath | devsize | autoformat | active_ref)*>
+<!ATTLIST obd %object.attr;
+ obdtype (obdfilter | obdecho) 'obdfilter'>
+
+<!ELEMENT ost (network_ref | obd_ref | failover_ref)*>
+<!ATTLIST ost %object.attr;>
+
+<!ELEMENT mds (active_ref)*>
+<!ATTLIST mds %object.attr;>
+
+<!ELEMENT mdsdev (fstype | devpath | devsize | autoformat |
+ mds_ref | network_ref )*>
+<!ATTLIST mdsdev %object.attr;>
+
+<!ELEMENT lov (mds_ref |(obd_ref)+)*>
+<!ATTLIST lov %object.attr;
+ stripesize CDATA #REQUIRED
+ stripecount CDATA #REQUIRED
+ stripeoffset CDATA #IMPLIED
+ stripepattern CDATA #REQUIRED>
+
+<!ELEMENT lovconfig (lov_ref)>
+<!ATTLIST lovconfig %object.attr;>
+
+<!-- basic elements -->
+<!ELEMENT fstype %object.content;>
+<!ELEMENT nid %object.content;>
+<!ELEMENT port %object.content;>
+<!ELEMENT send_mem %object.content;>
+<!ELEMENT recv_mem %object.content;>
+<!ELEMENT autoformat %object.content;>
+<!ELEMENT activetarget %object.content;>
+<!ELEMENT devpath %object.content;>
+<!ELEMENT devsize %object.content;>
+<!ELEMENT path %object.content;>
+<!ELEMENT fileset %object.content;>
+
+<!-- object reference tag elements -->
+<!ELEMENT network_ref %objref.content;>
+<!ATTLIST network_ref %objref.attr;>
+<!ELEMENT node_ref %objref.content;>
+<!ATTLIST node_ref %objref.attr;>
+<!ELEMENT profile_ref %objref.content;>
+<!ATTLIST profile_ref %objref.attr;>
+<!ELEMENT obd_ref %objref.content;>
+<!ATTLIST obd_ref %objref.attr;>
+<!ELEMENT mds_ref %objref.content;>
+<!ATTLIST mds_ref %objref.attr;>
+<!ELEMENT mdsdev_ref %objref.content;>
+<!ATTLIST mdsdev_ref %objref.attr;>
+<!ELEMENT ost_ref %objref.content;>
+<!ATTLIST ost_ref %objref.attr;>
+<!ELEMENT lov_ref %objref.content;>
+<!ATTLIST lov_ref %objref.attr;>
+<!ELEMENT lovconfig_ref %objref.content;>
+<!ATTLIST lovconfig_ref %objref.attr;>
+<!ELEMENT mountpoint_ref %objref.content;>
+<!ATTLIST mountpoint_ref %objref.attr;>
+<!ELEMENT echoclient_ref %objref.content;>
+<!ATTLIST echoclient_ref %objref.attr;>
+<!ELEMENT failover_ref %objref.content;>
+<!ATTLIST failover_ref %objref.attr;>
+<!ELEMENT ldlm_ref %objref.content;>
+<!ATTLIST ldlm_ref %objref.attr;>
+<!ELEMENT active_ref %objref.content;>
+<!ATTLIST active_ref %objref.attr;>
+
+
--- /dev/null
+<?xml version="1.0" encoding="iso-8859-1"?>
+<stylesheet version="1.0" xmlns="http://www.w3.org/1999/XSL/Transform">
+<output omit-xml-declaration="yes" />
+<strip-space elements="*"/>
+<param name="config">fs=lustre</param>
+<variable name="basedn">config=<value-of select="$config"/>,fs=lustre</variable>
+
+<template match="lustre">
+dn: <value-of select="$basedn"/>
+uuid: CONFIG_UUID
+objectClass: LUSTRECONFIG
+config: <value-of select="$config"/>
+<text>
+</text><apply-templates/>
+</template>
+
+<template match="node">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: NODE
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+networkRef: <value-of select="network/@uuid"/>
+<for-each select="profile_ref">
+profileRef: <value-of select="@uuidref"/>
+</for-each>
+<text>
+</text><apply-templates/>
+</template>
+
+<template match="profile">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: PROFILE
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="network">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: NETWORK
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+nettype: <value-of select="@nettype"/>
+nid: <value-of select="nid"/>
+<if test="port">
+port: <value-of select="port"/>
+</if>
+<text>
+</text>
+</template>
+
+<template match="mds">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: MDS
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="mdsdev">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: MDSDEV
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+<if test="fstype">
+fstype: <value-of select="fstype"/>
+</if>
+<if test="autoformat">
+autoformat: <value-of select="autoformat"/>
+</if>
+<if test="devpath">
+devpath: <value-of select="devpath"/>
+</if>
+<if test="devsize">
+devsize: <value-of select="devsize"/>
+</if>
+networkRef: <value-of select="network_ref/@uuidref"/>
+mdsRef: <value-of select="mds_ref/@uuidref"/>
+<text>
+</text>
+</template>
+
+<template match="lov">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: LOV
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+mdsRef: <value-of select="mds_ref/@uuidref"/>
+stripepattern: <value-of select="@stripepattern"/>
+stripesize: <value-of select="@stripesize"/>
+stripecount: <value-of select="@stripecount"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="lovconfig">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: LOVCONFIG
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="obd">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: OBD
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+activeRef: <value-of select="active_ref/@uuidref"/>
+obdtype: <value-of select="@obdtype"/>
+<if test="fstype">
+fstype: <value-of select="fstype"/>
+</if>
+<if test="autoformat">
+autoformat: <value-of select="autoformat"/>
+</if>
+<if test="devpath">
+devpath: <value-of select="devpath"/>
+</if>
+<if test="devsize">
+devsize: <value-of select="devsize"/>
+</if>
+<text>
+</text>
+</template>
+
+<template match="ost">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: OST
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="mountpoint">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: MOUNTPOINT
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="echoclient">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: ECHOCLIENT
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/><apply-templates/>
+<text>
+</text>
+</template>
+
+<template match="ldlm">
+dn: uuid=<value-of select="@uuid"/>,<value-of select="$basedn"/>
+objectClass: LDLM
+lustreName: <value-of select="@name"/>
+uuid: <value-of select="@uuid"/>
+<text>
+</text>
+</template>
+
+
+<template match="ldlm_ref">
+ldlmRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="obd_ref">
+obdRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="ost_ref">
+ostRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="network_ref">
+networkRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="mds_ref">
+mdsRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="mountpoint_ref">
+mountpointRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="echoclient_ref">
+echoclientRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="lov_ref">
+lovRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="lovconfig_ref">
+lovconfigRef: <value-of select="@uuidref"/>
+</template>
+
+<template match="path">
+path: <value-of select="."/>
+</template>
+
+<template match="active_ref">
+activeRef: <value-of select="@uuidref"/>
+</template>
+</stylesheet>
+
+
--- /dev/null
+#######################################################################
+# lustre ldap config database
+# $Id: slapd-lustre.conf,v 1.2 2003/01/06 22:17:53 adilger Exp $
+#######################################################################
+
+database ldbm
+suffix "fs=lustre"
+rootdn "cn=Manager,fs=lustre"
+include /etc/openldap/schema/lustre.schema
+rootpw secret
+directory /var/lib/ldap/lustre
+index objectClass eq, uuid eq
--- /dev/null
+dn: fs=lustre
+fs:lustre
+objectClass: lustre
+desc: Lustre Config
AC_OUTPUT(Makefile lib/Makefile ldlm/Makefile obdecho/Makefile ptlrpc/Makefile \
lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \
+ cobd/Makefile ptlbd/Makefile conf/Makefile \
utils/Makefile utils/lconf tests/Makefile obdfilter/Makefile \
obdclass/Makefile llite/Makefile doc/Makefile scripts/Makefile \
scripts/lustre.spec extN/Makefile, chmod +x utils/lconf)
EXTN_FIXES = patch-2.4.18-chaos22
#EXTN_FIXES = ext3-2.4.18-fixes.diff
EXTNP = htree-ext3-2.4.18.diff linux-2.4.18ea-0.8.26.diff
-EXTNP+= ext3-2.4.18-ino_sb_macro.diff extN-misc-fixup.diff
+EXTNP+= ext3-2.4.18-ino_sb_macro.diff extN-misc-fixup.diff extN-noread.diff
+EXTNP+= extN-wantedi.diff
EXTNC = balloc.c bitmap.c dir.c file.c fsync.c ialloc.c inode.c ioctl.c
EXTNC+= namei.c super.c symlink.c
EXTNI = extN_fs.h extN_fs_i.h extN_fs_sb.h extN_jbd.h quotaops.h
$(RM) extN.patchT
l='$(EXTNC)'; for f in $$l; do \
echo "$$f"; \
- (diff -u $(extN_orig)/$$f extN/$$f) >> extN.patchT; \
- test $$? -le 1 || exit 1;
+ (diff -u $(extN_orig)/$$f extN/$$f) >> extN.patchT; \
+ test $$? -le 1 || exit 1; \
done
l='$(EXTNI)'; for f in $$l; do \
echo "$$f"; \
(diff -u $(extN_include_orig)/$$f $(top_srcdir)/include/linux/$$f)>>extN.patchT;\
- test $$? -le 1 || exit 1;
+ test $$? -le 1 || exit 1; \
done
l='$(EXTN_EXTRA)'; for f in $$l; do \
f=`echo "$$f" | sed 's%^fs/%%'`; \
echo "$$f"; \
(cd $(top_srcdir) && \
diff -u /dev/null $$f) >> extN.patchT; \
- test $$? -le 1 || exit 1;
+ test $$? -le 1 || exit 1; \
done
mv -f extN.patchT $(top_builddir)/$(subdir)/extN.patch-$(RELEASE)
echo "Don't forget to add $(srcdir)/extN.patch-$(RELEASE) to CVS!"
-
-
.PHONY: diff
# Just do the SUB transformation on all our source files.
-
-
sed-stamp:
$(RM) $@
rm -rf $(extN_orig) $(extN_include_orig)
--- /dev/null
+diff -ru lustre-head/fs/extN/ialloc.c lustre/fs/extN/ialloc.c
+--- lustre-head/fs/extN/ialloc.c Mon Dec 23 10:02:58 2002
++++ lustre/fs/extN/ialloc.c Mon Dec 23 09:46:20 2002
+@@ -289,6 +289,37 @@
+ }
+
+ /*
++ * @block_group: block group of inode
++ * @offset: relative offset of inode within @block_group
++ *
++ * Check whether any of the inodes in this disk block are in use.
++ *
++ * Caller must be holding superblock lock (group/bitmap read lock in future).
++ */
++int extN_itable_block_used(struct super_block *sb, unsigned int block_group,
++			   int offset)
++{
++	int bitmap_nr = load_inode_bitmap(sb, block_group);
++	int inodes_per_block;
++	unsigned long inum, iend;
++	struct buffer_head *ibitmap;
++
++	if (bitmap_nr < 0)	/* no usable bitmap slot: report "in use" */
++		return 1;
++
++	inodes_per_block = sb->s_blocksize / EXTN_SB(sb)->s_inode_size;
++	inum = offset & ~(inodes_per_block - 1); /* assumes power of 2 - TODO confirm */
++	iend = inum + inodes_per_block;	/* one past the last inode in the block */
++	ibitmap = EXTN_SB(sb)->s_inode_bitmap[bitmap_nr];
++	for (; inum < iend; inum++) {	/* scan every inode sharing the block */
++		if (inum != offset && extN_test_bit(inum, ibitmap->b_data))
++			return 1;	/* some inode other than @offset is set */
++	}
++
++	return 0;	/* no other inode in this block is marked in use */
++}
++
++/*
+ * There are two policies for allocating an inode. If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+@@ -312,6 +343,7 @@
+ struct extN_group_desc * gdp;
+ struct extN_group_desc * tmp;
+ struct extN_super_block * es;
++ struct extN_iloc iloc;
+ int err = 0;
+
+ /* Cannot create files in a deleted directory */
+@@ -505,7 +538,7 @@
+ ei->i_prealloc_count = 0;
+ #endif
+ ei->i_block_group = i;
+-
++
+ if (ei->i_flags & EXTN_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+ if (IS_SYNC(inode))
+@@ -514,9 +547,18 @@
+ inode->i_generation = sbi->s_next_generation++;
+
+ ei->i_state = EXTN_STATE_NEW;
+- err = extN_mark_inode_dirty(handle, inode);
++ err = extN_get_inode_loc_new(inode, &iloc, 1);
+ if (err) goto fail;
+-
++ BUFFER_TRACE(iloc->bh, "get_write_access");
++ err = extN_journal_get_write_access(handle, iloc.bh);
++ if (err) {
++ brelse(iloc.bh);
++ iloc.bh = NULL;
++ goto fail;
++ }
++ err = extN_mark_iloc_dirty(handle, inode, &iloc);
++ if (err) goto fail;
++
+ unlock_super (sb);
+ if(DQUOT_ALLOC_INODE(inode)) {
+ DQUOT_DROP(inode);
+diff -ru lustre-head/fs/extN/inode.c lustre/fs/extN/inode.c
+--- lustre-head/fs/extN/inode.c Mon Dec 23 10:02:58 2002
++++ lustre/fs/extN/inode.c Mon Dec 23 09:50:25 2002
+@@ -2011,23 +1994,32 @@
+ extN_journal_stop(handle, inode);
+ }
+
+-/*
+- * extN_get_inode_loc returns with an extra refcount against the
+- * inode's underlying buffer_head on success.
+- */
++extern int extN_itable_block_used(struct super_block *sb,
++ unsigned int block_group,
++ int offset);
++
++#define NUM_INODE_PREREAD 16
+
+-int extN_get_inode_loc (struct inode *inode, struct extN_iloc *iloc)
++/*
++ * extN_get_inode_loc returns with an extra refcount against the inode's
++ * underlying buffer_head on success. If this is for a new inode allocation
++ * (new is non-zero) then we may be able to optimize away the read if there
++ * are no other in-use inodes in this inode table block. If we need to do
++ * a read, then read in a whole chunk of blocks to avoid blocking again soon
++ * if we are doing lots of creates/updates.
++ */
++int extN_get_inode_loc_new(struct inode *inode, struct extN_iloc *iloc, int new)
+ {
+ struct super_block *sb = inode->i_sb;
+ struct extN_sb_info *sbi = EXTN_SB(sb);
+- struct buffer_head *bh = 0;
++ struct buffer_head *bh[NUM_INODE_PREREAD];
+ unsigned long block;
+ unsigned long block_group;
+ unsigned long group_desc;
+ unsigned long desc;
+ unsigned long offset;
+ struct extN_group_desc * gdp;
+-
++
+ if ((inode->i_ino != EXTN_ROOT_INO &&
+ inode->i_ino != EXTN_JOURNAL_INO &&
+ inode->i_ino < EXTN_FIRST_INO(sb)) ||
+@@ -2042,38 +2034,86 @@
+ }
+ group_desc = block_group >> sbi->s_desc_per_block_bits;
+ desc = block_group & (sbi->s_desc_per_block - 1);
+- bh = sbi->s_group_desc[group_desc];
+- if (!bh) {
++ if (!sbi->s_group_desc[group_desc]) {
+ extN_error(sb, __FUNCTION__, "Descriptor not loaded");
+ goto bad_inode;
+ }
+
+- gdp = (struct extN_group_desc *) bh->b_data;
++ gdp = (struct extN_group_desc *)(sbi->s_group_desc[group_desc]->b_data);
++
+ /*
+ * Figure out the offset within the block group inode table
+ */
+- offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) *
+- sbi->s_inode_size;
++ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group);
++
+ block = le32_to_cpu(gdp[desc].bg_inode_table) +
+- (offset >> EXTN_BLOCK_SIZE_BITS(sb));
+- if (!(bh = sb_bread(sb, block))) {
+- extN_error (sb, __FUNCTION__,
+- "unable to read inode block - "
+- "inode=%lu, block=%lu", inode->i_ino, block);
+- goto bad_inode;
++ (offset * sbi->s_inode_size >> EXTN_BLOCK_SIZE_BITS(sb));
++
++ bh[0] = sb_getblk(sb, block);
++ if (buffer_uptodate(bh[0]))
++ goto done;
++
++ /* If we don't really need to read this block, and it isn't already
++ * in memory, then we just zero it out. Otherwise, we keep the
++ * current block contents (deleted inode data) for posterity.
++ */
++ if (new && !extN_itable_block_used(sb, block_group, offset)) {
++ lock_buffer(bh[0]);
++ memset(bh[0]->b_data, 0, bh[0]->b_size);
++ mark_buffer_uptodate(bh[0], 1);
++ unlock_buffer(bh[0]);
++ } else {
++ unsigned long block_end, itable_end;
++ int count = 1;
++
++ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) +
++ sbi->s_itb_per_group;
++ block_end = block + NUM_INODE_PREREAD;
++ if (block_end > itable_end)
++ block_end = itable_end;
++
++ for (; block < block_end; block++) {
++ bh[count] = sb_getblk(sb, block);
++ if (count && (buffer_uptodate(bh[count]) ||
++ buffer_locked(bh[count]))) {
++ __brelse(bh[count]);
++ } else
++ count++;
++ }
++
++ ll_rw_block(READ, count, bh);
++
++ /* Release all but the block we actually need (bh[0]) */
++ while (--count > 0)
++ __brelse(bh[count]);
++
++ wait_on_buffer(bh[0]);
++ if (!buffer_uptodate(bh[0])) {
++ extN_error(sb, __FUNCTION__,
++ "unable to read inode block - "
++ "inode=%lu, block=%lu", inode->i_ino,
++ bh[0]->b_blocknr);
++ goto bad_inode;
++ }
+ }
+- offset &= (EXTN_BLOCK_SIZE(sb) - 1);
++ done:
++ offset = (offset * sbi->s_inode_size) & (EXTN_BLOCK_SIZE(sb) - 1);
+
+- iloc->bh = bh;
+- iloc->raw_inode = (struct extN_inode *) (bh->b_data + offset);
++ iloc->bh = bh[0];
++ iloc->raw_inode = (struct extN_inode *)(bh[0]->b_data + offset);
+ iloc->block_group = block_group;
+-
++
+ return 0;
+-
++
+ bad_inode:
+ return -EIO;
+ }
+
++int extN_get_inode_loc(struct inode *inode, struct extN_iloc *iloc)
++{
++ return extN_get_inode_loc_new(inode, iloc, 0);
++}
++
+ void extN_read_inode(struct inode * inode)
+ {
+ struct extN_iloc iloc;
--- /dev/null
+--- lustre/extN-clean/namei.c 2002-12-30 05:56:09.000000000 -0500
++++ lustre/extN/namei.c 2002-12-30 06:29:39.000000000 -0500
+@@ -1224,7 +1224,8 @@
+ if (IS_SYNC(dir))
+ handle->h_sync = 1;
+
+- inode = extN_new_inode (handle, dir, mode);
++ inode = extN_new_inode (handle, dir, mode,
++ (unsigned long)dentry->d_fsdata);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ inode->i_op = &extN_file_inode_operations;
+@@ -1254,7 +1254,8 @@
+ if (IS_SYNC(dir))
+ handle->h_sync = 1;
+
+- inode = extN_new_inode (handle, dir, mode);
++ inode = extN_new_inode (handle, dir, mode,
++ (unsigned long)dentry->d_fsdata);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ init_special_inode(inode, mode, rdev);
+@@ -1286,7 +1286,8 @@
+ if (IS_SYNC(dir))
+ handle->h_sync = 1;
+
+- inode = extN_new_inode (handle, dir, S_IFDIR | mode);
++ inode = extN_new_inode (handle, dir, S_IFDIR | mode,
++ (unsigned long)dentry->d_fsdata);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto out_stop;
+@@ -1680,7 +1681,8 @@
+ if (IS_SYNC(dir))
+ handle->h_sync = 1;
+
+- inode = extN_new_inode (handle, dir, S_IFLNK|S_IRWXUGO);
++ inode = extN_new_inode (handle, dir, S_IFLNK|S_IRWXUGO,
++ (unsigned long)dentry->d_fsdata);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto out_stop;
+--- lustre/extN-clean/ialloc.c 2002-12-28 23:56:42.000000000 -0500
++++ lustre/extN/ialloc.c 2002-12-30 06:29:39.000000000 -0500
+@@ -329,8 +329,8 @@
+ * For other inodes, search forward from the parent directory's block
+ * group to find a free inode.
+ */
+-struct inode * extN_new_inode (handle_t *handle,
+- const struct inode * dir, int mode)
++struct inode *extN_new_inode(handle_t *handle, const struct inode *dir,
++ int mode, unsigned long goal)
+ {
+ struct super_block * sb;
+ struct buffer_head * bh;
+@@ -360,6 +361,47 @@
+
+ lock_super (sb);
+ es = sbi->s_es;
++
++ /* A non-zero goal requests one specific inode number: try to claim
++ * exactly that bit in its group's inode bitmap, and fall back to the
++ * normal search at "repeat" if the bit is already set.
++ * NOTE(review): goal is not range-checked and gdp is not checked for
++ * NULL here -- confirm callers only pass valid inode numbers.
++ */
++ if (goal) {
++ i = (goal - 1) / EXTN_INODES_PER_GROUP(sb);
++ j = (goal - 1) % EXTN_INODES_PER_GROUP(sb);
++ gdp = extN_get_group_desc(sb, i, &bh2);
++
++ /* Nothing in this block has set err yet, so give the bitmap-load
++ * failure path a definite error code before jumping to fail. */
++ err = -EIO;
++ bitmap_nr = load_inode_bitmap (sb, i);
++ if (bitmap_nr < 0)
++ goto fail;
++
++ bh = sbi->s_inode_bitmap[bitmap_nr];
++
++ BUFFER_TRACE(bh, "get_write_access");
++ err = extN_journal_get_write_access(handle, bh);
++ if (err) goto fail;
++
++ if (extN_set_bit(j, bh->b_data)) {
++ printk(KERN_ERR "goal inode %lu unavailable\n", goal);
++ /* Oh well, we tried. */
++ goto repeat;
++ }
++
++ BUFFER_TRACE(bh, "call extN_journal_dirty_metadata");
++ err = extN_journal_dirty_metadata(handle, bh);
++ if (err) goto fail;
++
++ /* We've shortcircuited the allocation system successfully,
++ * now finish filling in the inode.
++ */
++ goto have_bit_and_group;
++ }
++
+ repeat:
+ gdp = NULL;
+ i = 0;
+@@ -474,6 +509,7 @@
+ }
+ goto repeat;
+ }
++have_bit_and_group:
+ j += i * sbi->s_inodes_per_group + 1;
+ if (j < sbi->s_first_ino || j > le32_to_cpu(es->s_inodes_count)) {
+ extN_error (sb, "extN_new_inode",
+--- lustre/extN-clean/ioctl.c 2002-12-28 23:56:42.000000000 -0500
++++ lustre/extN/ioctl.c 2002-12-30 06:29:39.000000000 -0500
+@@ -24,6 +24,42 @@
+ extN_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+ switch (cmd) {
++ /* Create a file named after the decimal value of arg in the
++ * directory the ioctl was issued on, passing arg to the inode
++ * allocator as the wanted inode number. Returns 0 on success,
++ * -EEXIST if the name already exists or a different inode number
++ * was allocated, otherwise the lookup or vfs_create() error.
++ */
++ case EXTN_IOC_CREATE_INUM: {
++ char name[32];
++ struct dentry *dchild, *dparent;
++ int rc = 0;
++
++ dparent = list_entry(inode->i_dentry.next, struct dentry,
++ d_alias);
++ snprintf(name, sizeof name, "%lu", arg);
++ dchild = lookup_one_len(name, dparent, strlen(name));
++ /* lookup_one_len() reports failure with an ERR_PTR value,
++ * which must not be dereferenced or handed to dput(). */
++ if (IS_ERR(dchild))
++ return PTR_ERR(dchild);
++ if (dchild->d_inode) {
++ printk(KERN_ERR "%.*s/%lu already exists (ino %lu)\n",
++ (int)dparent->d_name.len, dparent->d_name.name, arg,
++ dchild->d_inode->i_ino);
++ rc = -EEXIST;
++ } else {
++ /* extN_new_inode() is handed d_fsdata as its goal. */
++ dchild->d_fsdata = (void *)arg;
++ rc = vfs_create(inode, dchild, 0644);
++ if (rc)
++ printk(KERN_ERR "vfs_create: %d\n", rc);
++ else if (dchild->d_inode->i_ino != arg)
++ rc = -EEXIST;
++ }
++ dput(dchild);
++ return rc;
++ }
+ case EXTN_IOC_GETFLAGS:
+ flags = ei->i_flags & EXTN_FL_USER_VISIBLE;
+ return put_user(flags, (int *) arg);
+--- lustre/include/linux/extN_fs.h~ 2002-12-30 06:01:43.000000000 -0500
++++ lustre/include/linux/extN_fs.h 2002-12-30 06:02:51.000000000 -0500
+@@ -200,6 +200,7 @@
+ #define EXTN_IOC_SETFLAGS _IOW('f', 2, long)
+ #define EXTN_IOC_GETVERSION _IOR('f', 3, long)
+ #define EXTN_IOC_SETVERSION _IOW('f', 4, long)
++/* EXTN_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
+ #define EXTN_IOC_GETVERSION_OLD _IOR('v', 1, long)
+ #define EXTN_IOC_SETVERSION_OLD _IOW('v', 2, long)
+ #ifdef CONFIG_JBD_DEBUG
+@@ -632,7 +633,8 @@
+ extern int extN_sync_file (struct file *, struct dentry *, int);
+
+ /* ialloc.c */
+-extern struct inode * extN_new_inode (handle_t *, const struct inode *, int);
++extern struct inode * extN_new_inode (handle_t *, const struct inode *, int,
++ unsigned long);
+ extern void extN_free_inode (handle_t *, struct inode *);
+ extern struct inode * extN_orphan_get (struct super_block *, ino_t);
+ extern unsigned long extN_count_free_inodes (struct super_block *);
+@@ -714,4 +716,6 @@
+
+ #endif /* __KERNEL__ */
+
++#define EXTN_IOC_CREATE_INUM _IOW('f', 5, long)
++
+ #endif /* _LINUX_EXTN_FS_H */
typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags);
struct ldlm_lock {
- __u64 l_random;
+ struct portals_handle l_handle; // must be first in the structure
atomic_t l_refc;
struct ldlm_resource *l_resource;
struct ldlm_lock *l_parent;
};
typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new);
-typedef int (*ldlm_res_policy)(struct ldlm_lock *lock, void *req_cookie,
- ldlm_mode_t mode, int flags, void *data);
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock *,
+ void *req_cookie, ldlm_mode_t mode, int flags,
+ void *data);
#define LDLM_PLAIN 10
#define LDLM_EXTENT 11
do { \
if (lock->l_resource == NULL) { \
CDEBUG(D_DLMTRACE, "### " format \
- " ns: \?\? lock: %p lrc: %d/%d,%d mode: %s/%s " \
+ " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
"res: \?\? rrc=\?\? type: \?\?\? remote: "LPX64")\n" \
- , ## a, lock, lock->l_refc, lock->l_readers, \
- lock->l_writers, \
+ , ## a, lock, lock->l_handle.h_cookie, \
+ atomic_read(&lock->l_refc), \
+ lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
- lock->l_remote_handle.addr); \
+ lock->l_remote_handle.cookie); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_EXTENT) { \
CDEBUG(D_DLMTRACE, "### " format \
- " ns: %s lock: %p lrc: %d/%d,%d mode: %s/%s res: "LPU64 \
- "/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64"] remote: " \
- LPX64"\n" , ## a, \
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
+ "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
+ "] remote: "LPX64"\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, lock, \
- lock->l_refc, lock->l_readers, lock->l_writers, \
+ lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
+ lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
lock->l_resource->lr_name[0], \
atomic_read(&lock->l_resource->lr_refcount), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_extent.start, lock->l_extent.end, \
- lock->l_remote_handle.addr); \
+ lock->l_remote_handle.cookie); \
break; \
} \
{ \
CDEBUG(D_DLMTRACE, "### " format \
- " ns: %s lock: %p lrc: %d/%d,%d mode: %s/%s res: "LPU64 \
- "/"LPU64" rrc: %d type: %s remote: "LPX64"\n" , ## a, \
- lock->l_resource->lr_namespace->ns_name, lock, \
- lock->l_refc, lock->l_readers, lock->l_writers, \
+ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
+ "res: "LPU64"/"LPU64" rrc: %d type: %s remote: "LPX64 \
+ "\n" , ## a, lock->l_resource->lr_namespace->ns_name, \
+ lock, lock->l_handle.h_cookie, \
+ atomic_read (&lock->l_refc), \
+ lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
lock->l_resource->lr_name[0], \
lock->l_resource->lr_name[1], \
atomic_read(&lock->l_resource->lr_refcount), \
ldlm_typename[lock->l_resource->lr_type], \
- lock->l_remote_handle.addr); \
+ lock->l_remote_handle.cookie); \
} \
} while (0)
* Iterators.
*/
-#define LDLM_ITER_CONTINUE 0 /* keep iterating */
-#define LDLM_ITER_STOP 1 /* stop iterating */
+#define LDLM_ITER_CONTINUE 1 /* keep iterating */
+#define LDLM_ITER_STOP 0 /* stop iterating */
typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
+typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
void *closure);
int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
void *closure);
+int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
+ ldlm_res_iterator_t iter, void *closure);
int ldlm_replay_locks(struct obd_import *imp);
/* ldlm_extent.c */
int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *);
-int ldlm_extent_policy(struct ldlm_lock *, void *, ldlm_mode_t, int flags,
- void *);
+int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock *, void *,
+ ldlm_mode_t, int flags, void *);
/* ldlm_lockd.c */
int ldlm_handle_enqueue(struct ptlrpc_request *req);
int ldlm_del_waiting_lock(struct ldlm_lock *lock);
/* ldlm_lock.c */
-void ldlm_register_intent(int (*arg)(struct ldlm_lock *lock, void *req_cookie,
- ldlm_mode_t mode, int flags, void *data));
+void ldlm_register_intent(ldlm_res_policy arg);
void ldlm_unregister_intent(void);
void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh);
-struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int strict,
- int flags);
+struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags);
void ldlm_cancel_callback(struct ldlm_lock *);
int ldlm_lock_set_data(struct lustre_handle *, void *data, int datalen);
void ldlm_lock_remove_from_lru(struct ldlm_lock *);
static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
{
- return __ldlm_handle2lock(h, 1, 0);
+ return __ldlm_handle2lock(h, 0); /* flags == 0: look the lock up without testing or setting any l_flags bit */
}
#define LDLM_LOCK_PUT(lock) \
struct lustre_handle *parent_lock_handle,
__u64 *res_id, __u32 type, ldlm_mode_t mode, void *data,
__u32 data_len);
-ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock *lock, void *cookie,
- int cookie_len, int *flags,
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock *,
+ void *cookie, int cookie_len, int *flags,
ldlm_completion_callback completion,
ldlm_blocking_callback blocking);
struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
int *flags);
void ldlm_lock_cancel(struct ldlm_lock *lock);
void ldlm_cancel_locks_for_export(struct obd_export *export);
-void ldlm_run_ast_work(struct list_head *rpc_list);
+int ldlm_run_ast_work(struct list_head *rpc_list);
void ldlm_reprocess_all(struct ldlm_resource *res);
-void ldlm_lock_dump(struct ldlm_lock *lock);
-void ldlm_lock_dump_handle(struct lustre_handle *);
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns);
+void ldlm_lock_dump(int level, struct ldlm_lock *lock);
+void ldlm_lock_dump_handle(int level, struct lustre_handle *);
/* ldlm_test.c */
int ldlm_test(struct obd_device *device, struct lustre_handle *connh);
void ldlm_dump_all_namespaces(void);
void ldlm_namespace_dump(struct ldlm_namespace *);
void ldlm_resource_dump(struct ldlm_resource *);
-int ldlm_lock_change_resource(struct ldlm_lock *, __u64 new_resid[3]);
+int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
+ __u64 new_resid[3]);
/* ldlm_request.c */
+int ldlm_expired_completion_wait(void *data);
int ldlm_completion_ast(struct ldlm_lock *lock, int flags);
int ldlm_cli_enqueue(struct lustre_handle *conn,
struct ptlrpc_request *req,
#include <linux/obd_filter.h>
struct lov_export_data {
+ spinlock_t led_lock;
struct list_head led_open_head;
};
+struct ost_export_data {
+ __u8 oed_uuid[37]; /* client UUID */
+};
+
struct obd_export {
__u64 exp_cookie;
struct list_head exp_obd_chain;
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
struct lov_export_data eu_lov_data;
+ struct ost_export_data eu_ost_data;
} u;
};
#define exp_mds_data u.eu_mds_data
#define exp_lov_data u.eu_lov_data
#define exp_filter_data u.eu_filter_data
+#define exp_ost_data u.eu_ost_data
extern struct obd_export *class_conn2export(struct lustre_handle *conn);
extern struct obd_device *class_conn2obd(struct lustre_handle *conn);
int recovd_cleanup(struct recovd_obd *mgr);
extern struct recovd_obd *ptlrpc_recovd;
+struct ptlrpc_request;
int ptlrpc_run_recovery_upcall(struct ptlrpc_connection *conn);
-int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc);
-int ptlrpc_replay(struct obd_import *imp, int send_last_flag);
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc,
+ struct ptlrpc_request **reqptr);
+int ptlrpc_replay(struct obd_import *imp);
int ptlrpc_resend(struct obd_import *imp);
void ptlrpc_free_committed(struct obd_import *imp);
void ptlrpc_wake_delayed(struct obd_import *imp);
#define LDLM_CB_REPLY_PORTAL 16
#define LDLM_CANCEL_REQUEST_PORTAL 17
#define LDLM_CANCEL_REPLY_PORTAL 18
+#define PTLBD_REQUEST_PORTAL 19
+#define PTLBD_REPLY_PORTAL 20
+#define PTLBD_BULK_PORTAL 21
#define SVC_KILLED 1
#define SVC_EVENT 2
#define MSG_OP_FLAG_SHIFT 16
/* Flags that apply to all requests are in the bottom 16 bits */
-#define MSG_GEN_FLAG_MASK 0x0000ffff
-#define MSG_LAST_REPLAY 1
-#define MSG_RESENT 2
+#define MSG_GEN_FLAG_MASK 0x0000ffff
+#define MSG_LAST_REPLAY 1
+#define MSG_RESENT 2
+
+/* XXX horrible interim hack -- see bug 578 */
+#define MSG_REPLAY_IN_PROGRESS 4
static inline int lustre_msg_get_flags(struct lustre_msg *msg)
{
struct lov_mds_md {
 __u32 lmm_magic;
- __u32 lmm_unused; /* was packed size of extended attribute */
 __u64 lmm_object_id; /* lov object id */
- __u32 lmm_stripe_offset; /* starting stripe offset in lmd_objects */
- __u32 lmm_stripe_count; /* number of stipes in use for this object */
- __u64 lmm_stripe_size; /* size of the stripe */
- __u32 lmm_ost_count; /* how many OST idx are in this LOV md */
- __u32 lmm_stripe_pattern; /* per-lov object stripe pattern */
+ __u32 lmm_stripe_size; /* size of the stripe */
+ __u32 lmm_stripe_offset; /* starting stripe offset in lmm_objects */
+ __u16 lmm_stripe_count; /* number of stripes in use for this object */
+ __u16 lmm_ost_count; /* how many OST idx are in this LOV md */
 struct lov_object_id lmm_objects[0];
};
#define MDS_GETSTATUS 9
#define MDS_STATFS 10
#define MDS_GETLOVINFO 11
+#define MDS_GETATTR_NAME 12
#define REINT_SETATTR 1
#define REINT_CREATE 2
__u64 lock_policy_res1;
__u64 lock_policy_res2;
};
+
+/*
+ * ptlbd, portal block device requests
+ */
+typedef enum { /* ptlbd request commands, with explicit values 200-202; NOTE(review): presumably what ptlbd_op.op_cmd carries -- confirm */
+ PTLBD_QUERY = 200,
+ PTLBD_READ = 201,
+ PTLBD_WRITE = 202,
+} ptlbd_cmd_t;
+
+struct ptlbd_op { /* 12 bytes with natural alignment: four __u16 followed by one __u32 */
+ __u16 op_cmd; /* NOTE(review): presumably a ptlbd_cmd_t value -- confirm */
+ __u16 op_lun;
+ __u16 op_niob_cnt; /* NOTE(review): presumably the number of struct ptlbd_niob entries -- confirm */
+ __u16 op__padding; /* fourth __u16: brings the preceding fields to 8 bytes before op_block_cnt */
+ __u32 op_block_cnt;
+};
+
+struct ptlbd_niob { /* 24 bytes with natural alignment: two __u64 followed by two __u32 */
+ __u64 n_xid;
+ __u64 n_block_nr;
+ __u32 n_offset;
+ __u32 n_length;
+};
+
+struct ptlbd_rsp { /* 4 bytes: two __u16 */
+ __u16 r_status;
+ __u16 r_error_cnt;
+};
#endif
struct obd_device;
struct recovd_data;
struct recovd_obd;
+struct obd_export;
#include <linux/lustre_ha.h>
int target_handle_connect(struct ptlrpc_request *req);
int target_handle_disconnect(struct ptlrpc_request *req);
+int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
+ char *cluuid);
int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd,
obd_uuid_t cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover);
handle->addr = (__u64)(unsigned long)object;
}
-struct obd_statfs;
-struct statfs;
-void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
-void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
-void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src);
-static inline void
-obd_statfs_unpack(struct obd_statfs *tgt, struct obd_statfs *src)
-{
- obd_statfs_pack(tgt, src);
-}
-
#include <linux/portals_lib.h>
/*
}
if (data->ioc_inllen2) {
- data->ioc_inlbuf2 = &data->ioc_bulk[0] + size_round(data->ioc_inllen1);
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1);
}
if (data->ioc_inllen3) {
- data->ioc_inlbuf3 = &data->ioc_bulk[0] + size_round(data->ioc_inllen1) +
+ data->ioc_inlbuf3 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1) +
size_round(data->ioc_inllen2);
}
#define OBD_IOC_CLEANUP _IO ('f', 103 )
#define OBD_IOC_DESTROY _IOW ('f', 104, long)
#define OBD_IOC_PREALLOCATE _IOWR('f', 105, long)
-#define OBD_IOC_DEC_USE_COUNT _IO ('f', 106 )
+
#define OBD_IOC_SETATTR _IOW ('f', 107, long)
#define OBD_IOC_GETATTR _IOR ('f', 108, long)
#define OBD_IOC_READ _IOWR('f', 109, long)
int ll_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, void *data,
__u32 data_len, int flag);
int ll_size_lock(struct inode *, struct lov_stripe_md *, obd_off start,
- int mode, struct lustre_handle **);
+ int mode, struct lustre_handle *);
int ll_size_unlock(struct inode *, struct lov_stripe_md *, int mode,
struct lustre_handle *);
int ll_file_size(struct inode *inode, struct lov_stripe_md *md);
/* mds/mds_fs.c */
int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
-void mds_fs_cleanup(struct obd_device *obddev);
+int mds_fs_cleanup(struct obd_device *obddev);
/* mdc/mdc_request.c */
int mdc_enqueue(struct lustre_handle *conn, int lock_type,
struct ptlrpc_request **request);
int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid);
int mdc_getattr(struct lustre_handle *conn,
- obd_id ino, int type, unsigned long valid, size_t ea_size,
+ obd_id ino, int type, unsigned long valid, unsigned int ea_size,
struct ptlrpc_request **request);
+int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
+ char *filename, int namelen, unsigned long valid,
+ unsigned int ea_size, struct ptlrpc_request **request);
int mdc_setattr(struct lustre_handle *conn,
struct inode *, struct iattr *iattr, struct ptlrpc_request **);
int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
#define LDLM_NUM_THREADS 4
#define LDLM_NEVENTS 1024
-#define LDLM_NBUFS 20
-#define LDLM_BUFSIZE (32 * 1024)
+#define LDLM_NBUFS 100
+#define LDLM_BUFSIZE (8 * 1024)
#define LDLM_MAXREQSIZE 1024
#define MDT_NUM_THREADS 8
#define MDS_NEVENTS 1024
-#define MDS_NBUFS 20
-#define MDS_BUFSIZE (32 * 1024)
+#define MDS_NBUFS 100
+#define MDS_BUFSIZE (8 * 1024)
#define MDS_MAXREQSIZE 1024
#define OST_NUM_THREADS 6
#define OST_NEVENTS min(num_physpages / 16, 32768UL)
-#define OST_NBUFS min(OST_NEVENTS / 128, 256UL)
-#define OST_BUFSIZE ((OST_NEVENTS > 4096UL ? 128 : 32) * 1024)
+#define OST_NBUFS min(OST_NEVENTS / 128, 1280UL)
+#define OST_BUFSIZE ((OST_NEVENTS > 4096UL ? 32 : 8) * 1024)
#define OST_MAXREQSIZE (8 * 1024)
+#define PTLBD_NUM_THREADS 4
+#define PTLBD_NEVENTS 1024
+#define PTLBD_NBUFS 20
+#define PTLBD_BUFSIZE (32 * 1024)
+#define PTLBD_MAXREQSIZE 1024
+
#define CONN_INVALID 1
struct ptlrpc_connection {
__u64 rq_xid;
int rq_level;
- time_t rq_timeout;
// void * rq_reply_handle;
wait_queue_head_t rq_wait_for_rep;
#define DEBUG_REQ(level, req, fmt, args...) \
do { \
CDEBUG(level, \
- "@@@ " fmt " req x"LPD64"/t"LPD64" o%d->%s:%d lens %d/%d ref %d fl " \
- "%x\n" , ## args, req->rq_xid, req->rq_transno, \
+ "@@@ " fmt " req@%p x"LPD64"/t"LPD64" o%d->%s:%d lens %d/%d ref %d fl " \
+ "%x\n" , ## args, req, req->rq_xid, req->rq_reqmsg->transno, \
req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \
req->rq_connection ? (char *)req->rq_connection->c_remote_uuid : "<?>", \
(req->rq_import && req->rq_import->imp_client) ? \
req->rq_import->imp_client->cli_request_portal : -1, \
- req->rq_reqlen, req->rq_replen, req->rq_refcount, req->rq_flags); \
+ req->rq_reqlen, req->rq_replen, \
+ atomic_read (&req->rq_refcount), req->rq_flags); \
} while (0)
struct ptlrpc_bulk_page {
struct lov_oinfo { /* per-child structure */
__u64 loi_id; /* object ID on the target OST */
- struct lustre_handle *loi_handle; /* handle for object on OST */
+ struct lustre_handle *loi_handle; /* open file handle for obj on OST */
int loi_ost_idx; /* OST stripe index in lmd_objects array */
};
struct lov_stripe_md {
- __u32 lsm_magic;
__u64 lsm_object_id; /* lov object id */
- __u64 lsm_stripe_size; /* size of the stripe */
- __u32 lsm_stripe_pattern; /* per-lov object stripe pattern */
+ __u32 lsm_magic;
+ __u32 lsm_stripe_size; /* size of the stripe */
int lsm_stripe_offset; /* offset of first stripe in lmd_objects */
int lsm_stripe_count; /* how many objects are being striped on */
struct lov_oinfo lsm_oinfo[0];
};
+#define IOC_OSC_TYPE 'h'
+#define IOC_OSC_MIN_NR 20
+#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *)
+#define IOC_OSC_MAX_NR 50
+
+#define IOC_MDC_TYPE 'i'
+#define IOC_MDC_MIN_NR 20
+#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_MAX_NR 50
+
#ifdef __KERNEL__
# include <linux/fs.h>
# include <linux/list.h>
};
struct brw_page {
- struct page *pg;
- obd_size count;
obd_off off;
+ struct page *pg;
+ int count;
obd_flag flag;
};
struct dentry *fo_dentry_O_mode[16];
spinlock_t fo_objidlock; /* protects fo_lastobjid increment */
__u64 fo_lastobjid;
+ __u64 fo_last_committed;
struct file_operations *fo_fop;
struct inode_operations *fo_iop;
struct address_space_operations *fo_aops;
struct obd_device *cl_containing_lov;
};
-#define IOC_OSC_TYPE 'h'
-#define IOC_OSC_MIN_NR 20
-#define IOC_OSC_REGISTER_LOV _IOWR('h', 20, struct obd_device *)
-#define IOC_OSC_MAX_NR 50
-
struct mds_obd {
struct ptlrpc_service *mds_service;
struct list_head mds_delayed_reply_queue;
spinlock_t mds_processing_task_lock;
pid_t mds_processing_task;
+
+ int mds_has_lov_desc;
+ struct lov_desc mds_lov_desc;
};
struct ldlm_obd {
atomic_t eo_write;
};
+/*
+ * this struct does double-duty acting as either a client or
+ * server instance .. maybe not wise.
+ */
+struct ptlbd_obd {
+ /* server's */
+ struct ptlrpc_service *ptlbd_service;
+ /* client's */
+ struct ptlrpc_client bd_client;
+ struct obd_import bd_import;
+ int refcount; /* XXX sigh */
+};
+
struct recovd_obd {
spinlock_t recovd_lock;
struct list_head recovd_managed_items; /* items managed */
struct lustre_handle conn; /* the local connection to osc/lov */
};
+struct cache_obd {
+ struct lustre_handle cobd_target; /* local connection to target obd */
+ struct lustre_handle cobd_cache; /* local connection to cache obd */
+};
+
struct lov_tgt_desc {
obd_uuid_t uuid;
struct lustre_handle conn;
struct recovd_obd recovd;
struct trace_obd trace;
struct lov_obd lov;
+ struct cache_obd cobd;
+ struct ptlbd_obd ptlbd;
#if 0
struct snap_obd snap;
#endif
};
struct obd_ops {
+ struct module *o_owner;
int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len,
void *karg, void *uarg);
int (*o_get_info)(struct lustre_handle *, obd_count keylen, void *key,
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef _OBD_CACHE_H__
+#define _OBD_CACHE_H__
+
+#ifdef __KERNEL__
+
+#define OBD_CACHE_DEVICENAME "cobd" /* NOTE(review): presumably the obd type name of the cache obd -- confirm */
+
+#endif /* __KERNEL__ */
+#endif /* _OBD_CACHE_H__ */
static inline void obdo_from_inode(struct obdo *dst, struct inode *src,
obd_flag valid)
{
-// if (valid & OBD_MD_FLID)
-// dst->o_id = src->i_ino;
if (valid & OBD_MD_FLATIME)
dst->o_atime = src->i_atime;
if (valid & OBD_MD_FLMTIME)
static inline void obdo_to_inode(struct inode *dst, struct obdo *src,
obd_flag valid)
{
-// if (valid & OBD_MD_FLID)
-// dst->i_ino = src->o_id;
+ valid &= src->o_valid;
+
if (valid & OBD_MD_FLATIME)
dst->i_atime = src->o_atime;
if (valid & OBD_MD_FLMTIME)
int class_uuid2dev(char *uuid);
struct obd_device *class_uuid2obd(char *uuid);
struct obd_export *class_new_export(struct obd_device *obddev);
-struct obd_type *class_nm_to_type(char* name);
+struct obd_type *class_get_type(char *name);
+void class_put_type(struct obd_type *type);
void class_destroy_export(struct obd_export *exp);
int class_connect(struct lustre_handle *conn, struct obd_device *obd,
obd_uuid_t cluuid);
return list_entry(rd, struct ptlrpc_connection, c_recovd_data);
}
+struct obd_statfs;
+struct statfs;
+void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs);
+void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs);
+void obd_statfs_pack(struct obd_statfs *tgt, struct obd_statfs *src);
+void obd_statfs_unpack(struct obd_statfs *tgt, struct obd_statfs *src);
+
#endif
/* sysctl.c */
void ost_pack_niobuf(void **tmp, __u64 offset, __u32 len, __u32 flags,
__u32 xid);
void ost_unpack_niobuf(void **tmp, struct niobuf_remote **nbp);
-void ost_pack_ioo(void **tmp, struct lov_stripe_md *oa, int bufcnt);
-void ost_unpack_ioo(void **tmp, struct obd_ioobj **ioop);
+void ost_pack_ioo(struct obd_ioobj **ioop, struct lov_stripe_md *oa,int bufcnt);
+void ost_unpack_ioo(struct obd_ioobj **tmp, struct obd_ioobj **ioop);
#endif
--- /dev/null
+#ifndef _OBD_PTLBD_H
+#define _OBD_PTLBD_H
+
+#include <linux/lustre_idl.h>
+/*
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+/* Only pointers to these structures appear in the prototypes below.
+ * Declaring the tags here keeps them at file scope, so this header does
+ * not depend on which other headers happen to be included before it. */
+struct buffer_head;
+struct ptlbd_obd;
+struct ptlrpc_request;
+
+/* Device names for the server and client sides of ptlbd. */
+#define OBD_PTLBD_SV_DEVICENAME "ptlbd_server"
+#define OBD_PTLBD_CL_DEVICENAME "ptlbd_client"
+
+/* XXX maybe this isn't the best header to be dumping all this in.. */
+
+extern int ptlbd_blk_init(void);
+extern int ptlbd_cl_init(void);
+extern int ptlbd_sv_init(void);
+
+extern void ptlbd_blk_exit(void);
+extern void ptlbd_cl_exit(void);
+extern void ptlbd_sv_exit(void);
+
+extern void ptlbd_blk_register(struct ptlbd_obd *ptlbd);
+extern int ptlbd_send_req(struct ptlbd_obd *, ptlbd_cmd_t cmd,
+ struct buffer_head *);
+extern int ptlbd_parse_req(struct ptlrpc_request *req);
+
+#endif /* _OBD_PTLBD_H */
/* global variables */
extern atomic_t obd_memory;
+extern int obd_memmax;
extern unsigned long obd_fail_loc;
extern unsigned long obd_timeout;
extern char obd_recovery_upcall[128];
#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c
#define OBD_FAIL_MDS_STATFS_PACK 0x11d
#define OBD_FAIL_MDS_STATFS_NET 0x11e
+#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
int s = (size); \
(ptr) = lptr = kmalloc(s, GFP_KERNEL); \
if (lptr == NULL) { \
- CERROR("kmalloc of '" #ptr "' (%ld bytes) failed " \
+ CERROR("kmalloc of '" #ptr "' (%d bytes) failed " \
"at %s:%d\n", s, __FILE__, __LINE__); \
} else { \
+ int obd_curmem; \
memset(lptr, 0, s); \
atomic_add(s, &obd_memory); \
+ obd_curmem = atomic_read(&obd_memory); \
+ if (obd_curmem > obd_memmax) \
+ obd_memmax = obd_curmem; \
CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p " \
- "(tot %d)\n", s, lptr, atomic_read(&obd_memory));\
+ "(tot %d)\n", s, lptr, obd_curmem); \
} \
} while (0)
owner = 1;
spin_unlock(&lock->l_spin);
+ /* This is safe to increment outside the spinlock because we
+ * can only have 1 CPU running on the current task
+ * (i.e. l_owner == current), regardless of the number of CPUs.
+ */
if (owner) {
++lock->l_depth;
} else {
}
/* apply the internal policy by walking all the lists */
-int ldlm_extent_policy(struct ldlm_lock *lock, void *req_cookie,
+int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ void *req_cookie,
ldlm_mode_t mode, int flags, void *data)
{
struct ldlm_resource *res = lock->l_resource;
if (!res)
LBUG();
- l_lock(&res->lr_namespace->ns_lock);
+ l_lock(&ns->ns_lock);
policy_internal(&res->lr_granted, req_ex, &new_ex, mode);
policy_internal(&res->lr_converting, req_ex, &new_ex, mode);
policy_internal(&res->lr_waiting, req_ex, &new_ex, mode);
- l_unlock(&res->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
memcpy(&lock->l_extent, &new_ex, sizeof(new_ex));
#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/random.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_mds.h>
#include <linux/obd_class.h>
static ldlm_res_policy ldlm_intent_policy_func;
-static int ldlm_plain_policy(struct ldlm_lock *lock, void *req_cookie,
- ldlm_mode_t mode, int flags, void *data)
+static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ void *req_cookie, ldlm_mode_t mode, int flags,
+ void *data)
{
if ((flags & LDLM_FL_HAS_INTENT) && ldlm_intent_policy_func) {
- return ldlm_intent_policy_func(lock, req_cookie, mode, flags,
- data);
+ return ldlm_intent_policy_func(ns, lock, req_cookie, mode,
+ flags, data);
}
return ELDLM_OK;
EXIT;
}
+/* Only called with strict == 0 by recovery, to mark in-use locks as
+ * should-be-destroyed */
void ldlm_lock_destroy(struct ldlm_lock *lock)
{
ENTRY;
if (!list_empty(&lock->l_children)) {
LDLM_DEBUG(lock, "still has children (%p)!",
lock->l_children.next);
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_ERROR, lock);
LBUG();
}
if (lock->l_readers || lock->l_writers) {
LDLM_DEBUG(lock, "lock still has references");
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
}
if (!list_empty(&lock->l_res_link)) {
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_ERROR, lock);
LBUG();
}
list_del_init(&lock->l_export_chain);
ldlm_lock_remove_from_lru(lock);
+ portals_handle_unhash(&lock->l_handle);
#if 0
/* Wake anyone waiting for this lock */
if (lock == NULL)
RETURN(NULL);
- get_random_bytes(&lock->l_random, sizeof(__u64));
lock->l_resource = ldlm_resource_getref(resource);
atomic_set(&lock->l_refc, 2);
l_unlock(&parent->l_resource->lr_namespace->ns_lock);
}
+ INIT_LIST_HEAD(&lock->l_handle.h_link);
+ portals_handle_hash(&lock->l_handle, lock_handle_addref);
+
RETURN(lock);
}
-int ldlm_lock_change_resource(struct ldlm_lock *lock, __u64 new_resid[3])
+int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ __u64 new_resid[3])
{
- struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
struct ldlm_resource *oldres = lock->l_resource;
ENTRY;
void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh)
{
- lockh->addr = (__u64) (unsigned long)lock;
- lockh->cookie = lock->l_random;
+ //lockh->addr = (__u64)(unsigned long)lock;
+ memset(&lockh->addr, 0x69, sizeof(lockh->addr)); /* addr no longer carries the lock pointer; every byte is set to 0x69 instead */
+ lockh->cookie = lock->l_handle.h_cookie; /* the cookie alone names the lock: __ldlm_handle2lock() resolves it with portals_handle2object() */
}
-/*
- * if flags: atomically get the lock and set the flags.
- * Return NULL if flag already set
+/* Look up the lock named by handle->cookie.
+ * if flags: atomically get the lock and set the flags.
+ * Return NULL if flag already set
+ * Also returns NULL when the cookie matches no lock or the lock has been
+ * marked destroyed. On success the reference obtained from
+ * portals_handle2object() is handed to the caller; on failure it is
+ * dropped here.
  */
-struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int strict,
- int flags)
+struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags)
{
 struct ldlm_lock *lock = NULL, *retval = NULL;
 ENTRY;
 LASSERT(handle);
- if (!handle->addr)
+ lock = portals_handle2object(handle->cookie);
+ if (lock == NULL)
 RETURN(NULL);
- lock = (struct ldlm_lock *)(unsigned long)(handle->addr);
- if (!kmem_cache_validate(ldlm_lock_slab, (void *)lock)) {
- //CERROR("bogus lock %p\n", lock);
- GOTO(out2, retval);
- }
-
- if (lock->l_random != handle->cookie) {
- //CERROR("bogus cookie: lock %p has "LPX64" vs. handle "LPX64
- // "\n", lock, lock->l_random, handle->cookie);
- GOTO(out2, NULL);
- }
- if (!lock->l_resource) {
- CERROR("trying to lock bogus resource: lock %p\n", lock);
- //LDLM_DEBUG(lock, "ldlm_handle2lock(%p)", lock);
- GOTO(out2, retval);
- }
- if (!lock->l_resource->lr_namespace) {
- CERROR("trying to lock bogus namespace: lock %p\n", lock);
- //LDLM_DEBUG(lock, "ldlm_handle2lock(%p)", lock);
- GOTO(out2, retval);
- }
+ LASSERT(lock->l_resource != NULL);
+ LASSERT(lock->l_resource->lr_namespace != NULL);
 l_lock(&lock->l_resource->lr_namespace->ns_lock);
- if (strict && lock->l_destroyed) {
+
+ /* It's unlikely but possible that someone marked the lock as
+ * destroyed after we did handle2object on it */
+ if (lock->l_destroyed) {
 CERROR("lock already destroyed: lock %p\n", lock);
- //LDLM_DEBUG(lock, "ldlm_handle2lock(%p)", lock);
- GOTO(out, NULL);
+ GOTO(out, retval);
 }
- if (flags && (lock->l_flags & flags))
- GOTO(out, NULL);
+ if (flags && (lock->l_flags & flags)) {
+ GOTO(out, retval);
+ }
 if (flags)
 lock->l_flags |= flags;
- retval = LDLM_LOCK_GET(lock);
+ retval = lock;
 EXIT;
 out:
 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- out2:
+ /* On failure, drop the reference taken by the lookup only now: the
+ * l_unlock() above still dereferences lock, so the put must not come
+ * before it. */
+ if (retval == NULL)
+ LDLM_LOCK_PUT(lock);
+ return retval;
+}
+
+/* Resolve a lock handle while holding the namespace lock of ns across the
+ * lookup. Returns whatever the plain handle lookup returns: the lock, or
+ * NULL when the handle does not resolve to a usable lock. */
+struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
+ struct lustre_handle *handle)
+{
+ struct ldlm_lock *retval;
+
+ l_lock(&ns->ns_lock);
+ retval = ldlm_handle2lock(handle);
+ l_unlock(&ns->ns_lock);
+
 return retval;
}
void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode)
{
- struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0, 0);
+ struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0);
struct ldlm_namespace *ns;
ENTRY;
LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
ns = lock->l_resource->lr_namespace;
l_lock(&lock->l_resource->lr_namespace->ns_lock);
- if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR)
+ if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) {
+ LASSERT(lock->l_readers > 0);
lock->l_readers--;
- else
+ } else {
+ LASSERT(lock->l_writers > 0);
lock->l_writers--;
+ }
/* If we received a blocked AST and this was the last reference,
* run the callback. */
ns->ns_nr_unused++;
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
ldlm_cancel_lru(ns);
- } else
+ } else {
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ }
LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */
LDLM_LOCK_PUT(lock); /* matches the handle2lock above */
return lock;
}
-/* Must be called with lock->l_lock and lock->l_resource->lr_lock not held */
-ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock * lock,
+ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
void *cookie, int cookie_len,
int *flags,
ldlm_completion_callback completion,
if (!local && !(*flags & LDLM_FL_REPLAY) &&
(policy = ldlm_res_policy_table[res->lr_type])) {
int rc;
- rc = policy(lock, cookie, lock->l_req_mode, *flags, NULL);
+ rc = policy(ns, lock, cookie, lock->l_req_mode, *flags, NULL);
if (rc == ELDLM_LOCK_CHANGED) {
res = lock->l_resource;
}
}
- l_lock(&res->lr_namespace->ns_lock);
+ l_lock(&ns->ns_lock);
if (local && lock->l_req_mode == lock->l_granted_mode) {
/* The server returned a blocked lock, but it was granted before
* we got a chance to actually enqueue it. We don't need to do
* FIXME (bug 268): Detect obvious lies by checking compatibility in
* granted/converting queues. */
ldlm_resource_unlink_lock(lock);
- if (local || (*flags & LDLM_FL_REPLAY)) {
+ if (local) {
if (*flags & LDLM_FL_BLOCK_CONV)
ldlm_resource_add_lock(res, res->lr_converting.prev,
lock);
else
ldlm_grant_lock(lock);
GOTO(out, ELDLM_OK);
+ } else if (*flags & LDLM_FL_REPLAY) {
+ if (*flags & LDLM_FL_BLOCK_CONV) {
+ ldlm_resource_add_lock(res, res->lr_converting.prev,
+ lock);
+ GOTO(out, ELDLM_OK);
+ } else if (*flags & LDLM_FL_BLOCK_WAIT) {
+ ldlm_resource_add_lock(res, res->lr_waiting.prev, lock);
+ GOTO(out, ELDLM_OK);
+ } else if (*flags & LDLM_FL_BLOCK_GRANTED) {
+ ldlm_grant_lock(lock);
+ GOTO(out, ELDLM_OK);
+ }
+ /* If no flags, fall through to normal enqueue path. */
}
/* FIXME: We may want to optimize by checking lr_most_restr */
ldlm_grant_lock(lock);
EXIT;
out:
- l_unlock(&res->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
/* Don't set 'completion_ast' until here so that if the lock is granted
* immediately we don't do an unnecessary completion call. */
lock->l_completion_ast = completion;
RETURN(0);
}
-void ldlm_run_ast_work(struct list_head *rpc_list)
+int ldlm_run_ast_work(struct list_head *rpc_list)
{
struct list_head *tmp, *pos;
- int rc;
+ int rc, retval = 0;
ENTRY;
list_for_each_safe(tmp, pos, rpc_list) {
w->w_datalen, LDLM_CB_BLOCKING);
else
rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags);
- if (rc)
+ if (rc == -ERESTART)
+ retval = rc;
+ else if (rc)
CERROR("Failed AST - should clean & disconnect "
"client\n");
LDLM_LOCK_PUT(w->w_lock);
list_del(&w->w_list);
OBD_FREE(w, sizeof(*w));
}
- EXIT;
+ RETURN(retval);
+}
+
+static int reprocess_one_queue(struct ldlm_resource *res, void *closure)
+{
+ ldlm_reprocess_all(res);
+ return LDLM_ITER_CONTINUE;
+}
+
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
+{
+ (void)ldlm_namespace_foreach_res(ns, reprocess_one_queue, NULL);
}
/* Must be called with resource->lr_lock not taken. */
void ldlm_reprocess_all(struct ldlm_resource *res)
{
struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
+ int rc;
ENTRY;
/* Local lock trees don't get reprocessed. */
return;
}
+ restart:
l_lock(&res->lr_namespace->ns_lock);
res->lr_tmp = &rpc_list;
res->lr_tmp = NULL;
l_unlock(&res->lr_namespace->ns_lock);
- ldlm_run_ast_work(&rpc_list);
+ rc = ldlm_run_ast_work(&rpc_list);
+ if (rc == -ERESTART)
+ goto restart;
EXIT;
}
ns = res->lr_namespace;
l_lock(&ns->ns_lock);
+ /* Please do not, no matter how tempting, remove this LBUG without
+ * talking to me first. -phik */
if (lock->l_readers || lock->l_writers) {
LDLM_DEBUG(lock, "lock still has references");
- ldlm_lock_dump(lock);
- //LBUG();
+ ldlm_lock_dump(D_OTHER, lock);
+ LBUG();
}
ldlm_cancel_callback(lock);
RETURN(res);
}
-void ldlm_lock_dump(struct ldlm_lock *lock)
+void ldlm_lock_dump(int level, struct ldlm_lock *lock)
{
char ver[128];
- if (!(portal_debug & D_OTHER))
+ if (!(portal_debug & level))
return;
if (RES_VERSION_SIZE != 4)
LBUG();
if (!lock) {
- CDEBUG(D_OTHER, " NULL LDLM lock\n");
+ CDEBUG(level, " NULL LDLM lock\n");
return;
}
lock->l_version[0], lock->l_version[1],
lock->l_version[2], lock->l_version[3]);
- CDEBUG(D_OTHER, " -- Lock dump: %p (%s)\n", lock, ver);
+ CDEBUG(level, " -- Lock dump: %p (%s)\n", lock, ver);
if (lock->l_export && lock->l_export->exp_connection)
- CDEBUG(D_OTHER, " Node: NID %x (rhandle: "LPX64")\n",
+ CDEBUG(level, " Node: NID %x (rhandle: "LPX64")\n",
lock->l_export->exp_connection->c_peer.peer_nid,
- lock->l_remote_handle.addr);
+ lock->l_remote_handle.cookie);
else
- CDEBUG(D_OTHER, " Node: local\n");
- CDEBUG(D_OTHER, " Parent: %p\n", lock->l_parent);
- CDEBUG(D_OTHER, " Resource: %p ("LPD64")\n", lock->l_resource,
+ CDEBUG(level, " Node: local\n");
+ CDEBUG(level, " Parent: %p\n", lock->l_parent);
+ CDEBUG(level, " Resource: %p ("LPD64")\n", lock->l_resource,
lock->l_resource->lr_name[0]);
- CDEBUG(D_OTHER, " Requested mode: %d, granted mode: %d\n",
+ CDEBUG(level, " Requested mode: %d, granted mode: %d\n",
(int)lock->l_req_mode, (int)lock->l_granted_mode);
- CDEBUG(D_OTHER, " Readers: %u ; Writers; %u\n",
+ CDEBUG(level, " Readers: %u ; Writers; %u\n",
lock->l_readers, lock->l_writers);
if (lock->l_resource->lr_type == LDLM_EXTENT)
- CDEBUG(D_OTHER, " Extent: %Lu -> %Lu\n",
- (unsigned long long)lock->l_extent.start,
- (unsigned long long)lock->l_extent.end);
+ CDEBUG(level, " Extent: "LPU64" -> "LPU64"\n",
+ lock->l_extent.start, lock->l_extent.end);
}
-void ldlm_lock_dump_handle(struct lustre_handle *lockh)
+/* Dump the lock behind @lockh using debug mask @level.
+ * Nothing is printed when the lock pointer is NULL. */
+void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh)
{
struct ldlm_lock *lock;
if (lock == NULL)
return;
- ldlm_lock_dump(lock);
+ /* Pass the caller's mask through; hard-coding D_OTHER here would
+ * silently ignore e.g. the D_ERROR used by ldlm_cancel_handler. */
+ ldlm_lock_dump(level, lock);
LDLM_LOCK_PUT(lock);
}
l_pending_chain);
if (l->l_callback_timeout > jiffies)
break;
+ CERROR("lock timer expired, lock %p\n", l);
LDLM_DEBUG(l, "timer expired, recovering exp %p on conn %p",
l->l_export, l->l_export->exp_connection);
recovd_conn_fail(l->l_export->exp_connection);
memcpy(&body->lock_desc, desc, sizeof(*desc));
LDLM_DEBUG(lock, "server preparing blocking AST");
- req->rq_replen = 0; /* no reply needed */
+ req->rq_replen = lustre_msg_size(0, NULL);
ldlm_add_waiting_lock(lock);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- (void)ptl_send_rpc(req);
+ req->rq_level = LUSTRE_CONN_RECOVD;
+ rc = ptlrpc_queue_wait(req);
+ if (rc == -ETIMEDOUT || rc == -EINTR) {
+ ldlm_expired_completion_wait(lock);
+ } else if (rc) {
+ CERROR("client returned %d from blocking AST for lock %p\n",
+ req->rq_status, lock);
+ LDLM_DEBUG(lock, "client returned error %d from blocking AST",
+ req->rq_status);
+ ldlm_lock_cancel(lock);
+ /* Server-side AST functions are called from ldlm_reprocess_all,
+ * which needs to be told to please restart its reprocessing. */
+ rc = -ERESTART;
+ }
- /* not waiting for reply */
ptlrpc_req_finished(req);
RETURN(rc);
ldlm_lock2desc(lock, &body->lock_desc);
LDLM_DEBUG(lock, "server preparing completion AST");
- req->rq_replen = 0; /* no reply needed */
-
- (void)ptl_send_rpc(req);
-
- /* not waiting for reply */
+ req->rq_replen = lustre_msg_size(0, NULL);
+
+ req->rq_level = LUSTRE_CONN_RECOVD;
+ rc = ptlrpc_queue_wait(req);
+ if (rc == -ETIMEDOUT || rc == -EINTR) {
+ ldlm_expired_completion_wait(lock);
+ } else if (rc) {
+ CERROR("client returned %d from completion AST for lock %p\n",
+ req->rq_status, lock);
+ LDLM_DEBUG(lock, "client returned error %d from completion AST",
+ req->rq_status);
+ ldlm_lock_cancel(lock);
+ /* Server-side AST functions are called from ldlm_reprocess_all,
+ * which needs to be told to please restart its reprocessing. */
+ rc = -ERESTART;
+ }
ptlrpc_req_finished(req);
RETURN(rc);
&lock->l_export->exp_ldlm_data.led_held_locks);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- err = ldlm_lock_enqueue(lock, cookie, cookielen, &flags,
- ldlm_server_completion_ast,
+ err = ldlm_lock_enqueue(obddev->obd_namespace, lock, cookie, cookielen,
+ &flags, ldlm_server_completion_ast,
ldlm_server_blocking_ast);
if (err != ELDLM_OK)
GOTO(out, err);
RETURN(0);
}
-static int ldlm_handle_bl_callback(struct ptlrpc_request *req)
+struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns,
+ struct lustre_handle *handle);
+
+static int ldlm_handle_bl_callback(struct ptlrpc_request *req,
+ struct ldlm_namespace *ns)
{
struct ldlm_request *dlm_req;
struct ldlm_lock *lock;
dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
- lock = ldlm_handle2lock(&dlm_req->lock_handle1);
+ lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle1);
if (!lock) {
CERROR("blocking callback on lock "LPX64" - lock disappeared\n",
- dlm_req->lock_handle1.addr);
- RETURN(0);
+ dlm_req->lock_handle1.cookie);
+ RETURN(-EINVAL);
}
LDLM_DEBUG(lock, "client blocking AST callback handler START");
RETURN(0);
}
-static int ldlm_handle_cp_callback(struct ptlrpc_request *req)
+static int ldlm_handle_cp_callback(struct ptlrpc_request *req,
+ struct ldlm_namespace *ns)
{
struct list_head ast_list = LIST_HEAD_INIT(ast_list);
struct ldlm_request *dlm_req;
dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
- lock = ldlm_handle2lock(&dlm_req->lock_handle1);
+ lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle1);
if (!lock) {
CERROR("completion callback on lock "LPX64" - lock "
- "disappeared\n", dlm_req->lock_handle1.addr);
- RETURN(0);
+ "disappeared\n", dlm_req->lock_handle1.cookie);
+ RETURN(-EINVAL);
}
LDLM_DEBUG(lock, "client completion callback handler START");
- l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ l_lock(&ns->ns_lock);
/* If we receive the completion AST before the actual enqueue returned,
* then we might need to switch lock modes, resources, or extents. */
if (memcmp(dlm_req->lock_desc.l_resource.lr_name,
lock->l_resource->lr_name,
sizeof(__u64) * RES_NAME_SIZE) != 0) {
- ldlm_lock_change_resource(lock,
+ ldlm_lock_change_resource(ns, lock,
dlm_req->lock_desc.l_resource.lr_name);
LDLM_DEBUG(lock, "completion AST, new resource");
}
lock->l_resource->lr_tmp = &ast_list;
ldlm_grant_lock(lock);
lock->l_resource->lr_tmp = NULL;
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ l_unlock(&ns->ns_lock);
LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work");
LDLM_LOCK_PUT(lock);
static int ldlm_callback_handler(struct ptlrpc_request *req)
{
+ struct ldlm_namespace *ns;
int rc;
ENTRY;
rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
if (rc) {
- CERROR("lustre_ldlm: Invalid request: %d\n", rc);
+ CERROR("Invalid request: %d\n", rc);
RETURN(rc);
}
dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
CERROR("--> lock addr: "LPX64", cookie: "LPX64"\n",
dlm_req->lock_handle1.addr,dlm_req->lock_handle1.cookie);
- CERROR("--> ignoring this error as a temporary workaround! "
- "beware!\n");
- //RETURN(-ENOTCONN);
+ RETURN(-ENOTCONN);
}
+ LASSERT(req->rq_export != NULL);
+ LASSERT(req->rq_export->exp_obd != NULL);
+ ns = req->rq_export->exp_obd->obd_namespace;
+ LASSERT(ns != NULL);
+
switch (req->rq_reqmsg->opc) {
case LDLM_BL_CALLBACK:
CDEBUG(D_INODE, "blocking ast\n");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0);
- rc = ldlm_handle_bl_callback(req);
- RETURN(rc);
+ rc = ldlm_handle_bl_callback(req, ns);
+ break;
case LDLM_CP_CALLBACK:
CDEBUG(D_INODE, "completion ast\n");
OBD_FAIL_RETURN(OBD_FAIL_LDLM_CP_CALLBACK, 0);
- rc = ldlm_handle_cp_callback(req);
- RETURN(rc);
-
+ rc = ldlm_handle_cp_callback(req, ns);
+ break;
default:
CERROR("invalid opcode %d\n", req->rq_reqmsg->opc);
RETURN(-EINVAL);
}
+ req->rq_status = rc;
+ if (rc) {
+ ptlrpc_error(req->rq_svc, req);
+ } else {
+ rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
+ &req->rq_repmsg);
+ if (rc)
+ RETURN(rc);
+ ptlrpc_reply(req->rq_svc, req);
+ }
+
RETURN(0);
}
-
static int ldlm_cancel_handler(struct ptlrpc_request *req)
{
int rc;
}
if (req->rq_export == NULL) {
+ struct ldlm_request *dlm_req;
CERROR("operation %d with bad export (ptl req %d/rep %d)\n",
req->rq_reqmsg->opc, req->rq_request_portal,
req->rq_reply_portal);
CERROR("--> export addr: "LPX64", cookie: "LPX64"\n",
req->rq_reqmsg->addr, req->rq_reqmsg->cookie);
+ dlm_req = lustre_msg_buf(req->rq_reqmsg, 0);
+ ldlm_lock_dump_handle(D_ERROR, &dlm_req->lock_handle1);
CERROR("--> ignoring this error as a temporary workaround! "
"beware!\n");
//RETURN(-ENOTCONN);
RETURN(0);
}
-
static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
void *karg, void *uarg)
{
if (_IOC_TYPE(cmd) != IOC_LDLM_TYPE || _IOC_NR(cmd) < IOC_LDLM_MIN_NR ||
_IOC_NR(cmd) > IOC_LDLM_MAX_NR) {
- CDEBUG(D_IOCTL, "invalid ioctl (type %ld, nr %ld, size %ld)\n",
+ CDEBUG(D_IOCTL, "invalid ioctl (type %d, nr %d, size %d)\n",
_IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
RETURN(-EINVAL);
}
if (ldlm_already_setup)
RETURN(-EALREADY);
- MOD_INC_USE_COUNT;
-
rc = ldlm_proc_setup(obddev);
if (rc != 0)
- GOTO(out_dec, rc);
+ RETURN(rc);
ldlm->ldlm_cb_service =
ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
out_proc:
ldlm_proc_cleanup(obddev);
- out_dec:
- MOD_DEC_USE_COUNT;
return rc;
}
ldlm_proc_cleanup(obddev);
ldlm_already_setup = 0;
- MOD_DEC_USE_COUNT;
RETURN(0);
}
}
struct obd_ops ldlm_obd_ops = {
+ o_owner: THIS_MODULE,
o_iocontrol: ldlm_iocontrol,
o_setup: ldlm_setup,
o_cleanup: ldlm_cleanup,
EXPORT_SYMBOL(ldlm_cancel_locks_for_export);
EXPORT_SYMBOL(ldlm_replay_locks);
EXPORT_SYMBOL(ldlm_resource_foreach);
+EXPORT_SYMBOL(ldlm_reprocess_all_ns);
EXPORT_SYMBOL(ldlm_namespace_foreach);
+EXPORT_SYMBOL(ldlm_namespace_foreach_res);
EXPORT_SYMBOL(l_lock);
EXPORT_SYMBOL(l_unlock);
RETURN(1);
}
-static int expired_completion_wait(void *data)
+int ldlm_expired_completion_wait(void *data)
{
struct ldlm_lock *lock = data;
struct ptlrpc_connection *conn;
LDLM_DEBUG(lock, "timed out waiting for completion");
CERROR("lock %p timed out from %s\n", lock,
conn->c_remote_uuid);
+ ldlm_lock_dump(D_ERROR, lock);
class_signal_connection_failure(conn);
}
RETURN(0);
int ldlm_completion_ast(struct ldlm_lock *lock, int flags)
{
struct l_wait_info lwi =
- LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_completion_wait,
+ LWI_TIMEOUT_INTR(obd_timeout * HZ, ldlm_expired_completion_wait,
interrupted_completion_wait, lock);
int rc = 0;
ENTRY;
LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, "
"sleeping");
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
ldlm_reprocess_all(lock->l_resource);
noreproc:
ldlm_lock2handle(lock, lockh);
lock->l_connh = NULL;
- err = ldlm_lock_enqueue(lock, cookie, cookielen, flags, completion,
+ err = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags, completion,
blocking);
if (err != ELDLM_OK)
GOTO(out, err);
/* FIXME: if we've already received a completion AST, this will
* LBUG! */
ldlm_lock_destroy(lock);
- GOTO(out, rc);
+ GOTO(out_req, rc);
}
reply = lustre_msg_buf(req->rq_repmsg, 0);
(long)reply->lock_resource_name[0],
(long)lock->l_resource->lr_name[0]);
- ldlm_lock_change_resource(lock,
+ ldlm_lock_change_resource(ns, lock,
reply->lock_resource_name);
if (lock->l_resource == NULL) {
LBUG();
- RETURN(-ENOMEM);
+ GOTO(out_req, rc = -ENOMEM);
}
LDLM_DEBUG(lock, "client-side enqueue, new resource");
}
}
if (!is_replay) {
- rc = ldlm_lock_enqueue(lock, cookie, cookielen, flags,
+ rc = ldlm_lock_enqueue(ns, lock, cookie, cookielen, flags,
completion, blocking);
if (lock->l_completion_ast)
lock->l_completion_ast(lock, *flags);
}
- if (!req_passed_in)
- ptlrpc_req_finished(req);
-
LDLM_DEBUG(lock, "client-side enqueue END");
EXIT;
+ out_req:
+ if (!req_passed_in)
+ ptlrpc_req_finished(req);
out:
LDLM_LOCK_PUT(lock);
out_nolock:
ENTRY;
/* concurrent cancels on the same handle can happen */
- lock = __ldlm_handle2lock(lockh, 0, LDLM_FL_CANCELING);
+ lock = __ldlm_handle2lock(lockh, LDLM_FL_CANCELING);
if (lock == NULL)
RETURN(0);
int i;
ENTRY;
+ if (ns == NULL)
+ RETURN(ELDLM_OK);
+
if (res_id)
RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, flags));
return helper->iter(lock, helper->closure);
}
+static int ldlm_res_iter_helper(struct ldlm_resource *res, void *closure)
+{
+ return ldlm_resource_foreach(res, ldlm_iter_helper, closure);
+}
+
int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
void *closure)
{
- int i, rc = LDLM_ITER_CONTINUE;
struct iter_helper_data helper = { iter: iter, closure: closure };
+ return ldlm_namespace_foreach_res(ns, ldlm_res_iter_helper, &helper);
+}
+
+int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
+ ldlm_res_iterator_t iter, void *closure)
+{
+ int i, rc = LDLM_ITER_CONTINUE;
l_lock(&ns->ns_lock);
for (i = 0; i < RES_HASH_SIZE; i++) {
list_entry(tmp, struct ldlm_resource, lr_hash);
ldlm_resource_getref(res);
- rc = ldlm_resource_foreach(res, ldlm_iter_helper,
- &helper);
+ rc = iter(res, closure);
ldlm_resource_putref(res);
if (rc == LDLM_ITER_STOP)
GOTO(out, rc);
return LDLM_ITER_CONTINUE;
}
-static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock,
- int last)
+static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
{
struct ptlrpc_request *req;
struct ldlm_request *body;
struct ldlm_reply *reply;
int rc, size;
- int flags = LDLM_FL_REPLAY;
-
- flags |= lock->l_flags &
- (LDLM_FL_BLOCK_GRANTED|LDLM_FL_BLOCK_CONV|LDLM_FL_BLOCK_WAIT);
-
+ int flags;
+
+ /*
+ * If granted mode matches the requested mode, this lock is granted.
+ *
+ * If they differ, but we have a granted mode, then we were granted
+ * one mode and now want another: ergo, converting.
+ *
+ * If we haven't been granted anything and are on a resource list,
+ * then we're blocked/waiting.
+ *
+ * If we haven't been granted anything and we're NOT on a resource list,
+ * then we haven't got a reply yet and don't have a known disposition.
+ * This happens whenever a lock enqueue is the request that triggers
+ * recovery.
+ */
+ if (lock->l_granted_mode == lock->l_req_mode)
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED;
+ else if (lock->l_granted_mode)
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV;
+ else if (!list_empty(&lock->l_res_link))
+ flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT;
+ else
+ flags = LDLM_FL_REPLAY;
+
size = sizeof(*body);
req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL);
if (!req)
RETURN(-ENOMEM);
+
+ /* We're part of recovery, so don't wait for it. */
+ req->rq_level = LUSTRE_CONN_RECOVD;
body = lustre_msg_buf(req->rq_reqmsg, 0);
ldlm_lock2desc(lock, &body->lock_desc);
size = sizeof(*reply);
req->rq_replen = lustre_msg_size(1, &size);
- if (last)
- req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
-
LDLM_DEBUG(lock, "replaying lock:");
rc = ptlrpc_queue_wait(req);
if (rc != ELDLM_OK)
list_for_each_safe(pos, next, &list) {
lock = list_entry(pos, struct ldlm_lock, l_pending_chain);
- rc = replay_one_lock(imp, lock, (next == &list));
+ rc = replay_one_lock(imp, lock);
if (rc)
break; /* or try to do the rest? */
}
extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-/* If 'local_only' is true, don't try to tell the server, just cleanup. */
+/* If 'local_only' is true, don't try to tell the server, just cleanup.
+ * This is currently only used for recovery, and we make certain assumptions
+ * as a result--notably, that we shouldn't cancel locks with refs. -phil */
static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
int local_only)
{
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
LDLM_LOCK_GET(lock);
+ if (local_only && (lock->l_readers || lock->l_writers)) {
+ /* This is a little bit gross, but much better than the
+ * alternative: pretend that we got a blocking AST from
+ * the server, so that when the lock is decref'd, it
+ * will go away ... */
+ lock->l_flags |= LDLM_FL_CBPENDING;
+ /* ... without sending a CANCEL message. */
+ lock->l_flags |= LDLM_FL_CANCELING;
+ LDLM_LOCK_PUT(lock);
+ continue;
+ }
+
/* At shutdown time, don't call the cancellation callback */
lock->l_flags |= LDLM_FL_CANCEL;
}
LDLM_LOCK_PUT(lock);
}
+ EXIT;
}
int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only)
{
int i;
+ if (ns == NULL) {
+ CDEBUG(D_INFO, "NULL ns, skipping cleanup\n");
+ return ELDLM_OK;
+ }
+
l_lock(&ns->ns_lock);
for (i = 0; i < RES_HASH_SIZE; i++) {
struct list_head *tmp, *pos;
l_lock(&res->lr_namespace->ns_lock);
ldlm_resource_dump(res);
- ldlm_lock_dump(lock);
+ CDEBUG(D_OTHER, "About to grant this lock:\n");
+ ldlm_lock_dump(D_OTHER, lock);
LASSERT(list_empty(&lock->l_res_link));
list_for_each(tmp, &res->lr_granted) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
}
CDEBUG(D_OTHER, "Converting locks:\n");
list_for_each(tmp, &res->lr_converting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
}
CDEBUG(D_OTHER, "Waiting locks:\n");
list_for_each(tmp, &res->lr_waiting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
}
}
lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_CR, NULL, 0);
if (lock1 == NULL)
LBUG();
- err = ldlm_lock_enqueue(lock1, NULL, 0, &flags,
+ err = ldlm_lock_enqueue(ns, lock1, NULL, 0, &flags,
ldlm_completion_ast, ldlm_blocking_ast);
if (err != ELDLM_OK)
LBUG();
lock = ldlm_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_EX, NULL, 0);
if (lock == NULL)
LBUG();
- err = ldlm_lock_enqueue(lock, NULL, 0, &flags,
+ err = ldlm_lock_enqueue(ns, lock, NULL, 0, &flags,
ldlm_completion_ast, ldlm_blocking_ast);
if (err != ELDLM_OK)
LBUG();
0);
if (lock1 == NULL)
LBUG();
- err = ldlm_lock_enqueue(lock1, &ext1, sizeof(ext1), &flags, NULL, NULL);
+ err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL,
+ NULL);
if (err != ELDLM_OK)
LBUG();
if (!(flags & LDLM_FL_LOCK_CHANGED))
flags = 0;
lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR,
NULL, 0);
- err = ldlm_lock_enqueue(lock2, &ext2, sizeof(ext2), &flags, NULL, NULL);
+ err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL,
+ NULL);
if (err != ELDLM_OK)
LBUG();
if (!(flags & LDLM_FL_LOCK_CHANGED))
lock = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_EX, NULL, 0);
if (lock == NULL)
LBUG();
- err = ldlm_lock_enqueue(lock, &ext3, sizeof(ext3), &flags,
+ err = ldlm_lock_enqueue(ns, lock, &ext3, sizeof(ext3), &flags,
NULL, NULL);
if (err != ELDLM_OK)
LBUG();
CERROR("ldlm_cli_convert: %d\n", err);
lock = ldlm_handle2lock(&lockh1);
- ldlm_lock_dump(lock);
+ ldlm_lock_dump(D_OTHER, lock);
ldlm_lock_put(lock);
/* Need to decrement old mode. Don't bother incrementing new
static int ldlm_test_main(void *data)
{
struct ldlm_test_thread *thread = data;
+ unsigned long flags;
ENTRY;
lock_kernel();
sigfillset(&current->blocked);
recalc_sigpending();
#else
- spin_lock_irq(&current->sigmask_lock);
+ spin_lock_irqsave(&current->sigmask_lock, flags);
sigfillset(&current->blocked);
recalc_sigpending(current);
- spin_unlock_irq(&current->sigmask_lock);
+ spin_unlock_irqrestore(&current->sigmask_lock, flags);
#endif
sprintf(current->comm, "ldlm_test");
-EXTRA_DIST = mds_updates.c obd_pack.c ll_pack.c simple.c
+EXTRA_DIST = mds_updates.c obd_pack.c simple.c
EXTRA_DIST += client.c target.c
include $(top_srcdir)/Rules
{
int i;
- for (i=0; i < MAX_OBD_DEVICES; i++) {
+ for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if ((strcmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME) == 0) ||
(strcmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME) == 0)) {
struct client_obd *cli = &obd->u.cli;
- if (strncmp(tgtuuid, cli->cl_target_uuid,
+ if (strncmp(tgtuuid, cli->cl_target_uuid,
sizeof(cli->cl_target_uuid)) == 0)
return obd;
}
imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
if (!imp->imp_connection)
RETURN(-ENOENT);
-
+
INIT_LIST_HEAD(&imp->imp_replay_list);
INIT_LIST_HEAD(&imp->imp_sending_list);
INIT_LIST_HEAD(&imp->imp_delayed_list);
cli->cl_max_mds_easize = sizeof(struct lov_mds_md);
- MOD_INC_USE_COUNT;
RETURN(0);
}
ptlrpc_cleanup_client(&obd->cl_import);
ptlrpc_put_connection(obd->cl_import.imp_connection);
- MOD_DEC_USE_COUNT;
return 0;
}
ENTRY;
down(&cli->cl_sem);
- MOD_INC_USE_COUNT;
rc = class_connect(conn, obd, cluuid);
- if (rc) {
- MOD_DEC_USE_COUNT;
+ if (rc)
GOTO(out_sem, rc);
- }
+
cli->cl_conn_count++;
if (cli->cl_conn_count > 1)
GOTO(out_sem, rc);
out_disco:
cli->cl_conn_count--;
class_disconnect(conn);
- MOD_DEC_USE_COUNT;
}
}
out_sem:
cli->cl_conn_count--;
if (cli->cl_conn_count)
- GOTO(out_disco, rc = 0);
+ GOTO(out_no_disconnect, rc = 0);
ldlm_namespace_free(obd->obd_namespace);
obd->obd_namespace = NULL;
request = ptlrpc_prep_req(&cli->cl_import, rq_opc, 0, NULL,
NULL);
if (!request)
- GOTO(out_disco, rc = -ENOMEM);
-
+ GOTO(out_req, rc = -ENOMEM);
+
request->rq_replen = lustre_msg_size(0, NULL);
/* Process disconnects even if we're waiting for recovery. */
request->rq_level = LUSTRE_CONN_RECOVD;
-
+
rc = ptlrpc_queue_wait(request);
if (rc)
GOTO(out_req, rc);
out_req:
if (request)
ptlrpc_req_finished(request);
- out_disco:
+ list_del_init(&cli->cl_import.imp_chain);
+ out_no_disconnect:
err = class_disconnect(conn);
if (!rc && err)
rc = err;
- list_del_init(&cli->cl_import.imp_chain);
- MOD_DEC_USE_COUNT;
out_sem:
up(&cli->cl_sem);
RETURN(rc);
#include <linux/obd_ost.h>
#include <linux/lustre_net.h>
-void ost_pack_ioo(void **tmp, struct lov_stripe_md *lsm, int bufcnt)
+void ost_pack_ioo(struct obd_ioobj **tmp, struct lov_stripe_md *lsm,int bufcnt)
{
struct obd_ioobj *ioo = *tmp;
- char *c = *tmp;
+ void *p = *tmp;
ioo->ioo_id = HTON__u64(lsm->lsm_object_id);
ioo->ioo_gr = HTON__u64(0);
ioo->ioo_type = HTON__u32(S_IFREG);
ioo->ioo_bufcnt = HTON__u32(bufcnt);
- *tmp = c + sizeof(*ioo);
+ *tmp = p + sizeof(*ioo);
}
-void ost_unpack_ioo(void **tmp, struct obd_ioobj **ioop)
+void ost_unpack_ioo(struct obd_ioobj **tmp, struct obd_ioobj **ioop)
{
- char *c = *tmp;
+ void *p = *tmp;
struct obd_ioobj *ioo = *tmp;
*ioop = *tmp;
ioo->ioo_gr = NTOH__u64(ioo->ioo_gr);
ioo->ioo_type = NTOH__u32(ioo->ioo_type);
ioo->ioo_bufcnt = NTOH__u32(ioo->ioo_bufcnt);
- *tmp = c + sizeof(*ioo);
+ *tmp = p + sizeof(*ioo);
}
void ost_pack_niobuf(void **tmp, __u64 offset, __u32 len, __u32 flags,
#ifdef OBD_CTXT_DEBUG
/* Debugging check only needed during development */
-#define ASSERT_CTXT_MAGIC(magic) do { if ((magic) != OBD_RUN_CTXT_MAGIC) { \
- CERROR("bad ctxt magic\n"); LBUG(); } } while(0)
-#define ASSERT_NOT_KERNEL_CTXT(msg) do { if (segment_eq(get_fs(), get_ds())) { \
- CERROR(msg); LBUG(); } } while(0)
-#define ASSERT_KERNEL_CTXT(msg) do { if (!segment_eq(get_fs(), get_ds())) { \
- CERROR(msg); LBUG(); } } while(0)
+#define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
+#define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds()))
+#define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds()))
#else
#define ASSERT_CTXT_MAGIC(magic) do {} while(0)
#define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0)
*/
save->fs = get_fs();
+ LASSERT(atomic_read(&current->fs->pwd->d_count));
+ LASSERT(atomic_read(&new_ctx->pwd->d_count));
save->pwd = dget(current->fs->pwd);
save->pwdmnt = mntget(current->fs->pwdmnt);
*/
int lustre_fwrite(struct file *file, const char *str, int len, loff_t *off)
{
+ ENTRY;
ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n");
- if (!file || !file->f_op || !off)
+ if (!file)
+ RETURN(-ENOENT);
+ if (!file->f_op)
RETURN(-ENOSYS);
+ if (!off)
+ RETURN(-EINVAL);
if (!file->f_op->write)
RETURN(-EROFS);
- return file->f_op->write(file, str, len, off);
+ RETURN(file->f_op->write(file, str, len, off));
}
/*
*/
int lustre_fsync(struct file *file)
{
+ ENTRY;
ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n");
if (!file || !file->f_op || !file->f_op->fsync)
RETURN(-ENOSYS);
- return file->f_op->fsync(file, file->f_dentry, 0);
+ RETURN(file->f_op->fsync(file, file->f_dentry, 0));
}
#include <linux/lustre_net.h>
#include <linux/lustre_dlm.h>
+int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
+ char *cluuid)
+{
+ if (exp->exp_connection) {
+ struct lustre_handle *hdl;
+ hdl = &exp->exp_ldlm_data.led_import.imp_handle;
+ /* Might be a re-connect after a partition. */
+ if (!memcmp(conn, hdl, sizeof *conn)) {
+ CERROR("%s reconnecting\n", cluuid);
+ conn->addr = (__u64) (unsigned long)exp;
+ conn->cookie = exp->exp_cookie;
+ RETURN(EALREADY);
+ } else {
+ CERROR("%s reconnecting from %s, "
+ "handle mismatch (ours "LPX64"/"LPX64", "
+ "theirs "LPX64"/"LPX64")\n", cluuid,
+ exp->exp_connection->c_remote_uuid, hdl->addr,
+ hdl->cookie, conn->addr, conn->cookie);
+ /* XXX disconnect them here? */
+ memset(conn, 0, sizeof *conn);
+ /* This is a little scary, but right now we build this
+ * file separately into each server module, so I won't
+ * go _immediately_ to hell.
+ */
+ RETURN(-EALREADY);
+ }
+ }
+
+ conn->addr = (__u64) (unsigned long)exp;
+ conn->cookie = exp->exp_cookie;
+ CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", cluuid, exp);
+ CDEBUG(D_IOCTL,"connect: addr %Lx cookie %Lx\n",
+ (long long)conn->addr, (long long)conn->cookie);
+ RETURN(0);
+}
+
int target_handle_connect(struct ptlrpc_request *req)
{
struct obd_device *target;
if (rc && rc != EALREADY)
GOTO(out, rc);
+ /* If all else goes well, this is our RPC return code. */
+ req->rq_status = rc;
+
rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg);
if (rc)
GOTO(out, rc);
dlmimp->imp_handle.addr = req->rq_reqmsg->addr;
dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie;
dlmimp->imp_obd = /* LDLM! */ NULL;
+ dlmimp->imp_recover = NULL;
+ INIT_LIST_HEAD(&dlmimp->imp_replay_list);
+ INIT_LIST_HEAD(&dlmimp->imp_sending_list);
+ INIT_LIST_HEAD(&dlmimp->imp_delayed_list);
spin_lock_init(&dlmimp->imp_lock);
dlmimp->imp_level = LUSTRE_CONN_FULL;
out:
- req->rq_status = rc;
+ if (rc)
+ req->rq_status = rc;
RETURN(rc);
}
modulefs_DATA = llite.o
EXTRA_PROGRAMS = llite
-LINX= ll_pack.c
-
llite_SOURCES = dcache.c commit_callback.c super.c rw.c super25.c
-llite_SOURCES += file.c dir.c sysctl.c symlink.c $(LINX)
+llite_SOURCES += file.c dir.c sysctl.c symlink.c
llite_SOURCES += recover.c namei.c lproc_llite.c
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c .
-
include $(top_srcdir)/Rules
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * The daemon that causes completed but not committed transactions
+ * The daemon that causes completed but not committed transactions
* on the MDS to be flushed periodically when they are committed.
- * A gratuitous getattr RPC is made to the MDS to discover the
- * last committed record.
+ * A gratuitous getattr RPC is made to the MDS to discover the
+ * last committed record.
*
* Lustre High Availability Daemon
*
static int ll_commitcbd_check_event(struct ll_sb_info *sbi)
{
- int rc = 0;
+ int rc = 0;
ENTRY;
- spin_lock(&sbi->ll_commitcbd_lock);
- if (sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPING) {
+ spin_lock(&sbi->ll_commitcbd_lock);
+ if (sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPING)
GOTO(out, rc = 1);
- }
+ EXIT;
out:
spin_unlock(&sbi->ll_commitcbd_lock);
- RETURN(rc);
+ return rc;
}
static int ll_commitcbd_main(void *arg)
{
struct ll_sb_info *sbi = (struct ll_sb_info *)arg;
-
+ unsigned long flags;
ENTRY;
lock_kernel();
daemonize();
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- spin_lock_irq(&current->sigmask_lock);
+ spin_lock_irqsave(&current->sigmask_lock, flags);
sigfillset(&current->blocked);
our_recalc_sigpending(current);
- spin_unlock_irq(&current->sigmask_lock);
+ spin_unlock_irqrestore(&current->sigmask_lock, flags);
#else
sigfillset(&current->blocked);
our_recalc_sigpending(current);
/* And now, loop forever on requests */
while (1) {
- wait_event(sbi->ll_commitcbd_waitq,
+ wait_event(sbi->ll_commitcbd_waitq,
ll_commitcbd_check_event(sbi));
spin_lock(&sbi->ll_commitcbd_lock);
if (sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPING) {
spin_unlock(&sbi->ll_commitcbd_lock);
- CERROR("lustre_commitd quitting\n");
+ CERROR("lustre_commitd quitting\n");
EXIT;
break;
}
schedule_timeout(sbi->ll_commitcbd_timeout);
- CERROR("commit callback daemon woken up - FIXME\n");
+ CERROR("commit callback daemon woken up - FIXME\n");
spin_unlock(&sbi->ll_commitcbd_lock);
}
CERROR("cannot start thread\n");
RETURN(rc);
}
- wait_event(sbi->ll_commitcbd_ctl_waitq,
+ wait_event(sbi->ll_commitcbd_ctl_waitq,
sbi->ll_commitcbd_flags & LL_COMMITCBD_RUNNING);
RETURN(0);
}
unlock_page(page);
RETURN(rc);
}
- ldlm_lock_dump_handle(&lockh);
+ ldlm_lock_dump_handle(D_OTHER, &lockh);
if (PageUptodate(page)) {
CERROR("Explain this please?\n");
return 0;
}
+/*
+ * ioctl entry point for llite directories (installed in ll_dir_operations).
+ *
+ * IOC_MDC_LOOKUP:
+ *   - fetches the ioctl data for @arg into a kernel buffer (buf/len) with
+ *     obd_ioctl_getdata();
+ *   - looks up the name held in ioc_inlbuf1 (length ioc_inllen1) under
+ *     @inode with mdc_getattr_name();
+ *   - stores mode/uid/gid from the reply body into ioc_obdo1 and copies the
+ *     whole buffer back to user space.
+ *
+ * Returns the obd_ioctl_getdata() error, -EINVAL if no filename was
+ * supplied, the (negative) mdc_getattr_name() error, -EFAULT if the result
+ * cannot be copied back to user space, otherwise the value returned by
+ * mdc_getattr_name().  Any other @cmd yields -ENOTTY.
+ */
+static int ll_dir_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long arg)
+{
+        struct ll_sb_info *sbi = ll_i2sbi(inode);
+        struct obd_ioctl_data *data;
+        ENTRY;
+
+        switch(cmd) {
+        case IOC_MDC_LOOKUP: {
+                struct ptlrpc_request *request = NULL;
+                char *buf = NULL;
+                char *filename;
+                int namelen, rc, err, len = 0;
+                int ea_size = 0; // obd_size_wiremd(&sbi->ll_osc_conn, NULL);
+                unsigned long valid;
+
+                rc = obd_ioctl_getdata(&buf, &len, (void *)arg);
+                if (rc)
+                        RETURN(rc);
+                data = (void *)buf;
+
+                filename = data->ioc_inlbuf1;
+                namelen = data->ioc_inllen1;
+
+                if (namelen < 1) {
+                        CERROR("IOC_MDC_LOOKUP missing filename\n");
+                        GOTO(out, rc = -EINVAL);
+                }
+
+                valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE;
+                rc = mdc_getattr_name(&sbi->ll_mdc_conn, inode, filename,
+                                      namelen, valid, ea_size, &request);
+                if (rc < 0) {
+                        CERROR("mdc_getattr_name: %d\n", rc);
+                        /* The request may already have been handed back to
+                         * us even though the RPC failed, so leave through
+                         * out_req to drop it instead of leaking it (request
+                         * is still NULL if nothing was allocated, as in the
+                         * other unconditional ptlrpc_req_finished() calls in
+                         * this file).
+                         */
+                        GOTO(out_req, rc);
+                } else {
+                        struct mds_body *body;
+                        body = lustre_msg_buf(request->rq_repmsg, 0);
+                        /* surely there's a better way -phik */
+                        data->ioc_obdo1.o_mode = body->mode;
+                        data->ioc_obdo1.o_uid = body->uid;
+                        data->ioc_obdo1.o_gid = body->gid;
+                }
+
+                err = copy_to_user((void *)arg, buf, len);
+                if (err)
+                        GOTO(out_req, rc = -EFAULT);
+
+                EXIT;
+        out_req:
+                ptlrpc_req_finished(request);
+        out:
+                /* buf/len were set up by obd_ioctl_getdata() above */
+                OBD_FREE(buf, len);
+                return rc;
+        }
+        default:
+                CERROR("unrecognized ioctl %#x\n", cmd);
+                RETURN(-ENOTTY);
+        }
+}
+
struct file_operations ll_dir_operations = {
read: generic_read_dir,
- readdir: ll_readdir
+ readdir: ll_readdir,
+ ioctl: ll_dir_ioctl
};
int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc);
extern int ll_setattr(struct dentry *de, struct iattr *attr);
-int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid, gid_t gid,
- struct lov_stripe_md **lsmp)
+/*
+ * Send the MDS open for @file and attach the per-open state to it.
+ *
+ * Allocates a struct ll_file_data from ll_file_data_slab, seeds its
+ * fd_mdshandle with the file pointer and a random cookie, and calls
+ * mdc_open() with the packed stripe MD in @lmm / @lmm_size (the callers in
+ * this file pass NULL / 0 when they have no stripe MD to send).
+ *
+ * On success the open request is remembered in fd->fd_req, the ll_file_data
+ * is stored in file->private_data and O_LOV_DELAY_CREATE is cleared from
+ * file->f_flags.
+ *
+ * Returns 0 on success, -ENOMEM if the ll_file_data cannot be allocated, or
+ * -abs() of the mdc_open() error, in which case the ll_file_data is freed
+ * and file->private_data is left untouched.
+ */
+static int ll_mdc_open(struct lustre_handle *mdc_conn, struct inode *inode,
+                       struct file *file, struct lov_mds_md *lmm, int lmm_size)
{
+        struct ptlrpc_request *req = NULL;
+        struct ll_file_data *fd;
+        int rc;
+        ENTRY;
+
+        LASSERT(!file->private_data);
+
+        fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
+        if (!fd)
+                RETURN(-ENOMEM);
+
+        memset(fd, 0, sizeof(*fd));
+        fd->fd_mdshandle.addr = (__u64)(unsigned long)file;
+        get_random_bytes(&fd->fd_mdshandle.cookie,
+                         sizeof(fd->fd_mdshandle.cookie));
+
+        rc = mdc_open(mdc_conn, inode->i_ino, S_IFREG | inode->i_mode,
+                      file->f_flags, lmm, lmm_size, &fd->fd_mdshandle, &req);
+
+        /* This is the "reply" refcount.  It is dropped whether or not the
+         * open succeeded.
+         * NOTE(review): req is still stored in fd->fd_req below, so
+         * presumably a second reference keeps it alive until
+         * ll_mdc_close() drops or hands it off -- confirm.
+         */
+        ptlrpc_req_finished(req);
+
+        if (rc)
+                GOTO(out_fd, rc);
+
+        fd->fd_req = req;
+        file->private_data = fd;
+
+        /* addr was seeded with the file pointer above; if it is zero or
+         * unchanged, mdc_open() did not fill in a handle.  This is only
+         * logged, the open still succeeds.
+         */
+        if (!fd->fd_mdshandle.addr ||
+            fd->fd_mdshandle.addr == (__u64)(unsigned long)file) {
+                CERROR("hmm, mdc_open didn't assign fd_mdshandle?\n");
+                /* XXX handle this how, abort or is it non-fatal? */
+        }
+
+        file->f_flags &= ~O_LOV_DELAY_CREATE;
+        RETURN(0);
+
+out_fd:
+        /* mark the handle with DEAD_HANDLE_MAGIC before the slab object is
+         * given back
+         */
+        fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
+        kmem_cache_free(ll_file_data_slab, fd);
+
+        return -abs(rc);
+}
+
+/*
+ * Send the MDS close for @file over @mdc_conn and tear down the
+ * ll_file_data that ll_mdc_open() attached to it.
+ *
+ * The close RPC is sent first; a failure is logged but the local cleanup is
+ * still carried out.  The saved open request (fd->fd_req) is then either
+ * stripped of PTL_RPC_FL_REPLAY (if it has a transno) or released (if it
+ * has none), and the ll_file_data is freed with file->private_data reset to
+ * NULL.
+ *
+ * file->private_data must be a valid ll_file_data with fd_req set; both are
+ * dereferenced unconditionally.
+ *
+ * Returns 0 on success or -abs() of the mdc_close() error.
+ */
+static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode,
+                        struct file *file)
+{
+        struct ll_file_data *fd = file->private_data;
+        struct ptlrpc_request *req = NULL;
+        unsigned long flags;
+        struct obd_import *imp = fd->fd_req->rq_import;
+        int rc;
+
+        /* Complete the open request and remove it from replay list */
+        DEBUG_REQ(D_HA, fd->fd_req, "matched open req %p", fd->fd_req);
+        /* Use the connection handed in by the caller, as ll_mdc_open()
+         * does, rather than looking it up again from the inode.
+         */
+        rc = mdc_close(mdc_conn, inode->i_ino, inode->i_mode,
+                       &fd->fd_mdshandle, &req);
+
+        if (rc)
+                CERROR("inode %lu close failed: rc = %d\n", inode->i_ino, rc);
+        ptlrpc_req_finished(req);
+
+        spin_lock_irqsave(&imp->imp_lock, flags);
+        if (fd->fd_req->rq_transno) {
+                /* This caused an EA to be written, need to replay as a normal
+                 * transaction now.  Our reference is now effectively owned
+                 * by the imp_replay_list, and we'll be committed just like
+                 * other transno-having requests now.
+                 */
+                fd->fd_req->rq_flags &= ~PTL_RPC_FL_REPLAY;
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+        } else {
+                /* No transno means that we can just drop our ref. */
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+                ptlrpc_req_finished(fd->fd_req);
+        }
+        fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
+        file->private_data = NULL;
+        kmem_cache_free(ll_file_data_slab, fd);
+
+        return -abs(rc);
+}
+
+static int ll_osc_open(struct lustre_handle *conn, struct inode *inode,
+ struct file *file, struct lov_stripe_md *lsm)
+{
+ struct ll_file_data *fd;
struct obdo *oa;
int rc;
ENTRY;
oa = obdo_alloc();
if (!oa)
RETURN(-ENOMEM);
+ oa->o_id = lsm->lsm_object_id;
+ oa->o_mode = S_IFREG;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
+ OBD_MD_FLBLOCKS;
+ rc = obd_open(conn, oa, lsm);
+ if (rc)
+ GOTO(out, rc);
- oa->o_mode = S_IFREG | 0600;
- oa->o_id = id;
- oa->o_uid = uid;
- oa->o_gid = gid;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLUID | OBD_MD_FLGID;
- rc = obd_create(ll_s2obdconn(sb), oa, lsmp);
- obdo_free(oa);
+ obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
- if (!rc)
- LASSERT(*lsmp && (*lsmp)->lsm_object_id);
+ fd = file->private_data;
+ obd_oa2handle(&fd->fd_osthandle, oa);
+
+ atomic_inc(&ll_i2info(inode)->lli_open_count);
+out:
+ obdo_free(oa);
RETURN(rc);
}
-static int ll_file_open(struct inode *inode, struct file *file)
+/* Caller must hold lli_open_sem to protect lli->lli_smd from changing and
+ * duplicate objects from being created. We only install lsm to lli_smd if
+ * the mdc open was successful (hence stored stripe MD on MDS), otherwise
+ * other nodes could try to create different objects for the same file.
+ */
+static int ll_create_open_obj(struct lustre_handle *conn, struct inode *inode,
+ struct file *file, struct lov_stripe_md *lsm)
{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ll_inode_info *lli = ll_i2info(inode);
- struct lustre_handle *conn = ll_i2obdconn(inode);
- struct ptlrpc_request *req = NULL;
- struct ll_file_data *fd;
- struct obdo *oa;
- struct lov_stripe_md *lsm;
struct lov_mds_md *lmm = NULL;
int lmm_size = 0;
- int rc = 0;
+ struct obdo *oa;
+ int rc, err;
ENTRY;
- LASSERT(!file->private_data);
-
- lsm = lli->lli_smd;
+ oa = obdo_alloc();
+ if (!oa)
+ RETURN(-ENOMEM);
- /* delayed create of object (intent created inode) */
- /* XXX object needs to be cleaned up if mdc_open fails */
- /* XXX error handling appropriate here? */
- if (lsm == NULL) {
- if (file->f_flags & O_LOV_DELAY_CREATE) {
- CDEBUG(D_INODE, "delaying object creation\n");
- RETURN(0);
- }
- down(&lli->lli_open_sem);
- /* Check to see if we lost the race */
- if (!lli->lli_smd)
- rc = ll_create_objects(inode->i_sb, inode->i_ino, 0, 0,
- &lli->lli_smd);
- up(&lli->lli_open_sem);
- if (rc)
- RETURN(rc);
+ oa->o_mode = S_IFREG | 0600;
+ oa->o_id = inode->i_ino;
+ /* Keep these 0 for now, because chown/chgrp does not change the
+ * ownership on the OST, and we don't want to allow BA OST NFS
+ * users to access these objects by mistake.
+ */
+ oa->o_uid = 0;
+ oa->o_gid = 0;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
+ OBD_MD_FLUID | OBD_MD_FLGID;
- lsm = lli->lli_smd;
+ rc = obd_create(conn, oa, &lsm);
+ if (rc) {
+ CERROR("error creating objects for inode %lu: rc = %d\n",
+ inode->i_ino, rc);
+ GOTO(out_oa, rc);
}
- /* XXX We should only send this to MDS if we just created these
- * objects, except we also need to handle the user-stripe case.
- */
- rc = obd_packmd(conn, &lmm, lli->lli_smd);
+ LASSERT(lsm && lsm->lsm_object_id);
+ rc = obd_packmd(conn, &lmm, lsm);
if (rc < 0)
- GOTO(out, rc);
+ GOTO(out_destroy, rc);
lmm_size = rc;
- fd = kmem_cache_alloc(ll_file_data_slab, SLAB_KERNEL);
- if (!fd) {
- if (lmm)
- obd_free_wiremd(conn, &lmm);
- GOTO(out, rc = -ENOMEM);
- }
- memset(fd, 0, sizeof(*fd));
+ rc = ll_mdc_open(&ll_i2sbi(inode)->ll_mdc_conn,inode,file,lmm,lmm_size);
- fd->fd_mdshandle.addr = (__u64)(unsigned long)file;
- get_random_bytes(&fd->fd_mdshandle.cookie,
- sizeof(fd->fd_mdshandle.cookie));
- rc = mdc_open(&sbi->ll_mdc_conn, inode->i_ino, S_IFREG | inode->i_mode,
- file->f_flags, lmm, lmm_size, &fd->fd_mdshandle, &req);
- if (lmm)
- obd_free_wiremd(conn, &lmm);
- fd->fd_req = req;
+ obd_free_wiremd(conn, &lmm);
- /* This is the "reply" refcount. */
- ptlrpc_req_finished(req);
- if (rc)
- GOTO(out_req, -abs(rc));
- if (!fd->fd_mdshandle.addr ||
- fd->fd_mdshandle.addr == (__u64)(unsigned long)file) {
- CERROR("hmm, mdc_open didn't assign fd_mdshandle?\n");
- /* XXX handle this how, abort or is it non-fatal? */
+ /* If we couldn't complete mdc_open() and store the stripe MD on the
+ * MDS, we need to destroy the objects now or they will be leaked.
+ */
+ if (rc) {
+ CERROR("error MDS opening %lu with delayed create: rc %d\n",
+ inode->i_ino, rc);
+ GOTO(out_destroy, rc);
}
+ lli->lli_smd = lsm;
- oa = obdo_alloc();
- if (!oa)
- GOTO(out_mdc, rc = -EINVAL);
+ EXIT;
+out_oa:
+ obdo_free(oa);
+ return rc;
+out_destroy:
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
oa->o_id = lsm->lsm_object_id;
- oa->o_mode = S_IFREG;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS;
- rc = obd_open(ll_i2obdconn(inode), oa, lsm);
- obdo_to_inode(inode, oa, oa->o_valid & (OBD_MD_FLSIZE|OBD_MD_FLBLOCKS));
+ oa->o_valid |= OBD_MD_FLID;
+ err = obd_destroy(conn, oa, lsm);
+ obd_free_memmd(conn, &lsm);
+ if (err)
+ CERROR("error uncreating inode %lu objects: rc %d\n",
+ inode->i_ino, err);
+ goto out_oa;
+}
- obd_oa2handle(&fd->fd_osthandle, oa);
- obdo_free(oa);
+/* Open a file, and (for the very first open) create objects on the OSTs at
+ * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object
+ * creation or open until ll_lov_setstripe() ioctl is called. We grab
+ * lli_open_sem to ensure no other process will create objects, send the
+ * stripe MD to the MDS, or try to destroy the objects if that fails.
+ *
+ * If we already have the stripe MD locally, we don't request it in
+ * mdc_open() by passing a lmm_size = 0.
+ *
+ * It is up to the application to ensure no other processes open this file
+ * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be
+ * used. We might be able to avoid races of that sort by getting lli_open_sem
+ * before returning in the O_LOV_DELAY_CREATE case and dropping it here
+ * or in ll_file_release(), but I'm not sure that is desirable/necessary.
+ */
+static int ll_file_open(struct inode *inode, struct file *file)
+{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lustre_handle *conn = ll_i2obdconn(inode);
+ struct lov_stripe_md *lsm;
+ int rc = 0;
+ ENTRY;
- if (rc)
- GOTO(out_mdc, rc = -abs(rc));
+ lsm = lli->lli_smd;
+ if (lsm == NULL) {
+ if (file->f_flags & O_LOV_DELAY_CREATE) {
+ CDEBUG(D_INODE, "delaying object creation\n");
+ RETURN(0);
+ }
- atomic_inc(&lli->lli_open_count);
+ down(&lli->lli_open_sem);
+ if (!lli->lli_smd) {
+ rc = ll_create_open_obj(conn, inode, file, NULL);
+ up(&lli->lli_open_sem);
+ } else {
+ CERROR("stripe already set on ino %lu\n", inode->i_ino);
+ up(&lli->lli_open_sem);
+ rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file,NULL,0);
+ }
+ lsm = lli->lli_smd;
+ } else
+ rc = ll_mdc_open(&sbi->ll_mdc_conn, inode, file, NULL, 0);
- file->private_data = fd;
+ if (rc)
+ RETURN(rc);
+ rc = ll_osc_open(conn, inode, file, lsm);
+ if (rc)
+ GOTO(out_close, rc);
RETURN(0);
-out_mdc:
- mdc_close(&sbi->ll_mdc_conn, inode->i_ino,
- S_IFREG, &fd->fd_mdshandle, &req);
-out_req:
- ptlrpc_req_finished(req); /* once for an early "commit" */
-//out_fd:
- fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(ll_file_data_slab, fd);
-out:
+out_close:
+ ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
return rc;
}
int ll_size_lock(struct inode *inode, struct lov_stripe_md *lsm, obd_off start,
- int mode, struct lustre_handle **lockhs_p)
+ int mode, struct lustre_handle *lockh)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct ldlm_extent extent;
- struct lustre_handle *lockhs = NULL;
- int rc, flags = 0, stripe_count;
+ int rc, flags = 0;
ENTRY;
- if (sbi->ll_flags & LL_SBI_NOLCK) {
- *lockhs_p = NULL;
+ /* XXX phil: can we do this? won't it screw the file size up? */
+ if (sbi->ll_flags & LL_SBI_NOLCK)
RETURN(0);
- }
-
- stripe_count = lsm->lsm_stripe_count;
- if (!stripe_count)
- stripe_count = 1;
-
- OBD_ALLOC(lockhs, stripe_count * sizeof(*lockhs));
- if (lockhs == NULL)
- RETURN(-ENOMEM);
extent.start = start;
extent.end = OBD_OBJECT_EOF;
rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, &extent,
sizeof(extent), mode, &flags, ll_lock_callback,
- inode, sizeof(*inode), lockhs);
- if (rc != ELDLM_OK) {
- CERROR("lock enqueue: %d\n", rc);
- OBD_FREE(lockhs, stripe_count * sizeof(*lockhs));
- } else
- *lockhs_p = lockhs;
+ inode, sizeof(*inode), lockh);
RETURN(rc);
}
int ll_size_unlock(struct inode *inode, struct lov_stripe_md *lsm, int mode,
- struct lustre_handle *lockhs)
+ struct lustre_handle *lockh)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc, stripe_count;
+ int rc;
ENTRY;
+ /* XXX phil: can we do this? won't it screw the file size up? */
if (sbi->ll_flags & LL_SBI_NOLCK)
RETURN(0);
- if (lockhs == NULL) {
- LBUG();
- RETURN(-EINVAL);
- }
-
- rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockhs);
+ rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh);
if (rc != ELDLM_OK) {
CERROR("lock cancel: %d\n", rc);
LBUG();
}
- stripe_count = lsm->lsm_stripe_count;
- if (!stripe_count)
- stripe_count = 1;
-
- OBD_FREE(lockhs, stripe_count * sizeof(*lockhs));
RETURN(rc);
}
int ll_file_size(struct inode *inode, struct lov_stripe_md *lsm)
{
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct lustre_handle *lockhs;
+ //struct lustre_handle lockh = { 0, 0 };
struct obdo oa;
- int err, rc;
+ //int err;
+ int rc;
ENTRY;
LASSERT(lsm);
LASSERT(sbi);
- rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockhs);
+ /* XXX do not yet need size lock - OST size always correct (sync write)
+ rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockh);
if (rc != ELDLM_OK) {
CERROR("lock enqueue: %d\n", rc);
RETURN(rc);
}
+ */
memset(&oa, 0, sizeof oa);
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLID|OBD_MD_FLTYPE|OBD_MD_FLSIZE|OBD_MD_FLBLOCKS;
rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
- if (!rc)
- obdo_to_inode(inode, &oa,
- oa.o_valid & ~(OBD_MD_FLTYPE | OBD_MD_FLMODE));
-
- err = ll_size_unlock(inode, lsm, LCK_PR, lockhs);
+ if (!rc) {
+ obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS);
+ CDEBUG(D_INODE, LPX64" size %Lu/%Lu\n",
+ lsm->lsm_object_id, inode->i_size, inode->i_size);
+ }
+ /* XXX do not need size lock, because OST size always correct (sync write)
+ err = ll_size_unlock(inode, lsm, LCK_PR, &lockh);
if (err != ELDLM_OK) {
CERROR("lock cancel: %d\n", err);
- LBUG();
+ if (!rc)
+ rc = err;
}
+ */
RETURN(rc);
}
+/* While this returns an error code, its caller fput() does not, so we need
+ * to make every effort to clean up all of our state here.  Also, applications
+ * rarely check close errors, and even if an error is returned they will not
+ * retry the close call.
+ */
static int ll_file_release(struct inode *inode, struct file *file)
{
- struct ptlrpc_request *req = NULL;
struct ll_file_data *fd;
struct obdo oa;
struct ll_sb_info *sbi = ll_i2sbi(inode);
ENTRY;
fd = (struct ll_file_data *)file->private_data;
- if (!fd) {
- LASSERT(file->f_flags & O_LOV_DELAY_CREATE);
- GOTO(out, rc = 0);
- }
+ if (!fd) /* no process opened the file after an mcreate */
+ RETURN(rc = 0);
memset(&oa, 0, sizeof(oa));
oa.o_id = lsm->lsm_object_id;
oa.o_mode = S_IFREG;
oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
obd_handle2oa(&oa, &fd->fd_osthandle);
- rc = obd_close(ll_i2obdconn(inode), &oa, lsm);
+ rc = obd_close(&sbi->ll_osc_conn, &oa, lsm);
if (rc)
- GOTO(out_mdc, rc = -abs(rc));
-
-#if 0
-#error "This should only be done on the node that already has the EOF lock"
-#error "and only in the case where the file size actually changed. For now"
-#error "we don't care about the size on the MDS, since we never use it (the"
-#error "OST always has the authoritative size and we don't even use the MDS."
- /* If this fails and we goto out_fd, the file size on the MDS is out of
- * date. Is that a big deal? */
- if (file->f_mode & FMODE_WRITE) {
- struct lustre_handle *lockhs;
-
- rc = ll_size_lock(inode, lsm, 0, LCK_PR, &lockhs);
- if (rc)
- GOTO(out_mdc, -abs(rc));
-
- oa.o_id = lsm->lsm_object_id;
- oa.o_mode = S_IFREG;
- oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
- OBD_MD_FLBLOCKS;
- rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
- if (!rc) {
- struct iattr attr;
- attr.ia_valid = (ATTR_MTIME | ATTR_CTIME | ATTR_ATIME |
- ATTR_SIZE);
- attr.ia_mtime = inode->i_mtime;
- attr.ia_ctime = inode->i_ctime;
- attr.ia_atime = inode->i_atime;
- attr.ia_size = oa.o_size;
-
- inode->i_blocks = oa.o_blocks;
-
- /* XXX: this introduces a small race that we should
- * evaluate */
- rc = ll_inode_setattr(inode, &attr, 0);
- }
- rc2 = ll_size_unlock(inode, lli->lli_smd, LCK_PR, lockhs);
- if (rc2) {
- CERROR("lock cancel: %d\n", rc);
- LBUG();
- if (!rc)
- rc = rc2;
- }
- }
-#endif
+ CERROR("inode %lu object close failed: rc = %d\n",
+ inode->i_ino, rc);
-out_mdc:
- rc2 = mdc_close(&sbi->ll_mdc_conn, inode->i_ino,
- S_IFREG, &fd->fd_mdshandle, &req);
- ptlrpc_req_finished(req);
- if (rc2) {
- if (!rc)
- rc = -abs(rc2);
- GOTO(out_fd, rc);
- }
- DEBUG_REQ(D_HA, fd->fd_req, "matched open for this close: ");
- ptlrpc_req_finished(fd->fd_req);
+ rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
+ if (rc2 && !rc)
+ rc = rc2;
if (atomic_dec_and_test(&lli->lli_open_count)) {
CDEBUG(D_INFO, "last close, cancelling unused locks\n");
- rc = obd_cancel_unused(ll_i2obdconn(inode), lsm, 0);
- if (rc)
+ rc2 = obd_cancel_unused(&sbi->ll_osc_conn, lsm, 0);
+ if (rc2 && !rc) {
+ rc = rc2;
CERROR("obd_cancel_unused: %d\n", rc);
- } else {
+ }
+ } else
CDEBUG(D_INFO, "not last close, not cancelling unused locks\n");
- }
-
- EXIT;
-out_fd:
- fd->fd_mdshandle.cookie = DEAD_HANDLE_MAGIC;
- file->private_data = NULL;
- kmem_cache_free(ll_file_data_slab, fd);
-out:
- return rc;
+ RETURN(rc);
}
static inline void ll_remove_suid(struct inode *inode)
void *data, __u32 data_len, int flag)
{
struct inode *inode = data;
- struct lustre_handle lockh;
+ struct lustre_handle lockh = { 0, 0 };
int rc;
ENTRY;
struct ll_file_data *fd = (struct ll_file_data *)filp->private_data;
struct inode *inode = filp->f_dentry->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct lustre_handle *lockhs = NULL;
+ struct lustre_handle lockh = { 0, 0 };
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
int flags = 0;
ldlm_error_t err;
* call us */
retval = ll_file_size(inode, lsm);
if (retval < 0) {
- CERROR("ll_file_size: %d\n", retval);
+ CERROR("ll_file_size: "LPSZ"\n", retval);
RETURN(retval);
}
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
!(sbi->ll_flags & LL_SBI_NOLCK)) {
struct ldlm_extent extent;
- OBD_ALLOC(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs));
- if (!lockhs)
- RETURN(-ENOMEM);
-
extent.start = *ppos;
extent.end = *ppos + count;
CDEBUG(D_INFO, "Locking inode %lu, start "LPU64" end "LPU64"\n",
err = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT,
&extent, sizeof(extent), LCK_PR, &flags,
ll_lock_callback, inode, sizeof(*inode),
- lockhs);
+ &lockh);
if (err != ELDLM_OK) {
- OBD_FREE(lockhs, lsm->lsm_stripe_count*sizeof(*lockhs));
CERROR("lock enqueue: err: %d\n", err);
RETURN(err);
}
}
- CDEBUG(D_INFO, "Reading inode %lu, %d bytes, offset %Ld\n",
+ CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n",
inode->i_ino, count, *ppos);
retval = generic_file_read(filp, buf, count, ppos);
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
!(sbi->ll_flags & LL_SBI_NOLCK)) {
- err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PR, lockhs);
+ err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PR, &lockh);
if (err != ELDLM_OK) {
CERROR("lock cancel: err: %d\n", err);
retval = err;
}
}
- if (lockhs)
- OBD_FREE(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs));
RETURN(retval);
}
struct ll_file_data *fd = (struct ll_file_data *)file->private_data;
struct inode *inode = file->f_dentry->d_inode;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct lustre_handle *lockhs = NULL, *eof_lockhs = NULL;
+ struct lustre_handle lockh = { 0, 0 }, eof_lockh = { 0, 0 };
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
int flags = 0;
ldlm_error_t err;
if (!oa)
RETURN(-ENOMEM);
- err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockhs);
+ err = ll_size_lock(inode, lsm, 0, LCK_PW, &eof_lockh);
if (err) {
obdo_free(oa);
RETURN(err);
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) &&
!(sbi->ll_flags & LL_SBI_NOLCK)) {
struct ldlm_extent extent;
- OBD_ALLOC(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs));
- if (!lockhs)
- GOTO(out_eof, retval = -ENOMEM);
extent.start = *ppos;
extent.end = *ppos + count;
CDEBUG(D_INFO, "Locking inode %lu, start "LPU64" end "LPU64"\n",
err = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT,
&extent, sizeof(extent), LCK_PW, &flags,
ll_lock_callback, inode, sizeof(*inode),
- lockhs);
+ &lockh);
if (err != ELDLM_OK) {
CERROR("lock enqueue: err: %d\n", err);
- GOTO(out_free, retval = err);
+ GOTO(out_eof, retval = err);
}
}
- CDEBUG(D_INFO, "Writing inode %lu, %ld bytes, offset "LPD64"\n",
- inode->i_ino, (long)count, *ppos);
+ CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n",
+ inode->i_ino, count, *ppos);
retval = generic_file_write(file, buf, count, ppos);
if (!(fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
sbi->ll_flags & LL_SBI_NOLCK) {
- err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PW, lockhs);
+ err = obd_cancel(&sbi->ll_osc_conn, lsm, LCK_PW, &lockh);
if (err != ELDLM_OK) {
CERROR("lock cancel: err: %d\n", err);
- GOTO(out_free, retval = err);
+ GOTO(out_eof, retval = err);
}
}
EXIT;
- out_free:
- if (lockhs)
- OBD_FREE(lockhs, lsm->lsm_stripe_count * sizeof(*lockhs));
-
out_eof:
if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) {
- err = ll_size_unlock(inode, lsm, LCK_PW, eof_lockhs);
+ err = ll_size_unlock(inode, lsm, LCK_PW, &eof_lockh);
if (err && !retval)
retval = err;
}
return retval;
}
-/* Retrieve object striping information.
- *
- * @arg is a pointer to a user struct with one or more of the fields set to
- * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
- * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC.
- */
static int ll_lov_setstripe(struct inode *inode, struct file *file,
unsigned long arg)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct lov_mds_md *lmm = NULL, *lmmu = (void *)arg;
- struct lustre_handle *conn = ll_i2obdconn(inode);
+ struct lustre_handle *conn;
+ struct lov_stripe_md *lsm;
int rc;
+ ENTRY;
- rc = obd_alloc_wiremd(conn, &lmm);
- if (rc < 0)
- RETURN(rc);
-
- rc = copy_from_user(lmm, lmmu, sizeof(*lmm));
- if (rc)
- GOTO(out_free, rc = -EFAULT);
+ down(&lli->lli_open_sem);
+ lsm = lli->lli_smd;
+ if (lsm) {
+ up(&lli->lli_open_sem);
+ CERROR("stripe already set for ino %lu\n", inode->i_ino);
+ /* If we haven't already done the open, do so now */
+ if (file->f_flags & O_LOV_DELAY_CREATE) {
+ int rc2 = ll_file_open(inode, file);
+ if (rc2)
+ RETURN(rc2);
+ }
- if (lmm->lmm_magic != LOV_MAGIC) {
- CERROR("bad LOV magic %X\n", lmm->lmm_magic);
- GOTO(out_free, rc = -EINVAL);
+ RETURN(-EALREADY);
}
- down(&lli->lli_open_sem);
- if (lli->lli_smd) {
- CERROR("striping data already set for %lu\n", inode->i_ino);
- GOTO(out_lov_up, rc = -EPERM);
- }
- rc = obd_unpackmd(conn, &lli->lli_smd, lmm);
- if (rc < 0) {
- CERROR("error setting LOV striping on %lu: rc = %d\n",
- inode->i_ino, rc);
- GOTO(out_lov_up, rc);
- }
+ conn = ll_i2obdconn(inode);
+
+ rc = obd_iocontrol(LL_IOC_LOV_SETSTRIPE, conn, 0, &lsm, (void *)arg);
+ if (!rc)
+ rc = ll_create_open_obj(conn, inode, file, lsm);
+ up(&lli->lli_open_sem);
- rc = ll_create_objects(inode->i_sb, inode->i_ino, 0, 0, &lli->lli_smd);
if (rc) {
- obd_free_memmd(conn, &lli->lli_smd);
- } else {
- file->f_flags &= ~O_LOV_DELAY_CREATE;
- rc = ll_file_open(inode, file);
+ obd_free_memmd(conn, &lsm);
+ RETURN(rc);
}
-out_lov_up:
- up(&lli->lli_open_sem);
-out_free:
- obd_free_wiremd(conn, &lmm);
- return rc;
+ rc = ll_osc_open(conn, inode, file, lli->lli_smd);
+ RETURN(rc);
}
-/* Retrieve object striping information.
- *
- * @arg is a pointer to a user struct with lmm_ost_count indicating
- * the maximum number of OST indices which will fit in the user buffer.
- * lmm_magic must be LOV_MAGIC.
- */
static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
{
- struct lov_mds_md lmm, *lmmu = (void *)arg, *lmmk = NULL;
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
struct lustre_handle *conn = ll_i2obdconn(inode);
- int ost_count, rc, lmm_size;
if (!lsm)
RETURN(-ENODATA);
- rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
- if (rc)
- RETURN(-EFAULT);
-
- if (lmm.lmm_magic != LOV_MAGIC)
- RETURN(-EINVAL);
-
- if (lsm->lsm_stripe_count == 0)
- ost_count = 1;
- else {
- struct obd_device *obd = class_conn2obd(conn);
- struct lov_obd *lov = &obd->u.lov;
- ost_count = lov->desc.ld_tgt_count;
- }
-
- /* XXX we _could_ check if indices > user lmm_ost_count are zero */
- if (lmm.lmm_ost_count < ost_count)
- RETURN(-EOVERFLOW);
-
- rc = obd_packmd(conn, &lmmk, lsm);
- if (rc < 0)
- RETURN(rc);
-
- lmm_size = rc;
-
- /* LOV STACKING layering violation to make LOV/OSC return same data */
- if (lsm->lsm_stripe_count == 0) {
- struct lov_object_id *loi;
-
- loi = (void *)lmmu + offsetof(typeof(*lmmu), lmm_objects);
- rc = copy_to_user(loi, &lsm->lsm_object_id, sizeof(*loi));
- if (rc) {
- lmm_size = 0;
- rc = -EFAULT;
- } else {
- lmmk->lmm_magic = LOV_MAGIC;
- lmmk->lmm_ost_count = lmmk->lmm_stripe_count = 1;
- }
- }
-
- if (lmm_size && copy_to_user(lmmu, lmmk, lmm_size))
- rc = -EFAULT;
-
- obd_free_wiremd(conn, &lmmk);
-
- RETURN(rc);
+ return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, conn, 0, lsm, (void *)arg);
}
int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
rc = mdc_getattr(&sbi->ll_mdc_conn, inode->i_ino,
inode->i_mode, valid, datalen, &req);
if (rc) {
- CERROR("failure %d inode "LPX64"\n", rc, inode->i_ino);
+ CERROR("failure %d inode %lu\n", rc, inode->i_ino);
ptlrpc_req_finished(req);
RETURN(-abs(rc));
}
GOTO(out, flag = LL_LOOKUP_POSITIVE);
}
- /* Do a getattr now that we have the lock */
+ /* Do a getattr now that we have the lock, and fetch the
+ * up-to-date stripe MD at the same time.
+ */
valid = OBD_MD_FLNOTOBD;
if (it->it_op == IT_READLINK) {
datalen = mds_body->size;
}
}
- EXIT;
out:
if (intent_finish != NULL) {
rc = intent_finish(flag, request, de, it, offset, ino);
{
struct dentry *save = dentry;
int rc;
+ ENTRY;
rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
if (rc < 0) {
CERROR("ll_intent_lock: %d\n", rc);
- return ERR_PTR(rc);
+ RETURN(ERR_PTR(rc));
}
if (dentry == save)
- return NULL;
+ RETURN(NULL);
else
- return dentry;
+ RETURN(dentry);
}
static struct inode *ll_create_node(struct inode *dir, const char *name,
list_entry(tmp, struct obd_import, imp_chain);
if (phase == PTLRPC_RECOVD_PHASE_PREPARE) {
- spin_lock(&imp->imp_lock);
+ unsigned long flags;
+ spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_RECOVD;
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
}
imp->imp_recover(imp, phase);
}
else
pg.count = PAGE_SIZE;
+ CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
+ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, inode->i_ino,
+ pg.off, pg.off);
+ if (pg.count == 0) {
+ CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
+ LPU64"\n",
+ inode->i_ino, inode, inode->i_size, page->mapping->host,
+ page->mapping->host->i_size, page->index, pg.off);
+ }
+
pg.flag = create ? OBD_BRW_CREATE : 0;
set->brw_callback = ll_brw_sync_wait;
{
struct obdo oa = {0};
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- struct lustre_handle *lockhs = NULL;
+ struct lustre_handle lockh = { 0, 0 };
int err;
ENTRY;
oa.o_mode = inode->i_mode;
oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE;
- CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after "LPD64")\n",
+ CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n",
oa.o_id, inode->i_size);
- err = ll_size_lock(inode, lsm, inode->i_size, LCK_PW, &lockhs);
+ err = ll_size_lock(inode, lsm, inode->i_size, LCK_PW, &lockh);
if (err) {
CERROR("ll_size_lock failed: %d\n", err);
return;
else
obdo_to_inode(inode, &oa, oa.o_valid);
- err = ll_size_unlock(inode, lsm, LCK_PW, lockhs);
+ err = ll_size_unlock(inode, lsm, LCK_PW, &lockh);
if (err)
CERROR("ll_size_unlock failed: %d\n", err);
pg.pg = page;
pg.count = to;
+ /* XXX make the starting offset "from" */
pg.off = (((obd_off)page->index) << PAGE_SHIFT);
pg.flag = create ? OBD_BRW_CREATE : 0;
if (!PageLocked(page))
LBUG();
- CDEBUG(D_INODE, "commit_page writing (off "LPD64"), count "LPD64"\n",
+ CDEBUG(D_INODE, "commit_page writing (off "LPD64"), count %d\n",
pg.off, pg.count);
set->brw_callback = ll_brw_sync_wait;
#define log2(n) ffz(~(n))
#endif
-static struct super_block * ll_read_super(struct super_block *sb,
- void *data, int silent)
+static struct super_block *ll_read_super(struct super_block *sb,
+ void *data, int silent)
{
struct inode *root = 0;
struct obd_device *obd;
class_uuid_t uuid;
ENTRY;
- MOD_INC_USE_COUNT;
OBD_ALLOC(sbi, sizeof(*sbi));
- if (!sbi) {
- MOD_DEC_USE_COUNT;
+ if (!sbi)
RETURN(NULL);
- }
INIT_LIST_HEAD(&sbi->ll_conn_chain);
INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list);
out_free:
OBD_FREE(sbi, sizeof(*sbi));
- MOD_DEC_USE_COUNT;
goto out_dev;
} /* ll_read_super */
OBD_FREE(sbi, sizeof(*sbi));
- MOD_DEC_USE_COUNT;
EXIT;
} /* ll_put_super */
}
}
- if (atomic_read(&inode->i_count) == 0) {
- char *symlink_name = lli->lli_symlink_name;
+ if (atomic_read(&inode->i_count) != 0)
+ CERROR("clearing in-use inode %lu: count = %d\n",
+ inode->i_ino, atomic_read(&inode->i_count));
- if (lli->lli_smd)
- obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
+ if (lli->lli_smd)
+ obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
- if (symlink_name) {
- OBD_FREE(symlink_name, strlen(symlink_name) + 1);
- lli->lli_symlink_name = NULL;
- }
+ if (lli->lli_symlink_name) {
+ OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1);
+ lli->lli_symlink_name = NULL;
}
EXIT;
struct obdo *oa;
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ /* mcreate with no open */
if (!lsm)
- GOTO(out, -EINVAL);
+ GOTO(out, 0);
if (lsm->lsm_object_id == 0) {
CERROR("This really happens\n");
GOTO(out, -ENOMEM);
oa->o_id = lsm->lsm_object_id;
- oa->o_mode = inode->i_mode;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLEASIZE | OBD_MD_FLTYPE;
+ obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE);
err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
obdo_free(oa);
- CDEBUG(D_SUPER, "obd destroy of objid "LPX64" error %d\n",
- lsm->lsm_object_id, err);
+ if (err)
+ CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
+ lsm->lsm_object_id, err);
}
out:
clear_inode(inode);
{
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- int err;
-
+ int err = 0;
ENTRY;
/* change incore inode */
ll_attr2inode(inode, attr, do_trunc);
- err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
- if (err)
- CERROR("mdc_setattr fails (%d)\n", err);
+ /* Don't send size changes to MDS to avoid "fast EA" problems, and
+ * also avoid a pointless RPC (we get file size from OST anyways).
+ */
+ attr->ia_valid &= ~ATTR_SIZE;
+ if (attr->ia_valid) {
+ err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
+ if (err)
+ CERROR("mdc_setattr fails (%d)\n", err);
- ptlrpc_req_finished(request);
+ ptlrpc_req_finished(request);
+ }
RETURN(err);
}
/* core attributes first */
ll_update_inode(inode, body);
- //if (body->valid & OBD_MD_FLEASIZE)
LASSERT(!lli->lli_smd);
if (lic && lic->lic_lmm)
obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
rc = ll_file_size(inode, lli->lli_smd);
if (rc) {
CERROR("ll_file_size: %d\n", rc);
- /* FIXME: need to somehow prevent inode creation */
- LBUG();
+ ll_clear_inode(inode);
make_bad_inode(inode);
}
}
list_for_each_safe(tmp, n, req_list) {
struct ptlrpc_request *req =
list_entry(tmp, struct ptlrpc_request, rq_list);
- CERROR("invalidating req xid "LPD64" op %d to %s:%d\n",
- (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
+ CERROR("invalidating req xid "LPU64" op %d to %s:%d\n",
+ req->rq_xid, req->rq_reqmsg->opc,
req->rq_connection->c_remote_uuid,
req->rq_import->imp_client->cli_request_portal);
req->rq_flags |= PTL_RPC_FL_ERR;
umount_begin: ll_umount_begin
};
-struct file_system_type lustre_lite_fs_type = {
- "lustre_lite", 0, ll_read_super, NULL
+static struct file_system_type lustre_lite_fs_type = { /* filesystem type descriptor, now file-local and using named initializers */
+ name: "lustre_lite",
+ fs_flags: 0,
+ read_super: ll_read_super, /* superblock setup routine defined earlier in this file */
+ owner: THIS_MODULE, /* NOTE(review): presumably replaces the MOD_INC/DEC_USE_COUNT calls this patch removes from ll_read_super/ll_put_super - confirm */
};
static int __init init_lustre_lite(void)
class_uuid_t uuid;
ENTRY;
- MOD_INC_USE_COUNT;
OBD_ALLOC(sbi, sizeof(*sbi));
- if (!sbi) {
- MOD_DEC_USE_COUNT;
+ if (!sbi)
RETURN(-ENOMEM);
- }
INIT_LIST_HEAD(&sbi->ll_conn_chain);
generate_random_uuid(uuid);
out_free:
OBD_FREE(sbi, sizeof(*sbi));
- MOD_DEC_USE_COUNT;
goto out_dev;
} /* ll_fill_super */
obd_disconnect(&sbi->ll_mdc_conn);
OBD_FREE(sbi, sizeof(*sbi));
- MOD_DEC_USE_COUNT;
EXIT;
} /* ll_put_super */
static void ll_clear_inode(struct inode *inode) /* frees the striping MD and symlink name held in the inode's ll_inode_info */
{
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc; /* only assigned inside the #if 0 section below */
ENTRY;
- if (atomic_read(&inode->i_count) == 0) {
- struct ll_inode_info *lli = ll_i2info(inode);
- char *symlink_name = lli->lli_symlink_name;
+#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?"
+#if 0 /* cancelling unused MDC and OSC locks is compiled out here */
+ rc = mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK);
+ if (rc < 0) {
+ CERROR("mdc_cancel_unused: %d\n", rc);
+ /* XXX FIXME do something dramatic */
+ }
- if (lli->lli_smd)
- obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
- if (symlink_name) {
- OBD_FREE(symlink_name, strlen(symlink_name) + 1);
- lli->lli_symlink_name = NULL;
+ if (lli->lli_smd) {
+ rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0);
+ if (rc < 0) {
+ CERROR("obd_cancel_unused: %d\n", rc);
+ /* XXX FIXME do something dramatic */
}
}
+#endif
+
+ if (atomic_read(&inode->i_count) != 0) /* a non-zero count is only reported; the cleanup below still runs */
+ CERROR("clearing in-use inode %lu: count = %d\n",
+ inode->i_ino, atomic_read(&inode->i_count));
+
+ if (lli->lli_smd)
+ obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd);
+
+ if (lli->lli_symlink_name) {
+ OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1); /* freed size is the string length plus its terminating NUL */
+ lli->lli_symlink_name = NULL;
+ }
+
EXIT;
}
struct obdo *oa;
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ /* mcreate with no open */
if (!lsm)
- GOTO(out, -EINVAL);
+ GOTO(out, 0);
if (lsm->lsm_object_id == 0) {
CERROR("This really happens\n");
oa->o_id = lsm->lsm_object_id;
oa->o_mode = inode->i_mode;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLEASIZE | OBD_MD_FLTYPE;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
err = obd_destroy(ll_i2obdconn(inode), oa, lsm);
obdo_free(oa);
- CDEBUG(D_SUPER, "obd destroy of objid "LPX64" error %d\n",
- lsm->lsm_object_id, err);
+ if (err)
+ CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n",
+ lsm->lsm_object_id, err);
}
out:
clear_inode(inode);
{
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(inode);
- int err;
+ int err = 0;
ENTRY;
/* change incore inode */
ll_attr2inode(inode, attr, do_trunc);
- err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
- if (err)
- CERROR("mdc_setattr fails (%d)\n", err);
+ /* Don't send size changes to MDS to avoid "fast EA" problems, and
+ * also avoid a pointless RPC (we get file size from OST anyways).
+ */
+ attr->ia_valid &= ~ATTR_SIZE;
+ if (attr->ia_valid) {
+ err = mdc_setattr(&sbi->ll_mdc_conn, inode, attr, &request);
+ if (err)
+ CERROR("mdc_setattr fails (%d)\n", err);
- ptlrpc_req_finished(request);
+ ptlrpc_req_finished(request);
+ }
RETURN(err);
}
/* core attributes first */
ll_update_inode(inode, body);
- //if (body->valid & OBD_MD_FLEASIZE)
LASSERT(!lli->lli_smd);
if (lic && lic->lic_lmm)
obd_unpackmd(ll_i2obdconn(inode), &lli->lli_smd, lic->lic_lmm);
rc = ll_file_size(inode, lli->lli_smd);
if (rc) {
CERROR("ll_file_size: %d\n", rc);
- /* FIXME: need to somehow prevent inode creation */
- LBUG();
+ ll_clear_inode(inode);
make_bad_inode(inode);
+ RETURN(rc);
}
}
struct inode *inode = dentry->d_inode;
struct ll_inode_info *lli = ll_i2info(inode);
struct ptlrpc_request *request;
- int op, mode, rc;
+ int op = 0, mode = 0, rc;
char *symname;
ENTRY;
#include <linux/lustre_lib.h>
#include <linux/lustre_net.h>
#include <linux/lustre_idl.h>
+#include <linux/lustre_lite.h> /* for LL_IOC_LOV_[GS]ETSTRIPE */
#include <linux/lustre_mds.h>
#include <linux/obd_class.h>
#include <linux/obd_lov.h>
struct lustre_handle *lfh_handles;
};
+struct lov_lock_handles { /* variable-sized set of lock handles built by lov_newlockh() */
+ __u64 llh_cookie; /* random value from lov_newlockh(); lov_h2lovlockh() compares it with the handle's cookie */
+ struct lustre_handle llh_handles[0]; /* trailing array; lov_newlockh() allocates lsm_stripe_count entries */
+};
+
extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm,
struct lov_stripe_md *lsm);
extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm,
struct lov_mds_md *lmm);
+extern int lov_setstripe(struct lustre_handle *conn,
+ struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu);
+extern int lov_getstripe(struct lustre_handle *conn, struct lov_mds_md *lmmu,
+ struct lov_stripe_md *lsm);
/* obd methods */
int lov_attach(struct obd_device *dev, obd_count len, void *data)
int rc, rc2, i;
ENTRY;
- MOD_INC_USE_COUNT;
rc = class_connect(conn, obd, cluuid);
if (rc)
- GOTO(out_dec, rc);
+ RETURN(rc);
/* We don't want to actually do the underlying connections more than
* once, so keep track. */
RETURN(0);
exp = class_conn2export(conn);
+ spin_lock_init(&exp->exp_lov_data.led_lock);
INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head);
/* retrieve LOV metadata from MDS */
for (i = 0; i < desc->ld_tgt_count; i++) {
struct obd_device *tgt = client_tgtuuid2obd(uuidarray[i]);
- int rc2;
if (!tgt) {
CERROR("Target %s not attached\n", uuidarray[i]);
rc = obd_connect(&lov->tgts[i].conn, tgt, NULL, recovd,
recover);
- /* Register even if connect failed, so that we get reactivation
- * notices.
- */
- rc2 = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
- sizeof(struct obd_device *), obd, NULL);
- if (rc2) {
- CERROR("Target %s REGISTER_LOV error %d\n",
- uuidarray[i], rc2);
- GOTO(out_disc, rc2);
+ if (rc) {
+ CERROR("Target %s connect error %d\n", uuidarray[i],
+ rc);
+ GOTO(out_disc, rc);
}
-
- /* But mark failed-connect OSCs as inactive! */
+
+ rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &lov->tgts[i].conn,
+ sizeof(struct obd_device *), obd, NULL);
if (rc) {
- CDEBUG(D_INFO, "Target %s connect error %d\n",
+ CERROR("Target %s REGISTER_LOV error %d\n",
uuidarray[i], rc);
- LASSERT(lov->tgts[i].active == 0);
- rc = 0;
- continue;
+ GOTO(out_disc, rc);
}
-
+
desc->ld_active_tgt_count++;
lov->tgts[i].active = 1;
}
RETURN(rc);
out_disc:
+ i--; /* skip failed-connect OSC */
while (i-- > 0) {
desc->ld_active_tgt_count--;
lov->tgts[i].active = 0;
OBD_FREE(lov->tgts, lov->bufsize);
out_conn:
class_disconnect(conn);
- out_dec:
- MOD_DEC_USE_COUNT;
goto out;
}
lov->tgts = NULL;
exp = class_conn2export(conn);
+ spin_lock(&exp->exp_lov_data.led_lock);
list_for_each_safe(p, n, &exp->exp_lov_data.led_open_head) {
/* XXX close these, instead of just discarding them? */
struct lov_file_handles *lfh;
lfh->lfh_count * sizeof(*lfh->lfh_handles));
kmem_cache_free(lov_file_cache, lfh);
}
+ spin_unlock(&exp->exp_lov_data.led_lock);
out_local:
rc = class_disconnect(conn);
- if (!rc)
- MOD_DEC_USE_COUNT;
return rc;
}
int activate)
{
struct obd_device *obd;
+ struct lov_tgt_desc *tgt;
int i, rc = 0;
ENTRY;
lov, uuid, activate);
spin_lock(&lov->lov_lock);
- for (i = 0; i < lov->desc.ld_tgt_count; i++)
- if (strncmp(uuid, lov->tgts[i].uuid,
- sizeof(lov->tgts[i].uuid)) == 0)
+ for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
+ CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
+ i, tgt->uuid, tgt->conn.addr);
+ if (strncmp(uuid, tgt->uuid, sizeof(tgt->uuid)) == 0)
break;
+ }
if (i == lov->desc.ld_tgt_count)
GOTO(out, rc = -EINVAL);
- obd = class_conn2obd(&lov->tgts[i].conn);
+ obd = class_conn2obd(&tgt->conn);
if (obd == NULL) {
LBUG();
GOTO(out, rc = -ENOTCONN);
}
- CDEBUG(D_INFO, "Found OBD %p type %s\n", obd, obd->obd_type->typ_name);
+ CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LOV idx %d\n",
+ obd->obd_name, obd->obd_uuid, obd->obd_minor, obd,
+ obd->obd_type->typ_name, i);
if (strcmp(obd->obd_type->typ_name, "osc") != 0) {
LBUG();
GOTO(out, rc = -EBADF);
}
- if (lov->tgts[i].active == activate) {
+ if (tgt->active == activate) {
CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd,
activate ? "" : "in");
GOTO(out, rc = -EALREADY);
CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in");
- lov->tgts[i].active = activate;
+ tgt->active = activate;
if (activate) {
/*
* foreach(export)
lov->desc.ld_active_tgt_count--;
}
+#warning "FIXME: walk open files list for objects that need opening"
EXIT;
out:
spin_unlock(&lov->lov_lock);
struct lov_stripe_md *lsm;
struct lov_oinfo *loi;
struct obdo *tmp;
- int ost_count, ost_idx = 1;
+ int ost_count, ost_idx;
+ int first = 1, obj_alloc = 0;
int rc = 0, i;
ENTRY;
if (!export)
RETURN(-EINVAL);
- tmp = obdo_alloc();
- if (!tmp)
- RETURN(-ENOMEM);
-
lov = &export->exp_obd->u.lov;
if (!lov->desc.ld_active_tgt_count)
RETURN(-EIO);
- spin_lock(&lov->lov_lock);
- ost_count = lov->desc.ld_tgt_count;
+ tmp = obdo_alloc();
+ if (!tmp)
+ RETURN(-ENOMEM);
lsm = *ea;
- /* Can't create more stripes than we have targets (incl inactive). */
- if (lsm && lsm->lsm_stripe_count > lov->desc.ld_tgt_count)
- GOTO(out_tmp, rc = -EINVAL);
-
- /* Free the user lsm if it needs to be changed, to avoid memory leaks */
- if (!lsm || (lsm &&
- lsm->lsm_stripe_count > lov->desc.ld_active_tgt_count)) {
- struct lov_stripe_md *lsm_new = NULL;
- rc = obd_alloc_memmd(conn, &lsm_new);
- if (rc < 0) {
- spin_unlock(&lov->lov_lock);
- if (lsm)
- obd_free_memmd(conn, &lsm);
+ if (!lsm) {
+ rc = obd_alloc_memmd(conn, &lsm);
+ if (rc < 0)
GOTO(out_tmp, rc);
- }
- if (lsm) {
- LASSERT(lsm->lsm_magic == LOV_MAGIC);
- CERROR("replace user LOV MD: stripes %u > %u active\n",
- lsm->lsm_stripe_count,
- lov->desc.ld_active_tgt_count);
- lsm_new->lsm_stripe_offset = lsm->lsm_stripe_offset;
- lsm_new->lsm_stripe_size = lsm->lsm_stripe_size;
- lsm_new->lsm_stripe_pattern = lsm->lsm_stripe_pattern;
- obd_free_memmd(conn, &lsm);
- }
- lsm = lsm_new;
- ost_idx = 0; /* if lsm->lsm_stripe_offset is set yet */
+
+ rc = 0;
lsm->lsm_magic = LOV_MAGIC;
}
+ ost_count = lov->desc.ld_tgt_count;
+
LASSERT(oa->o_valid & OBD_MD_FLID);
lsm->lsm_object_id = oa->o_id;
if (!lsm->lsm_stripe_size)
lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
- /* Because of 64-bit divide/mod operations only work with a 32-bit
- * divisor in a 32-bit kernel, we cannot support a stripe width
- * of 4GB or larger on 32-bit CPUs.
- */
- if (lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL) {
- CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n",
- lsm->lsm_stripe_size, lsm->lsm_stripe_count, ~0UL);
- spin_unlock(&lov->lov_lock);
- GOTO(out_free, rc = -EINVAL);
- }
-
- if (!ost_idx || lsm->lsm_stripe_offset >= ost_count) {
+ if (!*ea || lsm->lsm_stripe_offset >= ost_count) {
int mult = lsm->lsm_object_id * lsm->lsm_stripe_count;
int stripe_offset = mult % ost_count;
int sub_offset = (mult / ost_count) % lsm->lsm_stripe_count;
- lsm->lsm_stripe_offset = stripe_offset + sub_offset;
- }
-
- /* Start with lsm_stripe_offset on an active OSC to avoid confusion */
- while (!lov->tgts[lsm->lsm_stripe_offset].active)
- lsm->lsm_stripe_offset = (lsm->lsm_stripe_offset+1) % ost_count;
-
- /* Pick the OSTs before we release the lock */
- ost_idx = lsm->lsm_stripe_offset;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- CDEBUG(D_INODE, "objid "LPX64"[%d] is ost_idx %d (uuid %s)\n",
- lsm->lsm_object_id, i, ost_idx, lov->tgts[ost_idx].uuid);
- loi->loi_ost_idx = ost_idx;
- do {
- ost_idx = (ost_idx + 1) % ost_count;
- } while (!lov->tgts[ost_idx].active);
- }
-
- spin_unlock(&lov->lov_lock);
+ ost_idx = stripe_offset + sub_offset;
+ } else
+ ost_idx = lsm->lsm_stripe_offset;
CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
- lsm->lsm_stripe_count,lsm->lsm_object_id,lsm->lsm_stripe_offset);
+ lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
struct lov_stripe_md obj_md;
struct lov_stripe_md *obj_mdp = &obj_md;
+ int err;
- ost_idx = loi->loi_ost_idx;
+ if (lov->tgts[ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
+ continue;
+ }
/* create data objects with "parent" OA */
memcpy(tmp, oa, sizeof(*tmp));
/* XXX: LOV STACKING: use real "obj_mdp" sub-data */
- rc = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
- if (rc) {
- CERROR("error creating objid "LPX64" sub-object on "
- "OST idx %d: rc = %d\n", oa->o_id, ost_idx, rc);
- GOTO(out_cleanup, rc);
+                err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp);
+                if (err) {
+                        /* A failure is only reported, and only the first one
+                         * is kept in rc, when the target is still marked
+                         * active.  Either way, go on to the next target.
+                         */
+                        if (lov->tgts[ost_idx].active) {
+                                /* The adjacent literals are concatenated, so
+                                 * the first must end in a space to keep
+                                 * "sub-object" and "on" apart in the log.
+                                 */
+                                CERROR("error creating objid "LPX64" sub-object "
+                                       "on OST idx %d: rc = %d\n",
+                                       oa->o_id, ost_idx, err);
+                                if (!rc)
+                                        rc = err;
+                        }
+                        continue;
}
loi->loi_id = tmp->o_id;
+ loi->loi_ost_idx = ost_idx;
CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
lsm->lsm_object_id, loi->loi_id, ost_idx);
+
+ if (first) {
+ lsm->lsm_stripe_offset = ost_idx;
+ first = 0;
+ }
+
+ ++obj_alloc;
+ ++loi;
+
+ /* If we have allocated enough objects, we are OK */
+ if (obj_alloc == lsm->lsm_stripe_count) {
+ rc = 0;
+ GOTO(out_done, rc);
+ }
}
+ if (*ea)
+ GOTO(out_cleanup, rc);
+ else {
+ struct lov_stripe_md *lsm_new;
+ /* XXX LOV STACKING call into osc for sizes */
+ int size = lov_stripe_md_size(obj_alloc);
+
+ OBD_ALLOC(lsm_new, size);
+ if (!lsm_new)
+ GOTO(out_cleanup, rc = -ENOMEM);
+ memcpy(lsm_new, lsm, size);
+ /* XXX LOV STACKING call into osc for sizes */
+ OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count));
+ lsm = lsm_new;
+ }
+ out_done:
*ea = lsm;
out_tmp:
obdo_free(tmp);
- RETURN(rc);
+ return rc;
out_cleanup:
while (i-- > 0) {
oa->o_id, loi->loi_id, loi->loi_ost_idx,
err);
}
- out_free:
if (!*ea)
obd_free_memmd(conn, &lsm);
goto out_tmp;
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
int err;
if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
/* Orphan clean up will (someday) fix this up. */
continue;
}
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
if (oa->o_valid & OBD_MD_FLHANDLE)
lfh = lov_handle2lfh(obdo_handle(oa));
+ CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n",
+ lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size);
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
int err;
- if (loi->loi_id == 0)
- continue;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0)
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
continue;
+ }
CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
"%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx);
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
continue;
}
handle->addr = (__u64)(unsigned long)lfh;
handle->cookie = lfh->lfh_cookie;
oa->o_valid |= OBD_MD_FLHANDLE;
+ spin_lock(&export->exp_lov_data.led_lock);
list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head);
+ spin_unlock(&export->exp_lov_data.led_lock);
out_tmp:
obdo_free(tmp);
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
lov = &export->exp_obd->u.lov;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0)
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
continue;
+ }
/* create data objects with "parent" OA */
memcpy(&tmp, oa, sizeof(tmp));
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
if (starti == endi)
continue;
+
/* create data objects with "parent" OA */
memcpy(&tmp, oa, sizeof(tmp));
tmp.o_id = loi->loi_id;
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
RETURN(rc);
}
+/* Allocate a lov_lock_handles with room for one lustre_handle per stripe of
+ * @lsm and fill its cookie with random bytes.
+ *
+ * Returns the new structure, or NULL if the allocation failed.
+ */
+static struct lov_lock_handles *lov_newlockh(struct lov_stripe_md *lsm)
+{
+        struct lov_lock_handles *llh;
+
+        OBD_ALLOC(llh, sizeof(*llh) +
+                  sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
+        if (llh != NULL)
+                get_random_bytes(&llh->llh_cookie, sizeof(llh->llh_cookie));
+
+        return llh;
+}
+
+/* Convert a lustre_handle back into the lov_lock_handles it refers to.
+ *
+ * We are only ever passed local lock handles here, so we do not need to
+ * validate (and we can't really because these structs are variable sized
+ * and therefore alloced, and not from a private slab).
+ *
+ * We just check because we can: @handle must be non-NULL with a non-zero
+ * addr, and the cookie stored in the structure must equal handle->cookie.
+ * Returns NULL if any of these checks fails.
+ *
+ * This function has no ENTRY, so every path leaves through a plain
+ * "return" rather than a mix of RETURN() and return.
+ */
+static struct lov_lock_handles *lov_h2lovlockh(struct lustre_handle *handle)
+{
+        struct lov_lock_handles *lov_lockh;
+
+        if (!handle || !handle->addr)
+                return NULL;
+
+        lov_lockh = (struct lov_lock_handles *)(unsigned long)(handle->addr);
+        if (lov_lockh->llh_cookie != handle->cookie)
+                return NULL;
+
+        return lov_lockh;
+}
+
static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm,
struct lustre_handle *parent_lock,
__u32 type, void *cookie, int cookielen, __u32 mode,
int *flags, void *cb, void *data, int datalen,
- struct lustre_handle *lockhs)
+ struct lustre_handle *lockh)
{
struct obd_export *export = class_conn2export(conn);
+ struct lov_lock_handles *lov_lockh = NULL;
+ struct lustre_handle *lov_lockhp;
struct lov_obd *lov;
struct lov_oinfo *loi;
struct lov_stripe_md submd;
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
if (!export || !export->exp_obd)
RETURN(-ENODEV);
- memset(lockhs, 0, sizeof(*lockhs) * lsm->lsm_stripe_count);
+ if (lsm->lsm_stripe_count > 1) {
+ lov_lockh = lov_newlockh(lsm);
+ if (!lov_lockh)
+ RETURN(-ENOMEM);
+
+ lockh->addr = (__u64)(unsigned long)lov_lockh;
+ lockh->cookie = lov_lockh->llh_cookie;
+ lov_lockhp = lov_lockh->llh_handles;
+ } else
+ lov_lockhp = lockh;
lov = &export->exp_obd->u.lov;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+ i++, loi++, lov_lockhp++) {
struct ldlm_extent *extent = (struct ldlm_extent *)cookie;
struct ldlm_extent sub_ext;
- if (lov->tgts[loi->loi_ost_idx].active == 0)
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
continue;
+ }
*flags = 0;
sub_ext.start = lov_stripe_offset(lsm, extent->start, i);
sub_ext.end = lov_stripe_offset(lsm, extent->end, i);
- if (sub_ext.start == sub_ext.end)
+ if (sub_ext.start == sub_ext.end /* || !active */)
continue;
+ /* XXX LOV STACKING: submd should be from the subobj */
submd.lsm_object_id = loi->loi_id;
- /* XXX submd should be that from the subobj, it should come
- * opaquely from the LOV.
- */
submd.lsm_stripe_count = 0;
/* XXX submd is not fully initialized here */
*flags = 0;
rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd,
parent_lock, type, &sub_ext, sizeof(sub_ext),
- mode, flags, cb, data, datalen, &(lockhs[i]));
+ mode, flags, cb, data, datalen, lov_lockhp);
// XXX add a lock debug statement here
+ if (rc)
+ memset(lov_lockhp, 0, sizeof(*lov_lockhp));
if (rc && lov->tgts[loi->loi_ost_idx].active) {
CERROR("Error enqueue objid "LPX64" subobj "LPX64
" on OST idx %d: rc = %d\n", lsm->lsm_object_id,
goto out_locks;
}
}
-
RETURN(0);
- out_locks:
- for (i--, loi = &lsm->lsm_oinfo[i]; i >= 0; i--, loi--) {
+out_locks:
+ while (loi--, lov_lockhp--, i-- > 0) {
+ struct lov_stripe_md submd;
int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0)
+
+ if (lov_lockhp->addr == 0 ||
+ lov->tgts[loi->loi_ost_idx].active == 0)
continue;
+ /* XXX LOV STACKING: submd should be from the subobj */
submd.lsm_object_id = loi->loi_id;
submd.lsm_stripe_count = 0;
err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
- mode, &lockhs[i]);
+ mode, lov_lockhp);
if (err) {
- CERROR("Error cancelling objid "LPX64" subobj "LPX64
+ CERROR("Error cancelling objid "LPX64
" on OST idx %d after enqueue error: rc = %d\n",
loi->loi_id, loi->loi_ost_idx, err);
}
}
+
+ if (lsm->lsm_stripe_count > 1) {
+ lov_lockh->llh_cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE(lov_lockh, sizeof(*lov_lockh) +
+ sizeof(*lov_lockh->llh_handles) *
+ lsm->lsm_stripe_count);
+ }
+ lockh->addr = 0;
+ lockh->cookie = DEAD_HANDLE_MAGIC;
+
RETURN(rc);
}
static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
- __u32 mode, struct lustre_handle *lockhs)
+ __u32 mode, struct lustre_handle *lockh)
{
struct obd_export *export = class_conn2export(conn);
+ struct lov_lock_handles *lov_lockh = NULL;
+ struct lustre_handle *lov_lockhp;
struct lov_obd *lov;
struct lov_oinfo *loi;
int rc = 0, i;
}
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#lx != %#lx\n",
+ CERROR("LOV striping magic bad %#x != %#x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
if (!export || !export->exp_obd)
RETURN(-ENODEV);
+ LASSERT(lockh);
+ if (lsm->lsm_stripe_count > 1) {
+ lov_lockh = lov_h2lovlockh(lockh);
+ if (!lov_lockh) {
+ CERROR("LOV: invalid lov lock handle %p\n", lockh);
+ RETURN(-EINVAL);
+ }
+
+ lov_lockhp = lov_lockh->llh_handles;
+ } else
+ lov_lockhp = lockh;
+
lov = &export->exp_obd->u.lov;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+ i++, loi++, lov_lockhp++ ) {
struct lov_stripe_md submd;
int err;
- if (lov->tgts[loi->loi_ost_idx].active == 0)
- continue;
-
- if (lockhs[i].addr == 0)
+ if (lov_lockhp->addr == 0) {
+ CDEBUG(D_HA, "lov idx %d no lock?\n", loi->loi_ost_idx);
continue;
+ }
+ /* XXX LOV STACKING: submd should be from the subobj */
submd.lsm_object_id = loi->loi_id;
submd.lsm_stripe_count = 0;
err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd,
- mode, &lockhs[i]);
- if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("Error cancel objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n", lsm->lsm_object_id,
- loi->loi_id, loi->loi_ost_idx, err);
- if (!rc)
- rc = err;
+ mode, lov_lockhp);
+ if (err) {
+ if (lov->tgts[loi->loi_ost_idx].active) {
+ CERROR("Error cancel objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ lsm->lsm_object_id,
+ loi->loi_id, loi->loi_ost_idx, err);
+ if (!rc)
+ rc = err;
+ }
}
}
+
+ if (lsm->lsm_stripe_count > 1) {
+ lov_lockh->llh_cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE(lov_lockh, sizeof(*lov_lockh) +
+ sizeof(*lov_lockh->llh_handles) *
+ lsm->lsm_stripe_count);
+ }
+ lockh->addr = 0;
+ lockh->cookie = DEAD_HANDLE_MAGIC;
+
RETURN(rc);
}
struct obd_export *export = class_conn2export(conn);
struct lov_obd *lov;
struct lov_oinfo *loi;
- int rc = 0, i, err;
+ int rc = 0, i;
ENTRY;
if (!lsm) {
lov = &export->exp_obd->u.lov;
for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
struct lov_stripe_md submd;
+ int err;
submd.lsm_object_id = loi->loi_id;
submd.lsm_stripe_count = 0;
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
int err;
- if (!lov->tgts[i].active)
+ if (!lov->tgts[i].active) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", i);
continue;
+ }
err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
if (err) {
- CERROR("Error statfs OSC %s idx %d: err = %d\n",
+ CERROR("Error statfs OSC %s i %d: err = %d\n",
lov->tgts[i].uuid, i, err);
if (!rc)
rc = err;
{
struct obd_device *obddev = class_conn2obd(conn);
struct lov_obd *lov = &obddev->u.lov;
- struct obd_ioctl_data *data = karg;
int i, count = lov->desc.ld_tgt_count;
int rc;
switch (cmd) {
case IOC_LOV_SET_OSC_ACTIVE: {
+ struct obd_ioctl_data *data = karg;
rc = lov_set_osc_active(lov,data->ioc_inlbuf1,data->ioc_offset);
break;
}
case OBD_IOC_LOV_GET_CONFIG: {
+ struct obd_ioctl_data *data = karg;
struct lov_tgt_desc *tgtdesc;
struct lov_desc *desc;
obd_uuid_t *uuidp;
OBD_FREE(buf, len);
break;
}
+ case LL_IOC_LOV_SETSTRIPE:
+ rc = lov_setstripe(conn, karg, uarg);
+ break;
+ case LL_IOC_LOV_GETSTRIPE:
+ rc = lov_getstripe(conn, karg, uarg);
+ break;
default:
if (count == 0)
RETURN(-ENOTTY);
rc = 0;
for (i = 0; i < count; i++) {
- int err = obd_iocontrol(cmd, &lov->tgts[i].conn,
- len, karg, uarg);
+ int err;
+
+ err = obd_iocontrol(cmd, &lov->tgts[i].conn,
+ len, karg, uarg);
if (err && !rc)
rc = err;
}
}
struct obd_ops lov_obd_ops = {
+ o_owner: THIS_MODULE,
o_attach: lov_attach,
o_detach: lov_detach,
o_setup: lov_setup,
#include <linux/lustre_net.h>
#include <linux/obd.h>
#include <linux/obd_lov.h>
+#include <linux/obd_class.h>
#include <linux/obd_support.h>
/* lov_packdesc() is in mds/mds_lov.c */
-
void lov_unpackdesc(struct lov_desc *ld)
{
ld->ld_tgt_count = NTOH__u32(ld->ld_tgt_count);
ld->ld_pattern = HTON__u32(ld->ld_pattern);
}
+/* Log the contents of a struct lov_mds_md through CDEBUG.
+ *
+ * @level is the debug mask passed to every CDEBUG call.
+ * @lmm   is the descriptor to dump: its header fields are printed first,
+ *        then one line for each of the lmm_ost_count entries of lmm_objects.
+ */
+void lov_dump_lmm(int level, struct lov_mds_md *lmm)
+{
+        struct lov_object_id *loi;
+        int idx;
+
+        /* With the '#' flag the "0x" prefix counts against the field width,
+         * so a width of 10 is needed to zero-pad all eight hex digits.
+         */
+        CDEBUG(level, "objid "LPX64", magic %#010x, ost_count %u\n",
+               lmm->lmm_object_id, lmm->lmm_magic, lmm->lmm_ost_count);
+        CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n",
+               lmm->lmm_stripe_size, lmm->lmm_stripe_count,
+               lmm->lmm_stripe_offset);
+        for (idx = 0, loi = lmm->lmm_objects; idx < lmm->lmm_ost_count;
+             idx++, loi++) {
+                /* idx is a signed int, so it is printed with %d */
+                CDEBUG(level, "ost idx %d subobj "LPX64"\n", idx,
+                       loi->l_object_id);
+        }
+}
+
+#define LMM_ASSERT(test) /* LASSERT that first dumps the lmm; 'test' is evaluated twice */ \
+do { \
+ if (!(test)) lov_dump_lmm(D_ERROR, lmm); /* 'lmm' is taken from the scope where the macro is used */ \
+ LASSERT(test); /* so we know what assertion failed */ \
+} while(0)
+
/* Pack LOV object metadata for shipment to the MDS.
*
* XXX In the future, this will be enhanced to get the EA size from the
ENTRY;
if (lsm) {
+ int i, max = 0;
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("bad mem LOV MAGIC: %#08x != %#08x\n",
+ CERROR("bad mem LOV MAGIC: %#010x != %#010x\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
stripe_count = lsm->lsm_stripe_count;
+
+ for (i = 0,loi = lsm->lsm_oinfo; i < stripe_count; i++,loi++) {
+ if (loi->loi_ost_idx > max)
+ max = loi->loi_ost_idx;
+ }
+ ost_count = max + 1;
}
/* XXX LOV STACKING call into osc for sizes */
lmm->lmm_stripe_count = (stripe_count);
if (!lsm)
RETURN(lmm_size);
+
/* XXX endianness */
lmm->lmm_magic = (lsm->lsm_magic);
lmm->lmm_object_id = (lsm->lsm_object_id);
LASSERT(lsm->lsm_object_id);
lmm->lmm_stripe_size = (lsm->lsm_stripe_size);
- lmm->lmm_stripe_pattern = (lsm->lsm_stripe_pattern);
lmm->lmm_stripe_offset = (lsm->lsm_stripe_offset);
- lmm->lmm_ost_count = (lov->desc.ld_tgt_count);
+ lmm->lmm_ost_count = (ost_count);
/* Only fill in the object ids which we are actually using.
* Assumes lmm_objects is otherwise zero-filled. */
RETURN(lmm_size);
}
+/* Choose the number of stripes to use on @lov.
+ *
+ * A @stripe_count of zero selects the LOV descriptor's default stripe count.
+ * If the result is still zero, or is larger than the number of active
+ * targets, the active target count is returned instead.
+ */
+static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
+{
+        if (stripe_count == 0)
+                stripe_count = lov->desc.ld_default_stripe_count;
+
+        /* A usable request is non-zero and within the active target count. */
+        if (stripe_count != 0 &&
+            stripe_count <= lov->desc.ld_active_tgt_count)
+                return stripe_count;
+
+        stripe_count = lov->desc.ld_active_tgt_count;
+        return stripe_count;
+}
+
int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
struct lov_mds_md *lmm)
{
struct lov_obd *lov = &obd->u.lov;
struct lov_stripe_md *lsm;
struct lov_oinfo *loi;
- int ost_count = lov->desc.ld_active_tgt_count;
+ int ost_count;
int ost_offset = 0;
- int stripe_count = 0;
+ int stripe_count;
int lsm_size;
int i;
ENTRY;
RETURN(-EINVAL);
}
stripe_count = (lmm->lmm_stripe_count);
- }
-
- if (!stripe_count)
- stripe_count = lov->desc.ld_default_stripe_count;
- if (!stripe_count || stripe_count > ost_count)
- stripe_count = ost_count;
+ LASSERT(stripe_count);
+ } else
+ stripe_count = lov_get_stripecnt(lov, 0);
/* XXX LOV STACKING call into osc for sizes */
lsm_size = lov_stripe_md_size(stripe_count);
ost_offset = lsm->lsm_stripe_offset = (lmm->lmm_stripe_offset);
lsm->lsm_magic = (lmm->lmm_magic);
lsm->lsm_object_id = (lmm->lmm_object_id);
- LASSERT(lsm->lsm_object_id);
lsm->lsm_stripe_size = (lmm->lmm_stripe_size);
- lsm->lsm_stripe_pattern = (lmm->lmm_stripe_pattern);
+
+ ost_count = (lmm->lmm_ost_count);
+
+ LMM_ASSERT(lsm->lsm_object_id);
+ LMM_ASSERT(ost_count);
for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) {
ost_offset %= ost_count;
if (!lmm->lmm_objects[ost_offset].l_object_id)
continue;
- LASSERT(loi - lsm->lsm_oinfo < stripe_count);
+ LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
/* XXX LOV STACKING call down to osc_unpackmd() */
loi->loi_id = (lmm->lmm_objects[ost_offset].l_object_id);
loi->loi_ost_idx = ost_offset;
loi++;
}
- LASSERT(loi - lsm->lsm_oinfo == stripe_count);
+ LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
+ LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count);
RETURN(lsm_size);
}
+
+/* Configure object striping information on a new file.
+ *
+ * @lmmu is a pointer to a user struct with one or more of the fields set to
+ * indicate the application preference: lmm_stripe_count, lmm_stripe_size,
+ * lmm_stripe_offset, and lmm_stripe_pattern.  lmm_magic must be LOV_MAGIC.
+ * (lmm_stripe_pattern is not read by this function.)
+ * @lsmp is a pointer to an in-core stripe MD that needs to be filled in.
+ *
+ * Returns 0 with *lsmp pointing at a newly allocated stripe MD, -EFAULT if
+ * @lmmu cannot be copied in, -EINVAL if the magic or any requested value is
+ * rejected, or -ENOMEM if the stripe MD cannot be allocated.
+ */
+int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp,
+                  struct lov_mds_md *lmmu)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_mds_md lmm;
+        struct lov_stripe_md *lsm;
+        int stripe_count;
+        int rc;
+        ENTRY;
+
+        rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
+        if (rc)
+                RETURN(-EFAULT);
+
+        if (lmm.lmm_magic != LOV_MAGIC) {
+                CERROR("bad wire LOV MAGIC: %#08x != %#08x\n",
+                       lmm.lmm_magic, LOV_MAGIC);
+                RETURN(-EINVAL);
+        }
+        if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count) {
+                CERROR("stripe count %d more than OST count %d\n",
+                       (int)lmm.lmm_stripe_count, lov->desc.ld_tgt_count);
+                RETURN(-EINVAL);
+        }
+        if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count) {
+                /* Report the offending offset, not the stripe count. */
+                CERROR("stripe offset %d more than max OST index %d\n",
+                       (int)lmm.lmm_stripe_offset, lov->desc.ld_tgt_count);
+                RETURN(-EINVAL);
+        }
+        if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) {
+                CERROR("stripe size %u not multiple of %lu\n",
+                       lmm.lmm_stripe_size, PAGE_SIZE);
+                RETURN(-EINVAL);
+        }
+        /* Widen to 64 bits before multiplying.  Done in the operands' own
+         * width, the product wraps before it is compared, and the check
+         * can never fire. */
+        if ((__u64)lmm.lmm_stripe_size * lmm.lmm_stripe_count > ~0UL) {
+                CERROR("stripe width %ux%u > %lu on 32-bit system\n",
+                       lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL);
+                RETURN(-EINVAL);
+        }
+
+        /* Zero means "use the default"; the helper also clamps the count
+         * to the number of active targets. */
+        stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count);
+
+        /* XXX LOV STACKING call into osc for sizes */
+        OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count));
+        if (!lsm)
+                RETURN(-ENOMEM);
+
+        lsm->lsm_magic = LOV_MAGIC;
+        /* This is all validated in lov_create() */
+        lsm->lsm_stripe_count = stripe_count;
+        lsm->lsm_stripe_offset = lmm.lmm_stripe_offset;
+        lsm->lsm_stripe_size = lmm.lmm_stripe_size;
+
+        *lsmp = lsm;
+
+        /* rc is still 0 from the successful copy_from_user() above. */
+        RETURN(rc);
+}
+
+/* Retrieve object striping information.
+ *
+ * @lmmu is read with copy_from_user() and written with copy_to_user(), so
+ * it is a user-space buffer.  On entry its lmm_magic must be LOV_MAGIC and
+ * its lmm_ost_count gives the maximum number of OST indices which will fit
+ * in the buffer; the request fails with -EOVERFLOW when that is smaller
+ * than the LOV target count.
+ *
+ * Returns 0 on success, -ENODATA if @lsm is NULL, -EFAULT on a failed user
+ * copy, -EINVAL on a bad magic, or the error returned by lov_packmd().
+ */
+int lov_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                  struct lov_mds_md *lmmu)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov = &obd->u.lov;
+        struct lov_mds_md user_hdr, *packed = NULL;
+        int tgt_total, packed_len, rc;
+        ENTRY;
+
+        if (lsm == NULL)
+                RETURN(-ENODATA);
+
+        if (copy_from_user(&user_hdr, lmmu, sizeof(user_hdr)) != 0)
+                RETURN(-EFAULT);
+
+        if (user_hdr.lmm_magic != LOV_MAGIC)
+                RETURN(-EINVAL);
+
+        /* XXX we _could_ check if indices > user lmm_ost_count are zero */
+        tgt_total = lov->desc.ld_tgt_count;
+        if (user_hdr.lmm_ost_count < tgt_total)
+                RETURN(-EOVERFLOW);
+
+        /* A non-negative return from lov_packmd() is the packed size. */
+        packed_len = lov_packmd(conn, &packed, lsm);
+        if (packed_len < 0)
+                RETURN(packed_len);
+
+        rc = 0;
+        if (packed_len != 0 && copy_to_user(lmmu, packed, packed_len) != 0)
+                rc = -EFAULT;
+
+        obd_free_wiremd(conn, &packed);
+
+        RETURN(rc);
+}
* Common STATUS namespace
*/
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
+int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- int len = 0;
struct obd_device* dev = (struct obd_device*)data;
- len += snprintf(page, count, "%s\n", dev->obd_uuid);
- return len;
-
-
+ return snprintf(page, count, "%s\n", dev->obd_uuid);
}
-int rd_stripesize(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_stripesize(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
- struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- (__u64)(lov->desc.ld_default_stripe_size));
-
- return len;
+ struct obd_device *dev = (struct obd_device*)data;
+ struct lov_desc *desc = &dev->u.lov.desc;
+
+ return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size);
}
-int rd_stripeoffset(char* page, char **start, off_t off, int count, int *eof,
+int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- lov->desc.ld_default_stripe_offset);
- return len;
+ return snprintf(page, count, LPU64"\n",
+ lov->desc.ld_default_stripe_offset);
}
-int rd_stripetype(char* page, char **start, off_t off, int count, int *eof,
+int rd_stripetype(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- (__u64)(lov->desc.ld_pattern));
- return len;
+ return snprintf(page, count, "%u\n", lov->desc.ld_pattern);
}
-int rd_stripecount(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_stripecount(char *page, char **start, off_t off, int count, int *eof,
void *data)
-{
+{
struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- (__u64)(lov->desc.ld_default_stripe_count));
- return len;
+ return snprintf(page, count, "%u\n", lov->desc.ld_default_stripe_count);
}
-int rd_numobd(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_numobd(char *page, char **start, off_t off, int count, int *eof,
void *data)
-{
- struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
- struct lov_obd* lov=&dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- (__u64)(lov->desc.ld_tgt_count));
- return len;
+{
+ struct obd_device *dev = (struct obd_device*)data;
+ struct lov_obd *lov = &dev->u.lov;
+
+ return snprintf(page, count, "%u\n", lov->desc.ld_tgt_count);
}
-int rd_activeobd(char* page, char **start, off_t off, int count, int *eof,
+int rd_activeobd(char *page, char **start, off_t off, int count, int *eof,
void *data)
-{
+{
struct obd_device* dev = (struct obd_device*)data;
- int len = 0;
struct lov_obd* lov = &dev->u.lov;
- len += snprintf(page, count, LPU64"\n",
- (__u64)(lov->desc.ld_active_tgt_count));
- return len;
+ return snprintf(page, count, "%u\n", lov->desc.ld_active_tgt_count);
}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
+int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof,
+int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
}
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
+int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
}
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
+int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
}
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
+int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
}
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
+ void *data)
{
return 0;
}
-int rd_target(char* page, char **start, off_t off, int count, int *eof,
+int rd_target(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* dev = (struct obd_device*)data;
int len = 0, i = 0;
struct lov_obd* lov = &dev->u.lov;
struct lov_tgt_desc* tgts = lov->tgts;
- while(i < lov->desc.ld_tgt_count){
- len += snprintf(&page[len], count, "%d: %s\n", i, tgts->uuid);
+ while (i < lov->desc.ld_tgt_count) {
+ len += snprintf(&page[len], count - len, "%d: %s %sACTIVE\n",
+ i, tgts->uuid, tgts->active ? "" : "IN");
i++;
tgts++;
}
-
+
return len;
}
+
int rd_mdc(char* page, char **start, off_t off, int count, int *eof, void *data)
{
struct obd_device* dev = (struct obd_device*)data;
{"status/kbytesfree", rd_kbfree, 0, 0},
{"status/target_obd", rd_target, 0, 0},
{"status/target_mdc", rd_mdc, 0, 0},
-
{0}
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
+
+ return snprintf(page, count, "%d\n", class->typ_refcnt);
}
struct lprocfs_vars status_class_var[]={
modulefs_DATA = mdc.o
EXTRA_PROGRAMS = mdc
-LINX= mds_updates.c ll_pack.c client.c
+LINX= mds_updates.c client.c
mdc_SOURCES = mdc_request.c mdc_reint.c lproc_mdc.c $(LINX)
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c .
mds_updates.c:
test -e mds_updates.c || ln -sf $(top_srcdir)/lib/mds_updates.c .
client.c:
extern struct lprocfs_vars status_var_nm_1[];
extern struct lprocfs_vars status_class_var[];
-/* should become mdc_getinfo() */
-int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
+/* Helper that implements most of mdc_getstatus and signal_completed_replay. */
+static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
+ int level, int msg_flags)
{
struct ptlrpc_request *req;
struct mds_body *body;
int rc, size = sizeof(*body);
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
- NULL);
+ req = ptlrpc_prep_req(imp, MDS_GETSTATUS, 1, &size, NULL);
if (!req)
GOTO(out, rc = -ENOMEM);
body = lustre_msg_buf(req->rq_reqmsg, 0);
- req->rq_level = LUSTRE_CONN_CON;
+ req->rq_level = level;
req->rq_replen = lustre_msg_size(1, &size);
-
+
mds_pack_req_body(req);
+ req->rq_reqmsg->flags |= msg_flags;
rc = ptlrpc_queue_wait(req);
if (!rc) {
return rc;
}
+/* Send a GETSTATUS request on the client import behind @conn, at level
+ * LUSTRE_CONN_CON and with no extra message flags.
+ *
+ * should become mdc_getinfo()
+ */
+int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
+{
+        struct obd_import *imp = class_conn2cliimp(conn);
+
+        return send_getstatus(imp, rootfid, LUSTRE_CONN_CON, 0);
+}
+
int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
struct ptlrpc_request **request)
{
RETURN(rc);
}
-
int mdc_getattr(struct lustre_handle *conn,
- obd_id ino, int type, unsigned long valid, size_t ea_size,
+ obd_id ino, int type, unsigned long valid, unsigned int ea_size,
struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
size[bufcount] = ea_size;
bufcount++;
body->size = ea_size;
- CDEBUG(D_INODE, "reserving %d bytes for MD/symlink in packet\n",
+ CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
ea_size);
}
req->rq_replen = lustre_msg_size(bufcount, size);
return rc;
}
+/* Send an MDS_GETATTR_NAME request for @filename (@namelen bytes) under the
+ * directory inode @parent.
+ *
+ * @valid is the attribute mask placed in the request body.  When @ea_size
+ * is non-zero, that many bytes are reserved in the reply for the stripe MD
+ * or symlink target, and OBD_MD_FLEASIZE is added to the mask.
+ *
+ * *request is always set: to the request on success or RPC failure, or to
+ * NULL if the request could not be allocated.  Returns 0, -ENOMEM, or the
+ * error from ptlrpc_queue_wait().
+ */
+int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
+                     char *filename, int namelen, unsigned long valid,
+                     unsigned int ea_size, struct ptlrpc_request **request)
+{
+        struct ptlrpc_request *req;
+        struct mds_body *body;
+        int rc, size[2] = {sizeof(*body), namelen}, bufcount = 1;
+        ENTRY;
+
+        req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR_NAME, 2,
+                              size, NULL);
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+
+        /* Fold the EA flag into the mask before it is stored in the
+         * request body; setting it afterwards only changes the local
+         * variable and the flag never reaches the MDS. */
+        if (ea_size)
+                valid |= OBD_MD_FLEASIZE;
+
+        body = lustre_msg_buf(req->rq_reqmsg, 0);
+        ll_inode2fid(&body->fid1, parent);
+        body->valid = valid;
+        memcpy(lustre_msg_buf(req->rq_reqmsg, 1), filename, namelen);
+
+        if (ea_size) {
+                /* From here on size[] describes the reply buffers. */
+                size[1] = ea_size;
+                bufcount++;
+                body->size = ea_size;
+                CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
+                       ea_size);
+        }
+
+        req->rq_replen = lustre_msg_size(bufcount, size);
+        mds_pack_req_body(req);
+
+        rc = ptlrpc_queue_wait(req);
+
+        if (!rc) {
+                body = lustre_msg_buf(req->rq_repmsg, 0);
+                mds_unpack_body(body);
+        }
+
+        EXIT;
+ out:
+        *request = req;
+        return rc;
+}
+
void d_delete_aliases(struct inode *inode)
{
struct dentry *dentry = NULL;
break;
case LDLM_CB_CANCELING: {
/* Invalidate all dentries associated with this inode */
- struct inode *inode = data;
-
-#warning "FIXME: what tells us that 'inode' is valid at all?"
- if (inode->i_state & I_FREEING)
- break;
+ struct inode *inode;
- LASSERT(inode != NULL);
+ LASSERT(data != NULL);
LASSERT(data_len == sizeof(*inode));
+ /* XXX what tells us that 'data' is a valid inode at all?
+ * we should probably validate the lock handle first?
+ */
+ inode = igrab(data);
+
+ if (inode == NULL) /* inode->i_state & I_FREEING */
+ break;
+
if (S_ISDIR(inode->i_mode)) {
CDEBUG(D_INODE, "invalidating inode %lu\n",
inode->i_ino);
ll_invalidate_inode_pages(inode);
}
- if (inode != inode->i_sb->s_root->d_inode) {
- /* XXX should this igrab move up 12 lines? */
- LASSERT(igrab(inode) == inode);
+ if (inode != inode->i_sb->s_root->d_inode)
d_delete_aliases(inode);
- iput(inode);
- }
+
+ iput(inode);
break;
}
default:
struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
- DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
- body->fid1.generation, body->fid1.id);
memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
+ DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
+ rec->cr_replayfid.generation, rec->cr_replayfid.id);
}
+/* We always reserve enough space in the reply packet for a stripe MD, because
+ * we don't know in advance the file type.
+ *
+ * XXX we could get that from ext2_dir_entry_2 file_type
+ */
int mdc_enqueue(struct lustre_handle *conn, int lock_type,
struct lookup_intent *it, int lock_mode, struct inode *dir,
struct dentry *de, struct lustre_handle *lockh,
&lockh2)) {
/* We already have a lock; cancel the old one */
ldlm_lock_decref(lockh, lock_mode);
- ldlm_cli_cancel(lockh);
+ /* FIXME: bug 563 */
+ //ldlm_cli_cancel(lockh);
memcpy(lockh, &lockh2, sizeof(lockh2));
}
LDLM_LOCK_PUT(lock);
memcpy(saved->fh, &body->handle, sizeof(body->handle));
}
+/* If lmm is non-NULL and lmm_size is non-zero, the stripe MD is stored on
+ * the MDS. Otherwise, we have already read a copy from the MDS (probably
+ * during mdc_enqueue()) and we do not need to send it to the MDS again.
+ *
+ * In the future (when we support the non-intent case) we need to be able
+ * to read the stripe MD from the MDS here (need to fix mds_open() too).
+ */
int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
struct ptlrpc_request **request)
struct ptlrpc_request *req;
ENTRY;
- if (lmm && lmm_size) {
+ if (lmm_size) {
bufcount = 3;
- size[2] = size[1]; /* shuffle the spare data along */
+ size[2] = size[1]; /* shuffle the replay data along */
size[1] = lmm_size;
}
body->flags = HTON__u32(flags);
memcpy(&body->handle, fh, sizeof(body->handle));
- if (lmm && lmm_size) {
- CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
- lmm_size, ino);
- lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
- memcpy(lustre_msg_buf(req->rq_reqmsg, 1), lmm, lmm_size);
+ if (lmm_size) {
body->flags |= HTON__u32(OBD_MD_FLEASIZE);
+ if (lmm) {
+ CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
+ lmm_size, ino);
+ lustre_msg_set_op_flags(req->rq_reqmsg,MDS_OPEN_HAS_EA);
+ memcpy(lustre_msg_buf(req->rq_reqmsg,1), lmm, lmm_size);
+ }
}
req->rq_replen = lustre_msg_size(1, size);
body = lustre_msg_buf(req->rq_repmsg, 0);
mds_unpack_body(body);
memcpy(fh, &body->handle, sizeof(*fh));
- }
- /* If open is replayed, we need to fix up the fh. */
- req->rq_replay_cb = mdc_replay_open;
- replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
- replay_data->fh = fh;
+ /* If open is replayed, we need to fix up the fh. */
+ req->rq_replay_cb = mdc_replay_open;
+ replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
+ replay_data->fh = fh;
+ }
EXIT;
out:
return lprocfs_dereg_obd(dev);
}
+/* Tell the MDS that this import has finished replaying: send a GETSTATUS
+ * request at LUSTRE_CONN_RECOVD level carrying the MSG_LAST_REPLAY flag.
+ * Only the return code of the request is passed back to the caller.
+ */
+static int signal_completed_replay(struct obd_import *imp)
+{
+        /* Placeholder for the helper's root-fid argument; never read here. */
+        struct ll_fid unused_rootfid;
+        int rc;
+
+        rc = send_getstatus(imp, &unused_rootfid, LUSTRE_CONN_RECOVD,
+                            MSG_LAST_REPLAY);
+        return rc;
+}
+
static int mdc_recover(struct obd_import *imp, int phase)
{
int rc;
+ unsigned long flags;
+ struct ptlrpc_request *req;
ENTRY;
switch(phase) {
RETURN(0);
case PTLRPC_RECOVD_PHASE_RECOVER:
reconnect:
- rc = ptlrpc_reconnect_import(imp, MDS_CONNECT);
+ rc = ptlrpc_reconnect_import(imp, MDS_CONNECT, &req);
+
+ /* We were still connected, just go about our business. */
if (rc == EALREADY)
- RETURN(ptlrpc_replay(imp, 0));
- if (rc)
+ GOTO(skip_replay, rc);
+
+ if (rc) {
+ ptlrpc_req_finished(req);
RETURN(rc);
+ }
+
+ /* We can't replay, which might be a problem. */
+ if (!(lustre_msg_get_flags(req->rq_repmsg) &
+ MSG_REPLAY_IN_PROGRESS)) {
+ if (phase != PTLRPC_RECOVD_PHASE_NOTCONN) {
+ CERROR("can't replay, invalidating\n");
+ ldlm_namespace_cleanup(imp->imp_obd->obd_namespace,
+ 1);
+ ptlrpc_abort_inflight(imp);
+ }
+ goto skip_replay;
+ }
- rc = ptlrpc_replay(imp, 0 /* no last flag*/);
+ rc = ptlrpc_replay(imp);
if (rc)
RETURN(rc);
if (rc)
RETURN(rc);
- spin_lock(&imp->imp_lock);
+ rc = signal_completed_replay(imp);
+ if (rc)
+ RETURN(rc);
+
+ skip_replay:
+ ptlrpc_req_finished(req);
+ spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
- spin_unlock(&imp->imp_lock);
+ imp->imp_flags &= ~IMP_INVALID;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
ptlrpc_wake_delayed(imp);
}
struct obd_ops mdc_obd_ops = {
- o_attach: mdc_attach,
- o_detach: mdc_detach,
- o_setup: client_obd_setup,
- o_cleanup: client_obd_cleanup,
- o_connect: mdc_connect,
- o_disconnect: client_obd_disconnect,
- o_statfs: mdc_statfs,
+ o_owner: THIS_MODULE,
+ o_attach: mdc_attach,
+ o_detach: mdc_detach,
+ o_setup: client_obd_setup,
+ o_cleanup: client_obd_cleanup,
+ o_connect: mdc_connect,
+ o_disconnect: client_obd_disconnect,
+ o_statfs: mdc_statfs
};
static int __init ptlrpc_request_init(void)
EXPORT_SYMBOL(mdc_enqueue);
EXPORT_SYMBOL(mdc_cancel_unused);
EXPORT_SYMBOL(mdc_getattr);
+EXPORT_SYMBOL(mdc_getattr_name);
EXPORT_SYMBOL(mdc_create);
EXPORT_SYMBOL(mdc_unlink);
EXPORT_SYMBOL(mdc_rename);
modulefs_DATA = mds.o
EXTRA_PROGRAMS = mds
-LINX= mds_updates.c simple.c ll_pack.c target.c
+LINX= mds_updates.c simple.c target.c
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c
mds_updates.c:
test -e mds_updates.c || ln -sf $(top_srcdir)/lib/mds_updates.c
simple.c:
if (!conn || !obd || !cluuid)
RETURN(-EINVAL);
- MOD_INC_USE_COUNT;
+ /* lctl gets a backstage, all-access pass. */
+ if (!strcmp(cluuid, "OBD_CLASS_UUID"))
+ goto dont_check_exports;
spin_lock(&obd->obd_dev_lock);
list_for_each(p, &obd->obd_exports) {
continue;
}
if (!memcmp(cluuid, mcd->mcd_uuid, sizeof mcd->mcd_uuid)) {
- /* XXX make handle-found-export a subroutine */
- LASSERT(exp->exp_obd == obd);
-
spin_unlock(&obd->obd_dev_lock);
- if (exp->exp_connection) {
- struct lustre_handle *hdl;
- hdl = &exp->exp_ldlm_data.led_import.imp_handle;
- /* Might be a re-connect after a partition. */
- if (!memcmp(conn, hdl, sizeof *conn)) {
- CERROR("%s reconnecting\n", cluuid);
- conn->addr = (__u64) (unsigned long)exp;
- conn->cookie = exp->exp_cookie;
- rc = EALREADY;
- } else {
- CERROR("%s reconnecting from %s, "
- "handle mismatch (ours %Lx/%Lx, "
- "theirs %Lx/%Lx)\n", cluuid,
- exp->exp_connection->
- c_remote_uuid, hdl->addr,
- hdl->cookie, conn->addr,
- conn->cookie);
- /* XXX disconnect them here? */
- memset(conn, 0, sizeof *conn);
- rc = -EALREADY;
- }
- MOD_DEC_USE_COUNT;
- RETURN(rc);
- }
- conn->addr = (__u64) (unsigned long)exp;
- conn->cookie = exp->exp_cookie;
- CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n",
- cluuid, exp);
- CDEBUG(D_IOCTL,"connect: addr %Lx cookie %Lx\n",
- (long long)conn->addr, (long long)conn->cookie);
- RETURN(0);
+ LASSERT(exp->exp_obd == obd);
+
+ RETURN(target_handle_reconnect(conn, exp, cluuid));
}
}
spin_unlock(&obd->obd_dev_lock);
if (obd->u.mds.mds_recoverable_clients != 0) {
CERROR("denying connection for new client %s: in recovery\n",
cluuid);
- MOD_DEC_USE_COUNT;
RETURN(-EBUSY);
}
+ dont_check_exports:
/* XXX There is a small race between checking the list and adding a
* new connection for the same UUID, but the real threat (list
* corruption when multiple different clients connect) is solved.
*/
rc = class_connect(conn, obd, cluuid);
if (rc)
- GOTO(out_dec, rc);
+ RETURN(rc);
exp = class_conn2export(conn);
LASSERT(exp);
med = &exp->exp_mds_data;
OBD_FREE(mcd, sizeof(*mcd));
out_export:
class_disconnect(conn);
-out_dec:
- MOD_DEC_USE_COUNT;
return rc;
}
list_for_each_safe(tmp, n, &med->med_open_head) {
struct mds_file_data *mfd =
list_entry(tmp, struct mds_file_data, mfd_list);
+ CERROR("force closing client file handle for %*s\n",
+ mfd->mfd_file->f_dentry->d_name.len,
+ mfd->mfd_file->f_dentry->d_name.name);
rc = mds_close_mfd(mfd, med);
- if (rc) {
- /* XXX better diagnostics, with file path and stuff */
- CDEBUG(D_INODE, "Error %d closing mfd %p\n", rc, mfd);
- }
+ if (rc)
+ CDEBUG(D_INODE, "Error closing file: %d\n", rc);
}
spin_unlock(&med->med_open_lock);
mds_client_free(export);
rc = class_disconnect(conn);
- if (!rc)
- MOD_DEC_USE_COUNT;
RETURN(rc);
}
if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) {
CERROR("mds: out of memory for message: size=%d\n", size);
req->rq_status = -ENOMEM;
- RETURN(0);
+ RETURN(-ENOMEM);
}
/* Flush any outstanding transactions to disk so the client will
if (rc) {
CERROR("mds: out of memory for message: size=%d\n", size[1]);
req->rq_status = -ENOMEM;
- RETURN(0);
+ RETURN(-ENOMEM);
}
- desc = lustre_msg_buf(req->rq_repmsg, 0);
- rc = mds_get_lovdesc(mds, desc);
- if (rc) {
- req->rq_status = rc;
+ if (!mds->mds_has_lov_desc) {
+ req->rq_status = -ENOENT;
RETURN(0);
}
+ desc = lustre_msg_buf(req->rq_repmsg, 0);
+ memcpy(desc, &mds->mds_lov_desc, sizeof *desc);
+ lov_packdesc(desc);
tgt_count = le32_to_cpu(desc->ld_tgt_count);
if (tgt_count * sizeof(obd_uuid_t) > streq->repbuf) {
CERROR("too many targets, enlarge client buffers\n");
RETURN(0);
}
- /* XXX the MDS should not really know about this */
- mds->mds_max_mdsize = lov_mds_md_size(tgt_count);
rc = mds_get_lovtgts(mds, tgt_count,
lustre_msg_buf(req->rq_repmsg, 1));
if (rc) {
mds_pack_inode2fid(&body->fid1, inode);
mds_pack_inode2body(body, inode);
- if (S_ISREG(inode->i_mode) /* && reqbody->valid & OBD_MD_FLEASIZE */) {
+ if (S_ISREG(inode->i_mode) && reqbody->valid & OBD_MD_FLEASIZE) {
rc = mds_pack_md(mds, req, reply_off + 1, body, inode);
} else if (S_ISLNK(inode->i_mode) && reqbody->valid & OBD_MD_LINKNAME) {
char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1);
RETURN(rc);
}
+/* Size and allocate the reply message for a getattr-style request.
+ *
+ * The reply always carries a struct mds_body.  A second buffer is added
+ * when the request body found at @offset asks for it:
+ *  - a regular file with OBD_MD_FLEASIZE set gets room for the stripe MD,
+ *    sized by asking fsfilt_get_md() for the length (zero bytes if that
+ *    fails or the MD is larger than mds_max_mdsize);
+ *  - otherwise, OBD_MD_LINKNAME gets room for the link target, capped at
+ *    the size given in the request body.
+ *
+ * Returns 0 on success, -ENOMEM when the OBD_FAIL_MDS_GETATTR_PACK fail
+ * check fires, or the lustre_pack_msg() error.  On failure req->rq_status
+ * is set to the same value.
+ */
+static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
+                                int offset)
+{
+        struct mds_obd *mds = mds_req2mds(req);
+        struct mds_body *body;
+        int rc = 0, size[2] = {sizeof(*body)}, bufcount = 1;
+        ENTRY;
+
+        body = lustre_msg_buf(req->rq_reqmsg, offset);
+
+        if (S_ISREG(inode->i_mode) && body->valid & OBD_MD_FLEASIZE) {
+                /* A NULL buffer of length 0 queries the MD size only.
+                 * Kept in its own variable so that the function's return
+                 * code is not shadowed. */
+                int md_size = fsfilt_get_md(req->rq_export->exp_obd, inode,
+                                            NULL, 0);
+                CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
+                       md_size, inode->i_ino);
+                if (md_size < 0) {
+                        if (md_size != -ENODATA)
+                                CERROR("error getting inode %lu MD: rc = %d\n",
+                                       inode->i_ino, md_size);
+                        size[bufcount] = 0;
+                } else if (md_size > mds->mds_max_mdsize) {
+                        size[bufcount] = 0;
+                        CERROR("MD size %d larger than maximum possible %u\n",
+                               md_size, mds->mds_max_mdsize);
+                } else
+                        size[bufcount] = md_size;
+                bufcount++;
+        } else if (body->valid & OBD_MD_LINKNAME) {
+                size[bufcount] = MIN(inode->i_size + 1, body->size);
+                bufcount++;
+                CDEBUG(D_INODE, "symlink size: %Lu, reply space: "LPU64"\n",
+                       inode->i_size + 1, body->size);
+        }
+
+        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) {
+                CERROR("failed MDS_GETATTR_PACK test\n");
+                req->rq_status = -ENOMEM;
+                GOTO(out, rc = -ENOMEM);
+        }
+
+        rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
+                             &req->rq_repmsg);
+        if (rc) {
+                CERROR("out of memory\n");
+                req->rq_status = rc;
+                GOTO(out, rc);
+        }
+
+        EXIT;
+ out:
+        return rc;
+}
+
static int mds_getattr_name(int offset, struct ptlrpc_request *req)
{
struct mds_obd *mds = mds_req2mds(req);
push_ctxt(&saved, &mds->mds_ctxt, &uc);
de = mds_fid2dentry(mds, &body->fid1, NULL);
if (IS_ERR(de)) {
- GOTO(out_pre_de, rc = -ENOENT);
+ GOTO(out_pre_de, rc = PTR_ERR(de));
}
dir = de->d_inode;
GOTO(out_create_de, rc = -EIO);
}
}
- ldlm_lock_dump_handle(&lockh);
+ ldlm_lock_dump_handle(D_OTHER, &lockh);
down(&dir->i_sem);
dchild = lookup_one_len(name, de, namelen - 1);
+ up(&dir->i_sem);
if (IS_ERR(dchild)) {
CDEBUG(D_INODE, "child lookup error %ld\n", PTR_ERR(dchild));
- up(&dir->i_sem);
GOTO(out_create_dchild, rc = PTR_ERR(dchild));
+ } else if (dchild->d_inode == NULL) {
+ GOTO(out_create_dchild, rc = -ENOENT);
}
+ if (req->rq_repmsg == NULL)
+ mds_getattr_pack_msg(req, dchild->d_inode, offset);
+
rc = mds_getattr_internal(mds, dchild, req, body, offset);
EXIT;
out_create_dchild:
l_dput(dchild);
- up(&dir->i_sem);
ldlm_lock_decref(&lockh, lock_mode);
out_create_de:
l_dput(de);
out_pre_de:
req->rq_status = rc;
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
- return 0;
+ return rc;
}
static int mds_getattr(int offset, struct ptlrpc_request *req)
struct mds_obd *mds = mds_req2mds(req);
struct obd_run_ctxt saved;
struct dentry *de;
- struct inode *inode;
struct mds_body *body;
struct obd_ucred uc;
- int rc = 0, size[2] = {sizeof(*body)}, bufcount = 1;
+ int rc = 0;
ENTRY;
body = lustre_msg_buf(req->rq_reqmsg, offset);
GOTO(out_pop, PTR_ERR(de));
}
- inode = de->d_inode;
- if (S_ISREG(body->fid1.f_type)) {
- int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
- CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n",
- rc, inode->i_ino);
- if (rc < 0) {
- if (rc != -ENODATA)
- CERROR("error getting inode %lu MD: rc = %d\n",
- inode->i_ino, rc);
- size[bufcount] = 0;
- } else if (rc > mds->mds_max_mdsize) {
- size[bufcount] = 0;
- CERROR("MD size %d larger than maximum possible %u\n",
- rc, mds->mds_max_mdsize);
- } else
- size[bufcount] = rc;
- bufcount++;
- } else if (body->valid & OBD_MD_LINKNAME) {
- size[bufcount] = MIN(inode->i_size + 1, body->size);
- bufcount++;
- CDEBUG(D_INODE, "symlink size: %d, reply space: %d\n",
- inode->i_size + 1, body->size);
- }
-
- if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) {
- CERROR("failed MDS_GETATTR_PACK test\n");
- req->rq_status = -ENOMEM;
- GOTO(out, rc = -ENOMEM);
- }
-
- rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen,
- &req->rq_repmsg);
- if (rc) {
- CERROR("out of memoryK\n");
- req->rq_status = rc;
- GOTO(out, rc);
- }
+ rc = mds_getattr_pack_msg(req, de->d_inode, offset);
req->rq_status = mds_getattr_internal(mds, de, req, body, 0);
- EXIT;
-out:
l_dput(de);
+ EXIT;
out_pop:
pop_ctxt(&saved, &mds->mds_ctxt, &uc);
return rc;
uc.ouc_cap = body->capability;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
mds_start_transno(mds);
- handle = fsfilt_start(obd, inode,FSFILT_OP_SETATTR);
+ handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR);
if (IS_ERR(handle)) {
rc = PTR_ERR(handle);
mds_finish_transno(mds, handle, req, rc);
mfd = mds_handle2mfd(&body->handle);
if (!mfd) {
- CERROR("no handle for file close "LPD64
- ": addr "LPX64", cookie "LPX64"\n",
- body->fid1.id, body->handle.addr, body->handle.cookie);
+ DEBUG_REQ(D_ERROR, req, "no handle for file close "LPD64
+ ": addr "LPX64", cookie "LPX64"\n",
+ body->fid1.id, body->handle.addr,
+ body->handle.cookie);
RETURN(-ESTALE);
}
/* to make this asynchronous make sure that the handling function
doesn't send a reply when this function completes. Instead a
callback function would send the reply */
- /* note: in case of an error, dentry_open puts dentry */
rc = mds_sendpage(req, file, body->size);
filp_close(file, 0);
struct ptlrpc_request *req;
req = list_entry(mds->mds_recovery_queue.next,
struct ptlrpc_request, rq_list);
+ LASSERT(req->rq_reqmsg->transno >= mds->mds_next_recovery_transno);
return req->rq_reqmsg->transno == mds->mds_next_recovery_transno;
}
static void process_recovery_queue(struct mds_obd *mds)
{
struct ptlrpc_request *req;
+ ENTRY;
for (;;) {
spin_lock(&mds->mds_processing_task_lock);
+ LASSERT(mds->mds_processing_task == current->pid);
req = list_entry(mds->mds_recovery_queue.next,
struct ptlrpc_request, rq_list);
if (req->rq_reqmsg->transno != mds->mds_next_recovery_transno) {
spin_unlock(&mds->mds_processing_task_lock);
+ CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is "
+ LPD64")\n",
+ mds->mds_next_recovery_transno,
+ req->rq_reqmsg->transno);
wait_event(mds->mds_next_transno_waitq,
check_for_next_transno(mds));
continue;
}
- list_del(&req->rq_list);
+ list_del_init(&req->rq_list);
spin_unlock(&mds->mds_processing_task_lock);
- DEBUG_REQ(D_HA, req, "");
- mds_handle(req);
-
- if (list_empty(&mds->mds_recovery_queue))
+ DEBUG_REQ(D_ERROR, req, "processing: ");
+ (void)mds_handle(req);
+ mds_fsync_super(mds->mds_sb);
+ OBD_FREE(req, sizeof *req);
+ spin_lock(&mds->mds_processing_task_lock);
+ mds->mds_next_recovery_transno++;
+ if (list_empty(&mds->mds_recovery_queue)) {
+ mds->mds_processing_task = 0;
+ spin_unlock(&mds->mds_processing_task_lock);
break;
+ }
+ spin_unlock(&mds->mds_processing_task_lock);
}
+ EXIT;
}
static int queue_recovery_request(struct ptlrpc_request *req,
struct mds_obd *mds)
{
struct list_head *tmp;
- int inserted = 0, transno = req->rq_reqmsg->transno;
+ int inserted = 0;
+ __u64 transno = req->rq_reqmsg->transno;
+ struct ptlrpc_request *saved_req;
if (!transno) {
- DEBUG_REQ(D_HA, req, "not queueing");
+ INIT_LIST_HEAD(&req->rq_list);
+ DEBUG_REQ(D_ERROR, req, "not queueing");
return 1;
}
if (mds->mds_processing_task == current->pid) {
/* Processing the queue right now, don't re-add. */
+ LASSERT(list_empty(&req->rq_list));
spin_unlock(&mds->mds_processing_task_lock);
return 1;
}
+ OBD_ALLOC(saved_req, sizeof *saved_req);
+ if (!saved_req)
+ LBUG();
+ memcpy(saved_req, req, sizeof *req);
+ req = saved_req;
+ INIT_LIST_HEAD(&req->rq_list);
+
/* XXX O(n^2) */
list_for_each(tmp, &mds->mds_recovery_queue) {
struct ptlrpc_request *reqiter =
list_entry(tmp, struct ptlrpc_request, rq_list);
+
if (reqiter->rq_reqmsg->transno > transno) {
list_add_tail(&req->rq_list, &reqiter->rq_list);
inserted = 1;
}
}
- if (!inserted)
+ if (!inserted) {
list_add_tail(&req->rq_list, &mds->mds_recovery_queue);
+ }
if (mds->mds_processing_task != 0) {
/* Someone else is processing this queue, we'll leave it to
* them.
*/
- spin_unlock(&mds->mds_processing_task_lock);
if (transno == mds->mds_next_recovery_transno)
wake_up(&mds->mds_next_transno_waitq);
+ spin_unlock(&mds->mds_processing_task_lock);
return 0;
}
switch (req->rq_reqmsg->opc) {
case MDS_CONNECT:
case MDS_DISCONNECT:
- case MDS_OPEN:
*process = 1;
RETURN(0);
+ case MDS_OPEN:
case MDS_GETSTATUS: /* used in unmounting */
case MDS_REINT:
case LDLM_ENQUEUE:
default:
DEBUG_REQ(D_ERROR, req, "not permitted during recovery");
*process = 0;
+ /* XXX what should we set rq_status to here? */
RETURN(ptlrpc_error(req->rq_svc, req));
}
}
static int mds_queue_final_reply(struct ptlrpc_request *req, int rc)
{
struct mds_obd *mds = mds_req2mds(req);
+ struct ptlrpc_request *saved_req;
+ spin_lock(&mds->mds_processing_task_lock);
if (rc) {
/* Just like ptlrpc_error, but without the sending. */
lustre_pack_msg(0, NULL, NULL, &req->rq_replen,
req->rq_type = PTL_RPC_MSG_ERR;
}
+ LASSERT(list_empty(&req->rq_list));
+ OBD_ALLOC(saved_req, sizeof *saved_req);
+ memcpy(saved_req, req, sizeof *saved_req);
+ req = saved_req;
list_add(&req->rq_list, &mds->mds_delayed_reply_queue);
if (--mds->mds_recoverable_clients == 0) {
struct list_head *tmp, *n;
-
- CDEBUG(D_HA,
+ ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace);
+ CDEBUG(D_ERROR,
"all clients recovered, sending delayed replies\n");
list_for_each_safe(tmp, n, &mds->mds_delayed_reply_queue) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
- DEBUG_REQ(D_HA, req, "delayed:");
+ DEBUG_REQ(D_ERROR, req, "delayed:");
ptlrpc_reply(req->rq_svc, req);
+ list_del(&req->rq_list);
+ OBD_FREE(req, sizeof *req);
}
} else {
- CDEBUG(D_HA, "%d recoverable clients remain\n",
+ CERROR("%d recoverable clients remain\n",
mds->mds_recoverable_clients);
}
+ spin_unlock(&mds->mds_processing_task_lock);
return 1;
}
rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_HANDLE_UNPACK)) {
- CERROR("lustre_mds: Invalid request\n");
+ DEBUG_REQ(D_ERROR, req, "invalid request (%d)", rc);
GOTO(out, rc);
}
mds = mds_req2mds(req);
mds_fsync_super(mds->mds_sb);
}
+
+ /* Let the client know if it can replay. */
+ if (mds->mds_recoverable_clients) {
+ lustre_msg_add_flags(req->rq_repmsg,
+ MSG_REPLAY_IN_PROGRESS);
+ }
break;
case MDS_DISCONNECT:
/* Make sure that last_rcvd is correct. */
if (!rc)
mds_fsync_super(mds->mds_sb);
- goto out;
+ req->rq_status = rc;
+ break;
case MDS_GETSTATUS:
DEBUG_REQ(D_INODE, req, "getstatus");
rc = mds_getattr(0, req);
break;
+ case MDS_GETATTR_NAME:
+ DEBUG_REQ(D_INODE, req, "getattr_name");
+ OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0);
+ rc = mds_getattr_name(0, req);
+ break;
+
case MDS_STATFS:
DEBUG_REQ(D_INODE, req, "statfs");
OBD_FAIL_RETURN(OBD_FAIL_MDS_STATFS_NET, 0);
EXIT;
- if (!rc) {
+ /* If we're DISCONNECTing, the mds_export_data is already freed */
+ if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) {
struct mds_export_data *med = &req->rq_export->exp_mds_data;
req->rq_repmsg->last_xid =
HTON__u64(le64_to_cpu(med->med_mcd->mcd_last_xid));
req->rq_repmsg->last_committed =
HTON__u64(mds->mds_last_committed);
- CDEBUG(D_INFO, "last_rcvd ~%Lu, last_committed %Lu, xid %d\n",
+ CDEBUG(D_INFO, "last_transno %Lu, last_committed %Lu, xid %d\n",
(unsigned long long)mds->mds_last_rcvd,
(unsigned long long)mds->mds_last_committed,
cpu_to_le32(req->rq_xid));
return mds_queue_final_reply(req, rc);
}
+ /* XXX bug 578 */
/* MDS_CONNECT / EALREADY (note: not -EALREADY!) isn't an error */
if (rc && (req->rq_reqmsg->opc != MDS_CONNECT ||
rc != EALREADY)) {
- CERROR("mds: processing error (opcode %d): %d\n",
- req->rq_reqmsg->opc, rc);
+ DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc);
ptlrpc_error(req->rq_svc, req);
} else {
- CDEBUG(D_NET, "sending reply\n");
+ DEBUG_REQ(D_NET, req, "sending reply");
ptlrpc_reply(req->rq_svc, req);
}
return 0;
* then the server last_rcvd value may be less than that of the clients.
* This will alert us that we may need to do client recovery.
*
- * Assumes we are already in the server filesystem context.
- *
* Also assumes for mds_last_rcvd that we are not modifying it (no locking).
*/
int mds_update_server_data(struct mds_obd *mds)
{
struct mds_server_data *msd = mds->mds_server_data;
struct file *filp = mds->mds_rcvd_filp;
+ struct obd_run_ctxt saved;
loff_t off = 0;
int rc;
+ push_ctxt(&saved, &mds->mds_ctxt, NULL);
msd->msd_last_rcvd = cpu_to_le64(mds->mds_last_rcvd);
msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
if (rc != sizeof(*msd)) {
CERROR("error writing MDS server data: rc = %d\n", rc);
if (rc > 0)
- RETURN(-EIO);
- RETURN(rc);
+ rc = -EIO;
+ GOTO(out, rc);
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
rc = fsync_dev(filp->f_dentry->d_inode->i_rdev);
if (rc)
CERROR("error flushing MDS server data: rc = %d\n", rc);
- return 0;
-}
-
-/* Do recovery actions for the MDS */
-static int mds_recovery_complete(struct obd_device *obddev)
-{
- struct mds_obd *mds = &obddev->u.mds;
- struct obd_run_ctxt saved;
- int rc;
-
- LASSERT(mds->mds_recoverable_clients == 0);
-
- /* This happens at the end when recovery is complete */
- ++mds->mds_mount_count;
- push_ctxt(&saved, &mds->mds_ctxt, NULL);
- rc = mds_update_server_data(mds);
+out:
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
-
- return rc;
+ RETURN(rc);
}
/* mount the file system (secretly) */
int rc = 0;
ENTRY;
- MOD_INC_USE_COUNT;
#ifdef CONFIG_DEV_RDONLY
dev_clear_rdonly(2);
#endif
if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2)
- GOTO(err_dec, rc = -EINVAL);
+ RETURN(rc = -EINVAL);
obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2);
if (IS_ERR(obddev->obd_fsops))
- GOTO(err_dec, rc = PTR_ERR(obddev->obd_fsops));
+ RETURN(rc = PTR_ERR(obddev->obd_fsops));
mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, NULL);
if (IS_ERR(mnt)) {
spin_lock_init(&mds->mds_processing_task_lock);
mds->mds_processing_task = 0;
+ mds->mds_has_lov_desc = 0;
INIT_LIST_HEAD(&mds->mds_recovery_queue);
INIT_LIST_HEAD(&mds->mds_delayed_reply_queue);
+ init_waitqueue_head(&mds->mds_next_transno_waitq);
RETURN(0);
lock_kernel();
err_ops:
fsfilt_put_ops(obddev->obd_fsops);
-err_dec:
- MOD_DEC_USE_COUNT;
RETURN(rc);
}
{
struct super_block *sb;
struct mds_obd *mds = &obddev->u.mds;
- struct obd_run_ctxt saved;
ENTRY;
sb = mds->mds_sb;
if (!mds->mds_sb)
RETURN(0);
- push_ctxt(&saved, &mds->mds_ctxt, NULL);
mds_update_server_data(mds);
-
- if (mds->mds_rcvd_filp) {
- int rc = filp_close(mds->mds_rcvd_filp, 0);
- mds->mds_rcvd_filp = NULL;
-
- if (rc)
- CERROR("last_rcvd file won't close, rc=%d\n", rc);
- }
- pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+ mds_fs_cleanup(obddev);
unlock_kernel();
mntput(mds->mds_vfsmnt);
#ifdef CONFIG_DEV_RDONLY
dev_clear_rdonly(2);
#endif
- mds_fs_cleanup(obddev);
fsfilt_put_ops(obddev->obd_fsops);
- MOD_DEC_USE_COUNT;
RETURN(0);
}
-static int ldlm_intent_policy(struct ldlm_lock *lock, void *req_cookie,
- ldlm_mode_t mode, int flags, void *data)
+static int ldlm_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock *lock,
+ void *req_cookie, ldlm_mode_t mode, int flags,
+ void *data)
{
struct ptlrpc_request *req = req_cookie;
int rc = 0;
if (req->rq_reqmsg->bufcount > 1) {
/* an intent needs to be considered */
struct ldlm_intent *it = lustre_msg_buf(req->rq_reqmsg, 1);
- struct mds_obd *mds= &req->rq_export->exp_obd->u.mds;
+ struct mds_obd *mds = &req->rq_export->exp_obd->u.mds;
struct mds_body *mds_rep;
struct ldlm_reply *rep;
__u64 new_resid[3] = {0, 0, 0}, old_res;
rep->lock_policy_res2 = req->rq_status;
mds_rep = lustre_msg_buf(req->rq_repmsg, 1);
- /* If the client is about to open a file that doesn't have an MD
- * stripe record, it's going to need a write lock. */
+ /* If the client is about to open a file that doesn't have an
+ * MD stripe record, it's going to need a write lock.
+ */
if (it->opc & IT_OPEN && !(mds_rep->valid & OBD_MD_FLEASIZE)) {
LDLM_DEBUG(lock, "open with no EA; returning PW lock");
lock->l_req_mode = LCK_PW;
LBUG();
old_res = lock->l_resource->lr_name[0];
- ldlm_lock_change_resource(lock, new_resid);
+ ldlm_lock_change_resource(ns, lock, new_resid);
if (lock->l_resource == NULL) {
LBUG();
RETURN(-ENOMEM);
int rc = 0;
ENTRY;
- MOD_INC_USE_COUNT;
-
mds->mds_service = ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS,
MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
"self", mds_handle, "mds");
if (!mds->mds_service) {
CERROR("failed to start service\n");
- GOTO(err_dec, rc = -ENOMEM);
+ RETURN(rc = -ENOMEM);
}
for (i = 0; i < MDT_NUM_THREADS; i++) {
err_thread:
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
-err_dec:
- MOD_DEC_USE_COUNT;
RETURN(rc);
}
ptlrpc_stop_all_threads(mds->mds_service);
ptlrpc_unregister_service(mds->mds_service);
- MOD_DEC_USE_COUNT;
RETURN(0);
}
/* use obd ops to offer management infrastructure */
static struct obd_ops mds_obd_ops = {
+ o_owner: THIS_MODULE,
o_attach: mds_attach,
o_detach: mds_detach,
o_connect: mds_connect,
};
static struct obd_ops mdt_obd_ops = {
+ o_owner: THIS_MODULE,
o_setup: mdt_setup,
o_cleanup: mdt_cleanup,
};
#include <linux/lustre_fsfilt.h>
#include <linux/lprocfs_status.h>
-int rd_uuid(char* page, char **start, off_t off, int count, int *eof,
+int rd_uuid(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* temp = (struct obd_device*)data;
- int len = 0;
- len += snprintf(page, count, "%s\n", temp->obd_uuid);
- return len;
+ return snprintf(page, count, "%s\n", temp->obd_uuid);
}
-int rd_blksize(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_blksize(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* temp = (struct obd_device*)data;
struct mds_obd *mds = &temp->u.mds;
struct statfs mystats;
- int rc, len = 0;
-
+ int rc;
+
rc = vfs_statfs(mds->mds_sb, &mystats);
if (rc) {
CERROR("mds: statfs failed: rc %d\n", rc);
return 0;
}
- len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_bsize));
- return len;
-
+ return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_bsize));
}
-int rd_kbtotal(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_kbtotal(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* temp = (struct obd_device*)data;
struct mds_obd *mds = &temp->u.mds;
struct statfs mystats;
- int rc, len = 0;
+ int rc;
__u32 blk_size;
__u64 result;
-
+
rc = vfs_statfs(mds->mds_sb, &mystats);
if (rc) {
CERROR("mds: statfs failed: rc %d\n", rc);
return 0;
}
-
+
blk_size = mystats.f_bsize;
blk_size >>= 10;
result = mystats.f_blocks;
- while(blk_size >>= 1){
+ while(blk_size >>= 1)
result <<= 1;
- }
- len += snprintf(page, count, LPU64"\n", result);
- return len;
-
+
+ return snprintf(page, count, LPU64"\n", result);
}
-int rd_kbfree(char* page, char **start, off_t off, int count, int *eof,
+int rd_kbfree(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* temp = (struct obd_device*)data;
struct mds_obd *mds = &temp->u.mds;
struct statfs mystats;
- int rc, len = 0;
+ int rc;
__u32 blk_size;
__u64 result;
-
rc = vfs_statfs(mds->mds_sb, &mystats);
if (rc) {
blk_size = mystats.f_bsize;
blk_size >>= 10;
result = mystats.f_blocks;
- while(blk_size >>= 1){
+ while (blk_size >>= 1)
result <<= 1;
- }
- len += snprintf(page, count, LPU64"\n", result);
- return len;
-
+
+ return snprintf(page, count, LPU64"\n", result);
}
int rd_fstype(char *page, char **start, off_t off, int count, int *eof,
return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type);
}
-int rd_filestotal(char* page, char **start, off_t off, int count, int *eof,
+int rd_filestotal(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
struct obd_device* temp = (struct obd_device*)data;
struct mds_obd *mds = &temp->u.mds;
struct statfs mystats;
- int rc, len = 0;
-
+ int rc;
+
rc = vfs_statfs(mds->mds_sb, &mystats);
if (rc) {
CERROR("mds: statfs failed: rc %d\n", rc);
return 0;
}
-
- len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
- return len;
-
-
+ return snprintf(page, count, LPU64"\n", (__u64)(mystats.f_files));
}
-int rd_filesfree(char* page, char **start, off_t off, int count, int *eof,
- void *data)
+int rd_filesfree(char *page, char **start, off_t off, int count, int *eof,
+ void *data)
{
struct obd_device* temp = (struct obd_device*)data;
struct mds_obd *mds = &temp->u.mds;
struct statfs mystats;
int rc, len = 0;
-
+
rc = vfs_statfs(mds->mds_sb, &mystats);
if (rc) {
CERROR("mds: statfs failed: rc %d\n", rc);
return 0;
}
-
+
len += snprintf(page, count, LPU64"\n", (__u64)(mystats.f_ffree));
- return len;
+ return len;
}
-int rd_filegroups(char* page, char **start, off_t off, int count, int *eof,
+int rd_filegroups(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
return 0;
{"status/filegroups", rd_filegroups, 0, 0},
{0}
};
-int rd_numrefs(char* page, char **start, off_t off, int count, int *eof,
+
+int rd_numrefs(char *page, char **start, off_t off, int count, int *eof,
void *data)
{
- struct obd_type* class = (struct obd_type*)data;
- int len = 0;
- len += snprintf(page, count, "%d\n", class->typ_refcnt);
- return len;
+ struct obd_type *class = (struct obd_type*)data;
+
+ return snprintf(page, count, "%d\n", class->typ_refcnt);
}
struct lprocfs_vars status_class_var[]={
RETURN(written);
RETURN(-EIO);
}
+ CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n",
+ MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE),
+ (unsigned int)sizeof(*med->med_mcd));
}
return 0;
}
if (!med->med_mcd)
RETURN(0);
- CDEBUG(D_INFO, "freeing client at offset %d with UUID '%s'\n",
- med->med_off, med->med_mcd->mcd_uuid);
+ off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE);
+
+ CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n",
+ med->med_off, off, med->med_mcd->mcd_uuid);
if (!test_and_clear_bit(med->med_off, last_rcvd_slots)) {
- CERROR("MDS client %d: bit already clear in bitmap!!\n",
+ CERROR("MDS client %u: bit already clear in bitmap!!\n",
med->med_off);
LBUG();
}
- off = med->med_off;
-
memset(&zero_mcd, 0, sizeof zero_mcd);
push_ctxt(&saved, &mds->mds_ctxt, NULL);
written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd,
- sizeof zero_mcd, &off);
+ sizeof(zero_mcd), &off);
pop_ctxt(&saved, &mds->mds_ctxt, NULL);
- if (written != sizeof zero_mcd) {
+ if (written != sizeof(zero_mcd)) {
CERROR("error zeroing out client %s off %d in %s: %d\n",
med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD,
written);
- LBUG();
} else {
CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n",
med->med_mcd->mcd_uuid, med->med_off);
struct mds_client_data *mcd = NULL;
loff_t off = 0;
int cl_off;
- int max_off = f->f_dentry->d_inode->i_size / sizeof(*mcd);
+ unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size;
__u64 last_rcvd = 0;
__u64 last_mount;
int rc = 0;
if (rc != sizeof(*msd)) {
CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
- if (rc > 0) {
+ if (rc > 0)
rc = -EIO;
- }
GOTO(err_msd, rc);
}
+ CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n",
+ last_rcvd_size, (last_rcvd_size - sizeof *msd) / sizeof *mcd);
+
/*
* When we do a clean MDS shutdown, we save the last_rcvd into
* the header. If we find clients with higher last_rcvd values
*/
last_rcvd = le64_to_cpu(msd->msd_last_rcvd);
mds->mds_last_rcvd = last_rcvd;
- CDEBUG(D_INODE, "got %Lu for server last_rcvd value\n",
- (unsigned long long)last_rcvd);
+ CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n", last_rcvd);
last_mount = le64_to_cpu(msd->msd_mount_count);
mds->mds_mount_count = last_mount;
- CDEBUG(D_INODE, "got %Lu for server last_mount value\n",
- (unsigned long long)last_mount);
+ CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount);
- for (off = MDS_LR_CLIENT, cl_off = 0;
- off < max_off;
- off += MDS_LR_SIZE, cl_off++) {
+ /* off is adjusted by lustre_fread, so we don't adjust it in the loop */
+ for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) {
int mount_age;
if (!mcd) {
if (rc != sizeof(*mcd)) {
CERROR("error reading MDS %s offset %d: rc = %d\n",
LAST_RCVD, cl_off, rc);
- if (rc > 0)
+ if (rc > 0) /* XXX fatal error or just abort reading? */
rc = -EIO;
break;
}
last_rcvd = le64_to_cpu(mcd->mcd_last_rcvd);
- /* The exports are cleaned up by mds_disconnect, so they
- * need to be set up like real exports also.
+ /* These exports are cleaned up by mds_disconnect(), so they
+ * need to be set up like real exports as mds_connect() does.
*/
mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count);
- if (last_rcvd && mount_age < MDS_MOUNT_RECOV) {
+ if (mount_age < MDS_MOUNT_RECOV) {
struct obd_export *exp = class_new_export(obddev);
struct mds_export_data *med;
med = &exp->exp_mds_data;
med->med_mcd = mcd;
mds_client_add(mds, med, cl_off);
- /* XXX put this in a helper if it gets more complex */
+ /* create helper if export init gets more complex */
INIT_LIST_HEAD(&med->med_open_head);
spin_lock_init(&med->med_open_lock);
mcd = NULL;
mds->mds_recoverable_clients++;
- MOD_INC_USE_COUNT;
} else {
CDEBUG(D_INFO,
"discarded client %d, UUID '%s', count %Ld\n",
(long long)le64_to_cpu(mcd->mcd_mount_count));
}
- if (last_rcvd > mds->mds_last_rcvd) {
- CDEBUG(D_OTHER,
- "client at offset %d has last_rcvd = %Lu\n",
- cl_off, (unsigned long long)last_rcvd);
+ CDEBUG(D_OTHER, "client at offset %d has last_rcvd = %Lu\n",
+ cl_off, (unsigned long long)last_rcvd);
+
+ if (last_rcvd > mds->mds_last_rcvd)
mds->mds_last_rcvd = last_rcvd;
- }
}
mds->mds_last_committed = mds->mds_last_rcvd;
if (mds->mds_recoverable_clients) {
- CERROR("need recovery: %d recoverable clients, last_rcvd %Lu\n",
+ CERROR("RECOVERY: %d recoverable clients, last_rcvd "LPU64"\n",
mds->mds_recoverable_clients, mds->mds_last_rcvd);
+ mds->mds_next_recovery_transno = mds->mds_last_committed + 1;
}
if (mcd)
if (!S_ISREG(f->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
f->f_dentry->d_inode->i_mode);
- GOTO(err_pop, rc = -ENOENT);
+ GOTO(err_filp, rc = -ENOENT);
}
rc = fsfilt_journal_data(obddev, f);
RETURN(mds_fs_prep(obddev));
}
-void mds_fs_cleanup(struct obd_device *obddev)
+int mds_fs_cleanup(struct obd_device *obddev)
{
struct mds_obd *mds = &obddev->u.mds;
+ struct obd_run_ctxt saved;
+ int rc = 0;
class_disconnect_all(obddev); /* this cleans up client info too */
mds_server_free_data(mds);
+
+ push_ctxt(&saved, &mds->mds_ctxt, NULL);
+ if (mds->mds_rcvd_filp) {
+ rc = filp_close(mds->mds_rcvd_filp, 0);
+ mds->mds_rcvd_filp = NULL;
+
+ if (rc)
+ CERROR("last_rcvd file won't close, rc=%d\n", rc);
+ }
+ pop_ctxt(&saved, &mds->mds_ctxt, NULL);
+
+ return rc;
}
ENTRY;
tgt_count = desc->ld_tgt_count;
+ if (desc->ld_default_stripe_count > desc->ld_tgt_count) {
+ CERROR("default stripe count %u > OST count %u\n",
+ desc->ld_default_stripe_count, desc->ld_tgt_count);
+ RETURN(-EINVAL);
+ }
+ if (desc->ld_default_stripe_size & (PAGE_SIZE - 1)) {
+ CERROR("default stripe size "LPU64" not a multiple of %lu\n",
+ desc->ld_default_stripe_size, PAGE_SIZE);
+ RETURN(-EINVAL);
+ }
+ if (desc->ld_default_stripe_offset > desc->ld_tgt_count) {
+ CERROR("default stripe offset "LPU64" > max OST index %u\n",
+ desc->ld_default_stripe_offset, desc->ld_tgt_count);
+ RETURN(-EINVAL);
+ }
+ if (desc->ld_pattern != 0) {
+ CERROR("stripe pattern %u unknown\n",
+ desc->ld_pattern);
+ RETURN(-EINVAL);
+ }
+
+ memcpy(&mds->mds_lov_desc, desc, sizeof *desc);
+ mds->mds_has_lov_desc = 1;
+ /* XXX the MDS should not really know about this */
+ mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count);
+
lov_packdesc(desc);
push_ctxt(&saved, &mds->mds_ctxt, NULL);
GOTO(out, rc = PTR_ERR(f));
}
+#warning FIXME: if there is an existing LOVDESC, verify new tgt_count > old
rc = lustre_fwrite(f, (char *)desc, sizeof(*desc), &f->f_pos);
if (filp_close(f, 0))
CERROR("Error closing LOVDESC file\n");
GOTO(out, rc = PTR_ERR(f));
}
+#warning FIXME: if there is an existing LOVTGTS, verify existing UUIDs same
rc = 0;
for (i = 0; i < tgt_count ; i++) {
rc = lustre_fwrite(f, uuidarray[i],
written = lustre_fwrite(mds->mds_rcvd_filp, (char *)mcd, sizeof(*mcd),
&off);
CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
- "%d\n", last_rcvd, mcd->mcd_uuid, med->med_off, written);
+ LPSZ"\n", last_rcvd, mcd->mcd_uuid, med->med_off, written);
if (written == sizeof(*mcd))
GOTO(out, rc = 0);
CDEBUG(D_INODE, "parent ino %lu name %s mode %o\n",
dir->i_ino, rec->ur_name, rec->ur_mode);
- ldlm_lock_dump_handle(&lockh);
+ ldlm_lock_dump_handle(D_OTHER, &lockh);
down(&dir->i_sem);
dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
rec->ur_mode |= S_ISGID;
}
+ if (rec->ur_fid2->id)
+ dchild->d_fsdata = (void *)(unsigned long)rec->ur_fid2->id;
+ else
+ LASSERT(!(rec->ur_opcode & REINT_REPLAYING));
+
/* From here on, we must exit via a path that calls mds_finish_transno,
* so that we release the mds_transno_sem (and, in the case of success,
* update the transno correctly). out_create_commit and
GOTO(out_transno_dchild, rc = -EINVAL);
}
+ /* In case we stored the desired inum in here, we want to clean up.
+ * We also do this in the out_transno_dchild block, for the error cases.
+ */
+ dchild->d_fsdata = NULL;
+
if (rc) {
CDEBUG(D_INODE, "error during create: %d\n", rc);
GOTO(out_create_commit, rc);
ATTR_MTIME | ATTR_CTIME;
if (rec->ur_fid2->id) {
- LASSERT(rec->ur_opcode & REINT_REPLAYING);
+ LASSERT(rec->ur_fid2->id == inode->i_ino);
inode->i_generation = rec->ur_fid2->generation;
/* Dirtied and committed by the upcoming setattr. */
- CDEBUG(D_INODE, "recreated ino %lu with gen %lu\n",
+ CDEBUG(D_INODE, "recreated ino %lu with gen %x\n",
inode->i_ino, inode->i_generation);
} else {
- CDEBUG(D_INODE, "created ino %lu\n", inode->i_ino);
+ CDEBUG(D_INODE, "created ino %lu with gen %x\n",
+ inode->i_ino, inode->i_generation);
}
rc = fsfilt_setattr(obd, dchild, handle, &iattr);
return 0;
out_transno_dchild:
+ dchild->d_fsdata = NULL;
/* Need to release the transno lock, and then put the dchild. */
LASSERT(rc);
mds_finish_transno(mds, handle, req, rc);
dir = de->d_inode;
inode = dchild->d_inode;
- CDEBUG(D_INODE, "parent ino %lu\n", dir->i_ino);
+ DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu\n", dir->i_ino,
+ inode ? inode->i_ino : 0);
if (!inode) {
if (rec->ur_opcode & REINT_REPLAYING) {
GOTO(out_link_src_put, rc = -EIO);
}
} else {
- ldlm_lock_dump_handle(&srclockh);
+ ldlm_lock_dump_handle(D_OTHER, &srclockh);
}
de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
GOTO(out_link_tgt_dir_put, rc = -EIO);
}
} else {
- ldlm_lock_dump_handle(&tgtlockh);
+ ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
}
down(&de_tgt_dir->d_inode->i_sem);
GOTO(out_rename_srcput, rc = -EIO);
}
} else {
- ldlm_lock_dump_handle(&srclockh);
+ ldlm_lock_dump_handle(D_OTHER, &srclockh);
}
de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL);
GOTO(out_rename_tgtput, rc = -EIO);
}
} else {
- ldlm_lock_dump_handle(&tgtlockh);
+ ldlm_lock_dump_handle(D_OTHER, &tgtlockh);
}
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
FSMOD = fsfilt_extN
endif
-modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o
-EXTRA_PROGRAMS = obdclass $(FSMOD)
+modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o
+EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs
obdclass_SOURCES = debug.c genops.c class_obd.c sysctl.c uuid.c lprocfs_status.c
-obdclass_SOURCES += fsfilt.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c
include $(top_srcdir)/Rules
+
+# XXX I'm sure there's some automake mv-if-different helper for this.
lustre_build_version:
perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) > tmpver
- diff -u $(top_builddir)/include/linux/lustre_build_version.h tmpver \
- 2> /dev/null &&\
- $(RM) tmpver || \
+ cmp -z $(top_builddir)/include/linux/lustre_build_version.h tmpver \
+ 2> /dev/null && \
+ $(RM) tmpver || \
mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
struct obd_device obd_dev[MAX_OBD_DEVICES];
struct list_head obd_types;
atomic_t obd_memory;
+int obd_memmax;
/* The following are visible and mutable through /proc/sys/lustre/. */
unsigned long obd_fail_loc;
unsigned long obd_timeout = 100;
char obd_recovery_upcall[128] = "/usr/lib/lustre/ha_assist";
-extern struct obd_type *class_nm_to_type(char *nm);
-
/* opening /dev/obd */
static int obd_class_open(struct inode * inode, struct file * file)
{
ENTRY;
file->private_data = NULL;
- CDEBUG(D_IOCTL, "MOD_INC_USE for open: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount));
MOD_INC_USE_COUNT;
RETURN(0);
}
// XXX drop lsm, connections here
if (file->private_data)
file->private_data = NULL;
-
- CDEBUG(D_IOCTL, "MOD_DEC_USE for close: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
MOD_DEC_USE_COUNT;
RETURN(0);
}
if (obd->obd_flags & OBD_ATTACHED || obd->obd_type) {
CERROR("OBD: Device %d already typed as %s.\n",
obd->obd_minor, MKSTR(obd->obd_type->typ_name));
- GOTO(out, err=-EBUSY);
+ GOTO(out, err = -EBUSY);
}
if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
CERROR("No type passed!\n");
- GOTO(out, err=-EINVAL);
+ GOTO(out, err = -EINVAL);
}
if (data->ioc_inlbuf1[data->ioc_inllen1-1] !=0) {
CERROR("Type not nul terminated!\n");
- GOTO(out, err=-EINVAL);
+ GOTO(out, err = -EINVAL);
}
CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3));
/* find the type */
- type = class_nm_to_type(data->ioc_inlbuf1);
+ type = class_get_type(data->ioc_inlbuf1);
if (!type) {
CERROR("OBD: unknown type dev %d\n", obd->obd_minor);
- GOTO(out, err=-EINVAL);
+ GOTO(out, err = -EINVAL);
}
minor = obd->obd_minor;
int len = strlen(data->ioc_inlbuf2) + 1;
OBD_ALLOC(obd->obd_name, len);
if (!obd->obd_name) {
- CERROR("no memory\n");
- LBUG();
+ class_put_type(obd->obd_type);
+ GOTO(out, err = -ENOMEM);
}
memcpy(obd->obd_name, data->ioc_inlbuf2, len);
} else {
if (data->ioc_inlbuf3) {
int len = strlen(data->ioc_inlbuf3);
if (len >= sizeof(obd->obd_uuid)) {
- CERROR("uuid must be < %d bytes long\n",
+ CERROR("uuid must be < "LPSZ" bytes long\n",
sizeof(obd->obd_uuid));
if (obd->obd_name)
OBD_FREE(obd->obd_name,
strlen(obd->obd_name) + 1);
+ class_put_type(obd->obd_type);
GOTO(out, err=-EINVAL);
}
memcpy(obd->obd_uuid, data->ioc_inlbuf3, len);
if (err) {
if(data->ioc_inlbuf2)
OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
+ class_put_type(obd->obd_type);
obd->obd_type = NULL;
} else {
obd->obd_flags |= OBD_ATTACHED;
type->typ_refcnt++;
CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n",
obd->obd_minor, data->ioc_inlbuf1);
-
- CDEBUG(D_IOCTL, "MOD_INC_USE for attach: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount));
- MOD_INC_USE_COUNT;
}
GOTO(out, err);
}
forcibly_detach_exports(obd);
}
- if (OBP(obd, detach))
- err=OBP(obd,detach)(obd);
+ if (OBP(obd, detach))
+ err = OBP(obd,detach)(obd);
if (obd->obd_name) {
OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
obd->obd_flags &= ~OBD_ATTACHED;
obd->obd_type->typ_refcnt--;
+ class_put_type(obd->obd_type);
obd->obd_type = NULL;
- CDEBUG(D_IOCTL, "MOD_DEC_USE for detach: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
- MOD_DEC_USE_COUNT;
GOTO(out, err = 0);
}
GOTO(out, err);
}
- case OBD_IOC_DEC_USE_COUNT: {
- CDEBUG(D_IOCTL, "MOD_DEC_USE for force dec: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
- MOD_DEC_USE_COUNT;
- GOTO(out, err=0);
- }
-
default:
obd_data2conn(&conn, data);
EXPORT_SYMBOL(obd_dev);
EXPORT_SYMBOL(obdo_cachep);
EXPORT_SYMBOL(obd_memory);
+EXPORT_SYMBOL(obd_memmax);
EXPORT_SYMBOL(obd_fail_loc);
EXPORT_SYMBOL(obd_timeout);
EXPORT_SYMBOL(obd_recovery_upcall);
EXPORT_SYMBOL(class_register_type);
EXPORT_SYMBOL(class_unregister_type);
+EXPORT_SYMBOL(class_get_type);
+EXPORT_SYMBOL(class_put_type);
EXPORT_SYMBOL(class_name2dev);
EXPORT_SYMBOL(class_uuid2dev);
EXPORT_SYMBOL(class_uuid2obd);
EXPORT_SYMBOL(class_uuid_unparse);
EXPORT_SYMBOL(class_signal_connection_failure);
-EXPORT_SYMBOL(class_nm_to_type);
static int __init init_obdclass(void)
{
obd->obd_minor = i;
err = obd_init_caches();
-
if (err)
return err;
+
obd_sysctl_init();
err = lprocfs_reg_main();
err = lprocfs_dereg_main();
- CERROR("obd memory leaked: %ld bytes\n", obd_memory);
+ CERROR("obd mem max: %d leaked: %d\n", obd_memmax,
+ atomic_read(&obd_memory));
EXIT;
}
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/slab.h>
-#include <linux/extN_fs.h>
-#include <linux/extN_jbd.h>
-#include <linux/extN_xattr.h>
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/kp30.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
+#include <linux/obd_class.h>
#include <linux/module.h>
static kmem_cache_t *fcb_cache;
RETURN(handle);
}
-static int fsfilt_extN_commit(struct inode *inode, void *handle)
+static int fsfilt_extN_commit(struct inode *inode, void *h /*, force_sync */)
{
int rc;
+ handle_t *handle = h;
+
+#if 0
+ if (force_sync)
+ handle->h_sync = 1; /* recovery likes this */
+#endif
lock_kernel();
- rc = journal_stop((handle_t *)handle);
+ rc = journal_stop(handle);
unlock_kernel();
return rc;
int rc;
lock_kernel();
+
+ /* A _really_ horrible hack to avoid removing the data stored
+ * in the block pointers; this is really the "small" stripe MD data.
+ * We can avoid further hackery by virtue of the MDS file size being
+ * zero all the time (which doesn't invoke block truncate at unlink
+ * time), so we assert we never change the MDS file size from zero.
+ */
+ if (iattr->ia_valid & ATTR_SIZE) {
+ CERROR("hmm, setting %*s file size to %lld\n",
+ dentry->d_name.len, dentry->d_name.name, iattr->ia_size);
+ LASSERT(iattr->ia_size == 0);
+#if 0
+ /* ATTR_SIZE would invoke truncate: clear it */
+ iattr->ia_valid &= ~ATTR_SIZE;
+ inode->i_size = iattr->ia_size;
+
+ /* make sure _something_ gets set - so new inode
+ * goes to disk (probably won't work over XFS)
+ */
+ if (!iattr->ia_valid & ATTR_MODE) {
+ iattr->ia_valid |= ATTR_MODE;
+ iattr->ia_mode = inode->i_mode;
+ }
+#endif
+ }
if (inode->i_op->setattr)
rc = inode->i_op->setattr(dentry, iattr);
else
{
int rc;
- down(&inode->i_sem);
- lock_kernel();
- rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
- XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
- unlock_kernel();
- up(&inode->i_sem);
+ /* Nasty hack city - store stripe MD data in the block pointers if
+ * it will fit, because putting it in an EA currently kills the MDS
+ * performance. We'll fix this with "fast EAs" in the future.
+ */
+ if (lmm_size <= sizeof(EXTN_I(inode)->i_data) -
+ sizeof(EXTN_I(inode)->i_data[0])) {
+ /* XXX old_size is debugging only */
+ int old_size = EXTN_I(inode)->i_data[0];
+ if (old_size != 0) {
+ LASSERT(old_size < sizeof(EXTN_I(inode)->i_data));
+ CERROR("setting EA on %lu again... interesting\n",
+ inode->i_ino);
+ }
+
+ EXTN_I(inode)->i_data[0] = cpu_to_le32(lmm_size);
+ memcpy(&EXTN_I(inode)->i_data[1], lmm, lmm_size);
+ mark_inode_dirty(inode);
+ return 0;
+ } else {
+ down(&inode->i_sem);
+ lock_kernel();
+ rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE,
+ XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0);
+ unlock_kernel();
+ up(&inode->i_sem);
+ }
- if (rc) {
+ if (rc)
CERROR("error adding MD data to inode %lu: rc = %d\n",
inode->i_ino, rc);
- if (rc != -ENOSPC) LBUG();
- }
return rc;
}
-static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int size)
+static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size)
{
int rc;
+ if (EXTN_I(inode)->i_data[0]) {
+ int size = le32_to_cpu(EXTN_I(inode)->i_data[0]);
+ LASSERT(size < sizeof(EXTN_I(inode)->i_data));
+ if (lmm) {
+ if (size > lmm_size)
+ return -ERANGE;
+ memcpy(lmm, &EXTN_I(inode)->i_data[1], size);
+ }
+ return size;
+ }
+
down(&inode->i_sem);
lock_kernel();
rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE,
- XATTR_LUSTRE_MDS_OBJID, lmm, size);
+ XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size);
unlock_kernel();
up(&inode->i_sem);
if (rc < 0) {
CDEBUG(D_INFO, "error getting EA %s from inode %lu: "
"rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc);
- memset(lmm, 0, size);
+ memset(lmm, 0, lmm_size);
return (rc == -ENODATA) ? 0 : rc;
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre/lib/fsfilt_reiserfs.c
+ * Lustre filesystem abstraction routines
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * NOTE - According to Hans Reiser, this could actually be implemented more
+ * efficiently than creating a directory and putting ASCII objids in it.
+ * Instead, we should return the reiserfs object ID as the lustre objid
+ * (although I'm not sure what impact that would have on backup/restore).
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/kp30.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <linux/module.h>
+
+/* These helpers do not start a real transaction.  The start routines hand
+ * out this fixed cookie and the commit routine only verifies that the same
+ * cookie came back.
+ */
+#define FSFILT_REISERFS_FAKE_HANDLE ((void *)0xf00f00be)
+
+/* Begin an operation on @inode.  @op is ignored; the fake handle is
+ * returned unconditionally.
+ */
+static void *fsfilt_reiserfs_start(struct inode *inode, int op)
+{
+        return FSFILT_REISERFS_FAKE_HANDLE;
+}
+
+/* Begin a bulk read/write.  None of the arguments are examined; the fake
+ * handle is returned unconditionally.
+ */
+static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso,
+                                       int niocount, struct niobuf_remote *nb)
+{
+        return FSFILT_REISERFS_FAKE_HANDLE;
+}
+
+/* Finish an operation.  Returns 0 when @handle is the cookie produced by
+ * the start routines above, and -EINVAL (after logging) for anything else.
+ */
+static int fsfilt_reiserfs_commit(struct inode *inode, void *handle)
+{
+        if (handle != FSFILT_REISERFS_FAKE_HANDLE) {
+                CERROR("bad handle %p\n", handle);
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+/* Apply the attribute changes in @iattr to the inode behind @dentry.
+ *
+ * The whole update runs under the big kernel lock.  @handle is not used.
+ * Returns whatever the inode's own ->setattr method returns when it has
+ * one, and the result of inode_setattr() otherwise.
+ */
+static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle,
+                                   struct iattr *iattr)
+{
+        struct inode *inode = dentry->d_inode;
+        int rc;
+
+        lock_kernel();
+
+        /* A _really_ horrible hack to avoid removing the data stored
+         * in the block pointers; this is really the "small" stripe MD data.
+         * We can avoid further hackery by virtue of the MDS file size being
+         * zero all the time (which doesn't invoke block truncate at unlink
+         * time), so we assert we never change the MDS file size from zero.
+         */
+        if (iattr->ia_valid & ATTR_SIZE) {
+                /* "%.*s" limits the name to d_name.len bytes; a plain "%*s"
+                 * would only set a minimum field width.
+                 */
+                CERROR("hmm, setting %.*s file size to %lld\n",
+                       (int)dentry->d_name.len, dentry->d_name.name,
+                       (long long)iattr->ia_size);
+                LASSERT(iattr->ia_size == 0);
+#if 0
+                /* ATTR_SIZE would invoke truncate: clear it */
+                iattr->ia_valid &= ~ATTR_SIZE;
+                inode->i_size = iattr->ia_size;
+
+                /* make sure _something_ gets set - so new inode
+                 * goes to disk (probably won't work over XFS)
+                 */
+                if (!(iattr->ia_valid & ATTR_MODE)) {
+                        iattr->ia_valid |= ATTR_MODE;
+                        iattr->ia_mode = inode->i_mode;
+                }
+#endif
+        }
+
+        if (inode->i_op->setattr)
+                rc = inode->i_op->setattr(dentry, iattr);
+        else
+                rc = inode_setattr(inode, iattr);
+
+        unlock_kernel();
+
+        return rc;
+}
+
+/* Placeholder for storing stripe metadata: nothing is written, an error is
+ * logged and -ENOSYS is returned for every call.
+ */
+static int fsfilt_reiserfs_set_md(struct inode *inode, void *handle,
+                                  void *lmm, int lmm_size)
+{
+        int rc = -ENOSYS;
+
+        /* XXX write stripe data into MDS file itself */
+        CERROR("not implemented yet\n");
+
+        return rc;
+}
+
+/* Placeholder for reading stripe metadata.
+ *
+ * With a NULL @lmm the inode's i_size is returned (converted to int).
+ * With a buffer supplied, an error is logged and -ENOSYS is returned;
+ * @lmm_size is never consulted.
+ */
+static int fsfilt_reiserfs_get_md(struct inode *inode, void *lmm, int lmm_size)
+{
+        if (lmm != NULL) {
+                CERROR("not implemented yet\n");
+                return -ENOSYS;
+        }
+
+        return inode->i_size;
+}
+
+/* Read up to @count bytes at *@offset from @file into @buf by calling the
+ * file's own ->read method; its return value is passed straight back.
+ */
+static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t count,
+                                        loff_t *offset)
+{
+        ssize_t nread;
+
+        nread = file->f_op->read(file, buf, count, offset);
+
+        return nread;
+}
+
+/* Report @last_rcvd for @obd by invoking @cb_func immediately.
+ *
+ * No journal callback is registered; the callback is called directly with
+ * a final argument of 0.  A warning about this is logged, rate-limited to
+ * one message per 300 * HZ jiffies.  @handle is not used.  Always returns 0.
+ */
+static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd,
+                                         void *handle, fsfilt_cb_t cb_func)
+{
+        /* jiffies value after which the warning may be printed again;
+         * unsigned long to match jiffies and time_after()
+         */
+        static unsigned long next_warning = 0;
+
+        if (time_after(jiffies, next_warning)) {
+                CERROR("no journal callback kernel patch, faking it...\n");
+                next_warning = jiffies + 300 * HZ;
+        }
+
+        cb_func(obd, last_rcvd, 0);
+
+        return 0;
+}
+
+/* Placeholder: logs that data journalling is not implemented and reports
+ * success (0) without touching @filp.
+ */
+static int fsfilt_reiserfs_journal_data(struct file *filp)
+{
+        int rc = 0;
+
+        CERROR("not implemented yet\n");
+
+        return rc;
+}
+
+/* Fill @osfs with the filesystem statistics of @sb.
+ *
+ * Returns 0 on success.  When vfs_statfs() fails its error code is
+ * returned and @osfs is left untouched: the on-stack statfs buffer is not
+ * known to hold valid data in that case, so it must not be packed.
+ */
+static int fsfilt_reiserfs_statfs(struct super_block *sb, struct obd_statfs *osfs)
+{
+        struct statfs sfs;
+        int rc;
+
+        rc = vfs_statfs(sb, &sfs);
+        if (rc)
+                return rc;
+
+        statfs_pack(osfs, &sfs);
+
+        return 0;
+}
+
+/* Method table handed to fsfilt_register_ops() for the "reiserfs" type. */
+static struct fsfilt_operations fsfilt_reiserfs_ops = {
+        /* identification */
+        fs_type:          "reiserfs",
+        fs_owner:         THIS_MODULE,
+
+        /* (fake) transaction handling */
+        fs_start:         fsfilt_reiserfs_start,
+        fs_brw_start:     fsfilt_reiserfs_brw_start,
+        fs_commit:        fsfilt_reiserfs_commit,
+        fs_journal_data:  fsfilt_reiserfs_journal_data,
+        fs_set_last_rcvd: fsfilt_reiserfs_set_last_rcvd,
+
+        /* attributes and stripe metadata */
+        fs_setattr:       fsfilt_reiserfs_setattr,
+        fs_set_md:        fsfilt_reiserfs_set_md,
+        fs_get_md:        fsfilt_reiserfs_get_md,
+
+        /* data and statistics */
+        fs_readpage:      fsfilt_reiserfs_readpage,
+        fs_statfs:        fsfilt_reiserfs_statfs,
+};
+
+/* Module entry point: registers the reiserfs method table and passes the
+ * registration result back to the module loader.
+ */
+static int __init fsfilt_reiserfs_init(void)
+{
+        int rc;
+
+        rc = fsfilt_register_ops(&fsfilt_reiserfs_ops);
+
+        return rc;
+}
+
+/* Module exit point: unregisters the reiserfs method table that
+ * fsfilt_reiserfs_init() registered.
+ */
+static void __exit fsfilt_reiserfs_exit(void)
+{
+ fsfilt_unregister_ops(&fsfilt_reiserfs_ops);
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre reiserfs Filesystem Helper v0.1");
+MODULE_LICENSE("GPL");
+
+module_init(fsfilt_reiserfs_init);
+module_exit(fsfilt_reiserfs_exit);
* support functions: we could use inter-module communication, but this
* is more portable to other OS's
*/
-static struct obd_type *class_search_type(char *nm)
+static struct obd_type *class_search_type(char *name)
{
struct list_head *tmp;
struct obd_type *type;
- CDEBUG(D_INFO, "SEARCH %s\n", nm);
+ CDEBUG(D_INFO, "SEARCH %s\n", name);
tmp = &obd_types;
list_for_each(tmp, &obd_types) {
type = list_entry(tmp, struct obd_type, typ_chain);
CDEBUG(D_INFO, "TYP %s\n", type->typ_name);
- if (strlen(type->typ_name) == strlen(nm) &&
- strcmp(type->typ_name, nm) == 0 ) {
+ if (strlen(type->typ_name) == strlen(name) &&
+ strcmp(type->typ_name, name) == 0) {
return type;
}
}
return NULL;
}
-struct obd_type *class_nm_to_type(char *nm)
+struct obd_type *class_get_type(char *name)
{
- struct obd_type *type = class_search_type(nm);
+ struct obd_type *type = class_search_type(name);
#ifdef CONFIG_KMOD
- if ( !type ) {
- if ( !request_module(nm) ) {
- CDEBUG(D_INFO, "Loaded module '%s'\n", nm);
- type = class_search_type(nm);
- } else {
- CDEBUG(D_INFO, "Can't load module '%s'\n", nm);
- }
+ if (!type) {
+ if (!request_module(name)) {
+ CDEBUG(D_INFO, "Loaded module '%s'\n", name);
+ type = class_search_type(name);
+ } else
+ CDEBUG(D_INFO, "Can't load module '%s'\n", name);
}
#endif
+ if (type)
+ __MOD_INC_USE_COUNT(type->typ_ops->o_owner);
return type;
}
+/* Drop the module use count taken on the owner of @type's operations.
+ * @type must not be NULL (asserted).
+ */
+void class_put_type(struct obd_type *type)
+{
+        struct obd_ops *ops;
+
+        LASSERT(type);
+
+        ops = type->typ_ops;
+        __MOD_DEC_USE_COUNT(ops->o_owner);
+}
+
int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
- char *nm)
+ char *name)
{
struct obd_type *type;
int rc;
-
ENTRY;
- LASSERT (strnlen (nm, 1024) < 1024); /* sanity check */
-
- if (class_search_type(nm)) {
- CDEBUG(D_IOCTL, "Type %s already registered\n", nm);
+ LASSERT(strnlen(name, 1024) < 1024); /* sanity check */
+
+ if (class_search_type(name)) {
+ CDEBUG(D_IOCTL, "Type %s already registered\n", name);
RETURN(-EEXIST);
}
RETURN(rc);
OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops));
- OBD_ALLOC(type->typ_name, strlen(nm) + 1);
- if (type->typ_ops == NULL ||
- type->typ_name == NULL)
+ OBD_ALLOC(type->typ_name, strlen(name) + 1);
+ if (type->typ_ops == NULL || type->typ_name == NULL)
GOTO (failed, rc);
-
+
*(type->typ_ops) = *ops;
- strcpy(type->typ_name, nm);
+ strcpy(type->typ_name, name);
list_add(&type->typ_chain, &obd_types);
rc = lprocfs_reg_class(type, vars, type);
if (rc != 0) {
- list_del (&type->typ_chain);
- GOTO (failed, rc);
+ list_del(&type->typ_chain);
+ GOTO(failed, rc);
}
-
- CDEBUG(D_INFO, "MOD_INC_USE for register_type: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount));
- MOD_INC_USE_COUNT;
+
RETURN (0);
failed:
if (type->typ_ops != NULL)
- OBD_FREE (type->typ_name, strlen (nm) + 1);
+ OBD_FREE(type->typ_name, strlen(name) + 1);
if (type->typ_ops != NULL)
OBD_FREE (type->typ_ops, sizeof (*type->typ_ops));
RETURN(rc);
}
-int class_unregister_type(char *nm)
+int class_unregister_type(char *name)
{
- struct obd_type *type = class_nm_to_type(nm);
-
+ struct obd_type *type = class_search_type(name);
ENTRY;
if (!type) {
}
if (type->typ_refcnt) {
- CERROR("type %s has refcount (%d)\n", nm, type->typ_refcnt);
+ CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
/* This is a bad situation, let's make the best of it */
/* Remove ops, but leave the name for debugging */
OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
lprocfs_dereg_class(type);
list_del(&type->typ_chain);
- OBD_FREE(type->typ_name, strlen(nm) + 1);
+ OBD_FREE(type->typ_name, strlen(name) + 1);
if (type->typ_ops != NULL)
OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
OBD_FREE(type, sizeof(*type));
- CDEBUG(D_INFO, "MOD_DEC_USE for register_type: count = %d\n",
- atomic_read(&(THIS_MODULE)->uc.usecount) - 1);
- MOD_DEC_USE_COUNT;
RETURN(0);
} /* class_unregister_type */
if (!name)
return -1;
- for (i=0; i < MAX_OBD_DEVICES; i++) {
+ for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
res = i;
int res = -1;
int i;
- for (i=0; i < MAX_OBD_DEVICES; i++) {
+ for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0) {
res = i;
{
int i;
- for (i=0; i < MAX_OBD_DEVICES; i++) {
+ for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd = &obd_dev[i];
if (strncmp(uuid, obd->obd_uuid, sizeof(obd->obd_uuid)) == 0)
return obd;
spin_unlock(&obddev->obd_dev_lock);
CERROR("force disconnecting %s:%s export %p\n",
export->exp_obd->obd_type->typ_name,
- export->exp_connection->c_remote_uuid, export);
+ export->exp_connection ?
+ (char *)export->exp_connection->c_remote_uuid :
+ "<unconnected>", export);
rc = obd_disconnect(&conn);
if (rc < 0) {
/* AED: not so sure about this... We can't
rm_entry = temp;
temp = temp->parent;
remove_proc_entry(rm_entry->name, rm_entry->parent);
- if (temp == parent) break;
+ if (temp == parent)
+ break;
}
}
new_root = root;
mover_str = temp_string;
while ((my_str = strsep(&mover_str, tok))) {
- if(!*my_str)
+ if (!*my_str)
continue;
CDEBUG(D_OTHER, "SEARCH= %s\t, ROOT=%s\n", my_str,
new_root->name);
CDEBUG(D_OTHER, "Adding: %s\n", my_str);
temp_entry = lprocfs_mkdir(my_str, new_root);
if (temp_entry == NULL) {
- CDEBUG(D_OTHER,
+ CDEBUG(D_OTHER,
"! Did not create new dir %s !!\n",
my_str);
return temp_entry;
return new_root;
}
-int lprocfs_new_vars(struct proc_dir_entry* root,
- struct lprocfs_vars* list,
+int lprocfs_new_vars(struct proc_dir_entry* root, struct lprocfs_vars* list,
const char* tok, void* data)
{
struct proc_dir_entry *temp_root;
{
struct proc_dir_entry* this_dev_root;
int retval;
-
- if(lprocfs_srch(device->obd_type->typ_procroot, device->obd_name)){
- CDEBUG(D_OTHER, "Device with name [%s] exists!",
+
+ if (lprocfs_srch(device->obd_type->typ_procroot, device->obd_name)) {
+ CDEBUG(D_OTHER, "Device with name [%s] exists!",
device->obd_name);
return 0;
}
struct proc_dir_entry* lprocfs_reg_mnt(char* mnt_name)
{
- if(lprocfs_srch(proc_lustre_fs_root, mnt_name)){
+ if (lprocfs_srch(proc_lustre_fs_root, mnt_name)) {
CDEBUG(D_OTHER, "Mount with same name exists!");
return 0;
}
int lprocfs_dereg_mnt(struct proc_dir_entry* root)
{
- if(root == NULL){
+ if (root == NULL) {
CDEBUG(D_OTHER, "Non-existent root!");
return 0;
}
int lprocfs_reg_class(struct obd_type* type, struct lprocfs_vars* list,
void* data)
{
-
struct proc_dir_entry* root;
int retval;
root = lprocfs_mkdir(type->typ_name, proc_lustre_dev_root);
int lprocfs_dereg_class(struct obd_type* class)
{
- if(class == NULL){
- CDEBUG(D_OTHER, "Non-existent class",
- class->typ_name);
+ if (class == NULL) {
+ CDEBUG(D_OTHER, "Non-existent class");
return 0;
}
lprocfs_remove_all(class->typ_procroot);
return 0;
}
+
int lprocfs_reg_main()
{
proc_lustre_root = lprocfs_mkdir("lustre", &proc_root);
*
*/
-#define DEBUG_SUBSYSTEM S_LLITE
+#define DEBUG_SUBSYSTEM S_CLASS
+#define EXPORT_SYMTAB
#include <linux/lustre_net.h>
#include <linux/obd_support.h>
tgt->os_namelen = HTON__u32(src->os_namelen);
}
-#define obd_statfs_unpack(tgt, src) obd_statfs_pack(tgt, src)
+/* Unpack @src into @tgt.  Unpacking is done by the same routine as
+ * packing: this simply forwards both arguments to obd_statfs_pack().
+ */
+void obd_statfs_unpack(struct obd_statfs *tgt, struct obd_statfs *src)
+{
+ obd_statfs_pack(tgt, src);
+}
void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs)
{
sfs->f_namelen = osfs->os_namelen;
}
+EXPORT_SYMBOL(obd_statfs_pack);
+EXPORT_SYMBOL(obd_statfs_unpack);
+EXPORT_SYMBOL(statfs_pack);
+EXPORT_SYMBOL(statfs_unpack);
obd_uuid_t cluuid, struct recovd_obd *recovd,
ptlrpc_recovery_cb_t recover)
{
- int rc;
-
- MOD_INC_USE_COUNT;
- rc = class_connect(conn, obd, cluuid);
-
- if (rc)
- MOD_DEC_USE_COUNT;
-
- return rc;
-}
-
-static int echo_disconnect(struct lustre_handle *conn)
-{
- int rc;
-
- rc = class_disconnect(conn);
- if (!rc)
- MOD_DEC_USE_COUNT;
-
- return rc;
+ return class_connect(conn, obd, cluuid);
}
static __u64 echo_next_id(struct obd_device *obddev)
struct obd_device *obd = class_conn2obd(conn);
if (!obd) {
- CERROR("invalid client %Lx\n", conn->addr);
+ CERROR("invalid client "LPX64"\n", conn->addr);
return -EINVAL;
}
}
static struct obd_ops echo_obd_ops = {
- o_attach: echo_attach,
- o_detach: echo_detach,
- o_connect: echo_connect,
- o_disconnect: echo_disconnect,
- o_create: echo_create,
- o_destroy: echo_destroy,
- o_open: echo_open,
- o_close: echo_close,
- o_getattr: echo_getattr,
- o_setattr: echo_setattr,
- o_preprw: echo_preprw,
- o_commitrw: echo_commitrw,
- o_setup: echo_setup,
- o_cleanup: echo_cleanup
+ o_owner: THIS_MODULE,
+ o_attach: echo_attach,
+ o_detach: echo_detach,
+ o_connect: echo_connect,
+ o_disconnect: class_disconnect,
+ o_create: echo_create,
+ o_destroy: echo_destroy,
+ o_open: echo_open,
+ o_close: echo_close,
+ o_getattr: echo_getattr,
+ o_setattr: echo_setattr,
+ o_preprw: echo_preprw,
+ o_commitrw: echo_commitrw,
+ o_setup: echo_setup,
+ o_cleanup: echo_cleanup
};
extern int echo_client_init(void);
void *addr = kmap(pgp->pg);
rc = page_debug_check("test_brw", addr,
- PAGE_SIZE, pgp->off, id);
+ pgp->count, pgp->off, id);
kunmap(pgp->pg);
}
__free_pages(pgp->pg, 0);
GOTO(out, rc);
}
default:
- CERROR ("echo_ioctl(): unrecognised ioctl %#lx\n", cmd);
+ CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
GOTO (out, rc = -ENOTTY);
}
RETURN(-EINVAL);
}
- MOD_INC_USE_COUNT;
tgt = class_uuid2obd(data->ioc_inlbuf1);
if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
!(tgt->obd_flags & OBD_SET_UP)) {
CERROR("device not attached or not set up (%d)\n",
data->ioc_dev);
- GOTO(error_dec, rc = -EINVAL);
+ RETURN(rc = -EINVAL);
}
rc = obd_connect(&ec->conn, tgt, NULL, NULL, NULL);
- if (rc) {
+ if (rc)
CERROR("fail to connect to device %d\n", data->ioc_dev);
- GOTO(error_dec, rc = -EINVAL);
- }
- RETURN(rc);
-error_dec:
- MOD_DEC_USE_COUNT;
RETURN(rc);
}
RETURN(-EINVAL);
}
- MOD_DEC_USE_COUNT;
RETURN(0);
}
}
static struct obd_ops echo_obd_ops = {
+ o_owner: THIS_MODULE,
o_setup: echo_setup,
o_cleanup: echo_cleanup,
o_iocontrol: echo_iocontrol,
modulefs_DATA = obdfilter.o
EXTRA_PROGRAMS = obdfilter
-LINX=simple.c ll_pack.c
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c
-
+LINX=simple.c
simple.c:
test -e simple.c || ln -sf $(top_srcdir)/lib/simple.c
len = sprintf(name, LPU64, id);
CDEBUG(D_INODE, "opening object O/%*s/%s\n",
dparent->d_name.len, dparent->d_name.name, name);
- if (!locked)
- down(&dparent->d_inode->i_sem);
+ //if (!locked)
+ //down(&dparent->d_inode->i_sem);
dchild = lookup_one_len(name, dparent, len);
- if (!locked)
- up(&dparent->d_inode->i_sem);
+ //if (!locked)
+ //up(&dparent->d_inode->i_sem);
if (IS_ERR(dchild)) {
CERROR("child lookup error %ld\n", PTR_ERR(dchild));
RETURN(dchild);
RETURN(ERR_PTR(-EINVAL));
}
- ffd = kmem_cache_alloc(filter_open_cache, SLAB_KERNEL);
+ PORTAL_SLAB_ALLOC(ffd, filter_open_cache, sizeof(*ffd));
if (!ffd) {
CERROR("obdfilter: out of memory\n");
RETURN(ERR_PTR(-ENOMEM));
pop_ctxt(&saved, &filter->fo_ctxt, NULL);
if (IS_ERR(file)) {
- CERROR("error opening %s: rc %d\n", name, PTR_ERR(file));
+ CERROR("error opening %s: rc %ld\n", name, PTR_ERR(file));
GOTO(out_fdd, file);
}
kmem_cache_free(filter_dentry_cache, fdd);
out_ffd:
ffd->ffd_servercookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(filter_open_cache, ffd);
+ PORTAL_SLAB_FREE(ffd, filter_open_cache, sizeof(*ffd));
goto out;
}
}
f_dput(object_dentry);
- kmem_cache_free(filter_open_cache, ffd);
+ PORTAL_SLAB_FREE(ffd, filter_open_cache, sizeof(*ffd));
RETURN(rc);
}
}
static int filter_brw(int cmd, struct lustre_handle *conn,
- struct lov_stripe_md *lsm, obd_count oa_bufs,
- struct brw_page *pga, struct obd_brw_set *set)
+ struct lov_stripe_md *lsm, obd_count oa_bufs,
+ struct brw_page *pga, struct obd_brw_set *set)
{
struct obd_ioobj ioo;
struct niobuf_local *lnb;
OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
- if ( lnb == NULL || rnb == NULL )
+ if (lnb == NULL || rnb == NULL)
GOTO(out, ret = -ENOMEM);
- for ( i = 0 ; i < oa_bufs ; i++ ) {
+ for (i = 0; i < oa_bufs; i++) {
rnb[i].offset = pga[i].off;
rnb[i].len = pga[i].count;
}
ioo.ioo_type = S_IFREG;
ioo.ioo_bufcnt = oa_bufs;
- ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb,
- &desc_private);
- if ( ret != 0 )
+ ret = filter_preprw(cmd, conn, 1, &ioo, oa_bufs, rnb, lnb,
+ &desc_private);
+ if (ret != 0)
GOTO(out, ret);
- for ( i = 0; i < oa_bufs ; i++ ) {
+ for (i = 0; i < oa_bufs; i++) {
void *virt = kmap(pga[i].pg);
obd_off off = pga[i].off & ~PAGE_MASK;
- if ( cmd & OBD_BRW_WRITE )
+ if (cmd & OBD_BRW_WRITE)
memcpy(lnb[i].addr + off, virt + off, pga[i].count);
else
memcpy(virt + off, lnb[i].addr + off, pga[i].count);
ret = filter_commitrw(cmd, conn, 1, &ioo, oa_bufs, lnb, desc_private);
out:
- if ( lnb )
+ if (lnb)
OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
- if ( rnb )
+ if (rnb)
OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
RETURN(ret);
}
}
static struct obd_ops filter_obd_ops = {
- o_attach: filter_attach,
- o_detach: filter_detach,
- o_get_info: filter_get_info,
- o_setup: filter_setup,
- o_cleanup: filter_cleanup,
- o_connect: filter_connect,
- o_disconnect: filter_disconnect,
- o_statfs: filter_statfs,
- o_getattr: filter_getattr,
- o_create: filter_create,
- o_setattr: filter_setattr,
- o_destroy: filter_destroy,
- o_open: filter_open,
- o_close: filter_close,
- o_brw: filter_brw,
- o_punch: filter_truncate,
- o_preprw: filter_preprw,
- o_commitrw: filter_commitrw
+ o_owner: THIS_MODULE,
+ o_attach: filter_attach,
+ o_detach: filter_detach,
+ o_get_info: filter_get_info,
+ o_setup: filter_setup,
+ o_cleanup: filter_cleanup,
+ o_connect: filter_connect,
+ o_disconnect: filter_disconnect,
+ o_statfs: filter_statfs,
+ o_getattr: filter_getattr,
+ o_create: filter_create,
+ o_setattr: filter_setattr,
+ o_destroy: filter_destroy,
+ o_open: filter_open,
+ o_close: filter_close,
+ o_brw: filter_brw,
+ o_punch: filter_truncate,
+ o_preprw: filter_preprw,
+ o_commitrw: filter_commitrw
#if 0
- o_preallocate: filter_preallocate_inodes,
- o_migrate: filter_migrate,
- o_copy: filter_copy_data,
- o_iterate: filter_iterate
+ o_preallocate: filter_preallocate_inodes,
+ o_migrate: filter_migrate,
+ o_copy: filter_copy_data,
+ o_iterate: filter_iterate
#endif
};
modulefs_DATA = osc.o
EXTRA_PROGRAMS = osc
-LINX= obd_pack.c ll_pack.c client.c
+LINX= obd_pack.c client.c
osc_SOURCES = osc_request.c lproc_osc.c $(LINX)
obd_pack.c:
test -e obd_pack.c || ln -sf $(top_srcdir)/lib/obd_pack.c
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c
client.c:
test -e client.c || ln -sf $(top_srcdir)/lib/client.c
}
/* this is the callback function which is invoked by the Portals
- * event handler associated with the bulk_sink queue and bulk_source queue.
+ * event handler associated with the bulk_sink queue and bulk_source queue.
*/
static void osc_ptl_ev_hdlr(struct ptlrpc_bulk_desc *desc)
{
struct ptlrpc_bulk_desc *desc = NULL;
struct ost_body *body;
int rc, size[3] = {sizeof(*body)}, mapped = 0;
- void *iooptr, *nioptr;
+ unsigned long flags;
+ struct obd_ioobj *iooptr;
+ void *nioptr;
__u32 xid;
ENTRY;
ost_pack_ioo(&iooptr, lsm, page_count);
/* end almost identical to brw_write case */
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
xid = ++imp->imp_last_xid; /* single xid for all pages */
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_kmap_get(page_count, 0);
goto out_req;
}
-static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *md,
+static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *lsm,
obd_count page_count, struct brw_page *pga,
struct obd_brw_set *set)
{
- struct ptlrpc_connection *connection =
- client_conn2cli(conn)->cl_import.imp_connection;
+ struct obd_import *imp = class_conn2cliimp(conn);
+ struct ptlrpc_connection *connection = imp->imp_connection;
struct ptlrpc_request *request = NULL;
struct ptlrpc_bulk_desc *desc = NULL;
struct ost_body *body;
struct niobuf_local *local = NULL;
struct niobuf_remote *remote;
- int rc, j, size[3] = {sizeof(*body)}, mapped = 0;
- void *iooptr, *nioptr;
+ int rc, size[3] = {sizeof(*body)}, mapped = 0;
+ int j;
+ struct obd_ioobj *iooptr;
+ void *nioptr;
ENTRY;
size[1] = sizeof(struct obd_ioobj);
- size[2] = page_count * sizeof(*remote);
+ size[2] = page_count * sizeof(struct niobuf_remote);
- request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_WRITE, 3, size,
- NULL);
+ request = ptlrpc_prep_req(imp, OST_WRITE, 3, size, NULL);
if (!request)
RETURN(-ENOMEM);
desc = ptlrpc_prep_bulk(connection);
if (!desc)
- GOTO(out_req, rc = -ENOMEM);
+ GOTO(out_req, rc = -ENOMEM);
desc->bd_portal = OSC_BULK_PORTAL;
desc->bd_ptl_ev_hdlr = osc_ptl_ev_hdlr;
CDEBUG(D_PAGE, "desc = %p\n", desc);
iooptr = lustre_msg_buf(request->rq_reqmsg, 1);
nioptr = lustre_msg_buf(request->rq_reqmsg, 2);
- ost_pack_ioo(&iooptr, md, page_count);
+ ost_pack_ioo(&iooptr, lsm, page_count);
/* end almost identical to brw_read case */
OBD_ALLOC(local, page_count * sizeof(*local));
for (mapped = 0; mapped < page_count; mapped++) {
local[mapped].addr = kmap(pga[mapped].pg);
- CDEBUG(D_INFO, "kmap(pg) = %p ; pg->flags = %lx ; pg->count = "
+ CDEBUG(D_INFO, "kmap(pg) = %p ; pg->flags = %lx ; pg->refcount = "
"%d ; page %d of %d\n",
local[mapped].addr, pga[mapped].pg->flags,
page_count(pga[mapped].pg),
if (!bulk)
GOTO(out_unmap, rc = -ENOMEM);
- bulk->bp_buf = (void *)(unsigned long)local[j].addr;
+ bulk->bp_buf = local[j].addr;
bulk->bp_buflen = local[j].len;
bulk->bp_xid = remote->xid;
bulk->bp_page = pga[j].pg;
return rc;
}
+/* Retrieve object striping information.
+ *
+ * @lmmu points to a user-space struct lov_mds_md (it is read with
+ * copy_from_user() and written with copy_to_user()).  Its lmm_ost_count
+ * gives the maximum number of OST indices which will fit in the user
+ * buffer, and its lmm_magic must be LOV_MAGIC (we only use 1 slot here).
+ *
+ * Returns 0 on success, -ENODATA when @lsm is NULL, -EFAULT when the user
+ * buffer cannot be read or written, -EINVAL on a bad magic, -EOVERFLOW
+ * when the user buffer has no room for a single object, and -ENOMEM when
+ * the kernel copy cannot be allocated.
+ */
+static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                         struct lov_mds_md *lmmu)
+{
+        struct lov_mds_md lmm, *lmmk;
+        int rc, lmm_size;
+        ENTRY;
+
+        if (!lsm)
+                RETURN(-ENODATA);
+
+        rc = copy_from_user(&lmm, lmmu, sizeof(lmm));
+        if (rc)
+                RETURN(-EFAULT);
+
+        if (lmm.lmm_magic != LOV_MAGIC)
+                RETURN(-EINVAL);
+
+        if (lmm.lmm_ost_count < 1)
+                RETURN(-EOVERFLOW);
+
+        /* header plus exactly one object slot */
+        lmm_size = sizeof(lmm) + sizeof(lmm.lmm_objects[0]);
+        OBD_ALLOC(lmmk, lmm_size);
+        /* rc is 0 here, so the allocation itself has to be checked */
+        if (!lmmk)
+                RETURN(-ENOMEM);
+
+        /* NOTE(review): lmmk->lmm_magic is never assigned, so the value
+         * copied back depends on how OBD_ALLOC initialises the buffer -
+         * confirm callers do not rely on the magic being preserved.
+         */
+        lmmk->lmm_stripe_count = 1;
+        lmmk->lmm_ost_count = 1;
+        lmmk->lmm_object_id = lsm->lsm_object_id;
+        lmmk->lmm_objects[0].l_object_id = lsm->lsm_object_id;
+
+        if (copy_to_user(lmmu, lmmk, lmm_size))
+                rc = -EFAULT;
+
+        OBD_FREE(lmmk, lmm_size);
+
+        RETURN(rc);
+}
+
static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
void *karg, void *uarg)
{
OBD_FREE(buf, len);
GOTO(out, err);
}
+ case LL_IOC_LOV_SETSTRIPE:
+ err = obd_alloc_memmd(conn, karg);
+ if (err > 0)
+ err = 0;
+ GOTO(out, err);
+ case LL_IOC_LOV_GETSTRIPE:
+ err = osc_getstripe(conn, karg, uarg);
+ GOTO(out, err);
default:
- CERROR ("osc_ioctl(): unrecognised ioctl %#lx\n", cmd);
+ CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd);
GOTO(out, err = -ENOTTY);
}
out:
fakeconn.addr = (__u64)(unsigned long)exp;
fakeconn.cookie = exp->exp_cookie;
- ioc_data.ioc_inlbuf1 = imp->imp_obd->obd_uuid;
+ ioc_data.ioc_inlbuf1 = imp->imp_obd->u.cli.cl_target_uuid;
ioc_data.ioc_offset = active;
rc = obd_iocontrol(IOC_LOV_SET_OSC_ACTIVE, &fakeconn,
sizeof ioc_data, &ioc_data, NULL);
}
}
-
-/* XXX looks a lot like super.c:invalidate_request_list, don't it? */
-static void abort_inflight_for_import(struct obd_import *imp)
-{
- struct list_head *tmp, *n;
-
- /* Make sure that no new requests get processed for this import.
- * ptlrpc_queue_wait must (and does) hold imp_lock while testing this
- * flag and then putting requests on sending_list or delayed_list.
- */
- spin_lock(&imp->imp_lock);
- imp->imp_flags |= IMP_INVALID;
- spin_unlock(&imp->imp_lock);
-
- list_for_each_safe(tmp, n, &imp->imp_sending_list) {
- struct ptlrpc_request *req =
- list_entry(tmp, struct ptlrpc_request, rq_list);
-
- DEBUG_REQ(D_HA, req, "inflight");
- req->rq_flags |= PTL_RPC_FL_ERR;
- wake_up(&req->rq_wait_for_rep);
- }
-
- list_for_each_safe(tmp, n, &imp->imp_delayed_list) {
- struct ptlrpc_request *req =
- list_entry(tmp, struct ptlrpc_request, rq_list);
-
- DEBUG_REQ(D_HA, req, "aborting waiting req");
- req->rq_flags |= PTL_RPC_FL_ERR;
- wake_up(&req->rq_wait_for_rep);
- }
-}
-
static int osc_recover(struct obd_import *imp, int phase)
{
int rc;
+ unsigned long flags;
+ struct ptlrpc_request *req;
ENTRY;
switch(phase) {
case PTLRPC_RECOVD_PHASE_RECOVER:
imp->imp_flags &= ~IMP_INVALID;
- rc = ptlrpc_reconnect_import(imp, OST_CONNECT);
+ rc = ptlrpc_reconnect_import(imp, OST_CONNECT, &req);
+ ptlrpc_req_finished(req);
if (rc) {
imp->imp_flags |= IMP_INVALID;
RETURN(rc);
}
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+ /* Is this the right place? Should we do this in _PREPARE
+ * as well? What about raising the level right away?
+ */
+ ptlrpc_wake_delayed(imp);
set_osc_active(imp, 1 /* active */);
RETURN(0);
}
struct obd_ops osc_obd_ops = {
+ o_owner: THIS_MODULE,
o_attach: osc_attach,
o_detach: osc_detach,
o_setup: client_obd_setup,
modulefs_DATA = ost.o
EXTRA_PROGRAMS = ost
-LINX=obd_pack.c ll_pack.c target.c
+LINX=obd_pack.c target.c
-ll_pack.c:
- test -e ll_pack.c || ln -sf $(top_srcdir)/lib/ll_pack.c
obd_pack.c:
test -e obd_pack.c || ln -sf $(top_srcdir)/lib/obd_pack.c
target.c:
#include <linux/obd_ost.h>
#include <linux/lustre_net.h>
#include <linux/lustre_dlm.h>
+#include <linux/lustre_export.h>
#include <linux/init.h>
#include <linux/lprocfs_status.h>
static int ost_bulk_timeout(void *data)
{
- struct ptlrpc_bulk_desc *desc = data;
-
ENTRY;
- recovd_conn_fail(desc->bd_connection);
+ /* We don't fail the connection here, because having the export
+ * killed makes the (vital) call to commitrw very sad.
+ */
RETURN(1);
}
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ptlrpc_bulk_desc *desc;
- void *tmp1, *tmp2, *end2;
+ struct obd_ioobj *tmp1;
+ void *tmp2, *end2;
struct niobuf_remote *remote_nb;
struct niobuf_local *local_nb = NULL;
struct obd_ioobj *ioo;
{
struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct ptlrpc_bulk_desc *desc;
+ struct obd_ioobj *tmp1;
+ void *tmp2, *end2;
struct niobuf_remote *remote_nb;
- struct niobuf_local *local_nb, *lnb;
+ struct niobuf_local *local_nb = NULL;
+ struct niobuf_local *lnb;
struct obd_ioobj *ioo;
struct ost_body *body;
- int cmd, rc, i, j, objcount, niocount, size[2] = {sizeof(*body)};
- void *tmp1, *tmp2, *end2;
+ struct l_wait_info lwi;
+ int rc, cmd, i, j, objcount, niocount;
+ int size[2] = {sizeof(*body)};
void *desc_priv = NULL;
int reply_sent = 0;
struct ptlrpc_service *srv;
- struct l_wait_info lwi;
__u32 xid;
ENTRY;
if (rc) {
if (rc != -ETIMEDOUT)
LBUG();
- GOTO(fail_bulk, rc);
+ ptlrpc_abort_bulk(desc);
+ recovd_conn_fail(desc->bd_connection);
+ obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
+ desc->bd_desc_private);
+ } else {
+ rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
+ desc->bd_desc_private);
}
- rc = obd_commitrw(cmd, conn, objcount, tmp1, niocount, local_nb,
- desc->bd_desc_private);
ptlrpc_bulk_decref(desc);
EXIT;
out_free:
fail_bulk:
ptlrpc_free_bulk(desc);
fail_preprw:
- /* FIXME: how do we undo the preprw? */
+ /* FIXME: how do we undo the preprw? - answer = call commitrw */
goto out_free;
}
req->rq_export == NULL) {
CERROR("lustre_ost: operation %d on unconnected OST\n",
req->rq_reqmsg->opc);
+ req->rq_status = -ENOTCONN;
GOTO(out, rc = -ENOTCONN);
}
RETURN(-EINVAL);
}
- MOD_INC_USE_COUNT;
tgt = class_uuid2obd(data->ioc_inlbuf1);
if (!tgt || !(tgt->obd_flags & OBD_ATTACHED) ||
!(tgt->obd_flags & OBD_SET_UP)) {
CERROR("device not attached or not set up (%d)\n",
data->ioc_dev);
- GOTO(error_dec, err = -EINVAL);
+ RETURN(err = -EINVAL);
}
err = obd_connect(&ost->ost_conn, tgt, NULL, NULL, NULL);
if (err) {
CERROR("fail to connect to device %d\n", data->ioc_dev);
- GOTO(error_dec, err = -EINVAL);
+ RETURN(err);
}
ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS,
error_disc:
obd_disconnect(&ost->ost_conn);
-error_dec:
- MOD_DEC_USE_COUNT;
RETURN(err);
}
ptlrpc_unregister_service(ost->ost_service);
err = obd_disconnect(&ost->ost_conn);
- if (err) {
+ if (err)
CERROR("lustre ost: fail to disconnect device\n");
- RETURN(-EINVAL);
- }
- MOD_DEC_USE_COUNT;
- RETURN(0);
+ RETURN(err);
}
+
int ost_attach(struct obd_device *dev, obd_count len, void *data)
{
return lprocfs_reg_obd(dev, status_var_nm_1, dev);
int ost_detach(struct obd_device *dev)
{
return lprocfs_dereg_obd(dev);
-
}
+/* This is so similar to mds_connect that it makes my heart weep: we should
+ * shuffle the UUID into obd_export proper and make this all happen in
+ * target_handle_connect.
+ *
+ * Connect client @cluuid to @obd, filling in @conn.
+ *
+ * If an export carrying the same UUID already exists, the request is
+ * treated as a reconnect and the result of target_handle_reconnect() is
+ * returned.  Otherwise a new export is created with class_connect() and
+ * the client UUID is recorded in its OST export data.  The special UUID
+ * "OBD_CLASS_UUID" skips the search for an existing export.  @recovd and
+ * @recover are not used.
+ *
+ * Returns 0 on success, -EINVAL when @conn, @obd or @cluuid is NULL, or
+ * the error from class_connect() / target_handle_reconnect().
+ */
+static int ost_connect(struct lustre_handle *conn,
+                       struct obd_device *obd, obd_uuid_t cluuid,
+                       struct recovd_obd *recovd,
+                       ptlrpc_recovery_cb_t recover)
+{
+        struct obd_export *exp;
+        struct ost_export_data *oed;
+        struct list_head *p;
+        int rc;
+        ENTRY;
+
+        if (!conn || !obd || !cluuid)
+                RETURN(-EINVAL);
+
+        /* lctl gets a backstage, all-access pass. */
+        if (!strcmp(cluuid, "OBD_CLASS_UUID"))
+                goto dont_check_exports;
+
+        spin_lock(&obd->obd_dev_lock);
+        list_for_each(p, &obd->obd_exports) {
+                exp = list_entry(p, struct obd_export, exp_obd_chain);
+                oed = &exp->exp_ost_data;
+                if (!memcmp(cluuid, oed->oed_uuid, sizeof oed->oed_uuid)) {
+                        spin_unlock(&obd->obd_dev_lock);
+                        LASSERT(exp->exp_obd == obd);
+
+                        RETURN(target_handle_reconnect(conn, exp, cluuid));
+                }
+        }
+        /* No export matched: the lock must be dropped on this path too,
+         * before falling through to class_connect().
+         */
+        spin_unlock(&obd->obd_dev_lock);
+
+ dont_check_exports:
+        rc = class_connect(conn, obd, cluuid);
+        if (rc)
+                RETURN(rc);
+        exp = class_conn2export(conn);
+        LASSERT(exp);
+
+        oed = &exp->exp_ost_data;
+        memcpy(oed->oed_uuid, cluuid, sizeof oed->oed_uuid);
+
+        RETURN(0);
+}
/* use obd ops to offer management infrastructure */
static struct obd_ops ost_obd_ops = {
- o_attach: ost_attach,
- o_detach: ost_detach,
- o_setup: ost_setup,
- o_cleanup: ost_cleanup,
+ o_owner: THIS_MODULE,
+ o_attach: ost_attach,
+ o_detach: ost_detach,
+ o_setup: ost_setup,
+ o_cleanup: ost_cleanup,
+ o_connect: ost_connect,
};
static int __init ost_init(void)
{
int rc;
- rc = class_register_type(&ost_obd_ops, status_class_var,
+ rc = class_register_type(&ost_obd_ops, status_class_var,
LUSTRE_OST_NAME);
RETURN(rc);
static void __exit ost_exit(void)
{
-
class_unregister_type(LUSTRE_OST_NAME);
}
+++ /dev/null
-.Xrefs
-config.log
-config.status
-configure
-Makefile
-Makefile.in
-.deps
-TAGS
--- /dev/null
+.deps
+Makefile
+Makefile.in
--- /dev/null
+# Copyright (C) 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+DEFS=
+
+MODULE = ptlbd
+modulefs_DATA = ptlbd.o
+EXTRA_PROGRAMS = ptlbd
+
+ptlbd_SOURCES = blk.c client.c main.c rpc.c server.c
+
+include $(top_srcdir)/Rules
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/module.h>
+#include <linux/major.h>
+#include <linux/smp.h>
+
+#define DEBUG_SUBSYSTEM S_PTLBD
+
+#include <linux/lustre_lite.h>
+#include <linux/lustre_ha.h>
+#include <linux/obd_support.h>
+#include <linux/lustre_idl.h>
+#include <linux/obd_ptlbd.h>
+
+/*
+ * todo:
+ * assign proper major number
+ * allow more minors
+ * discover actual block sizes?
+ * allow more than one sector per io
+ * think about vary-io
+ * restrict single ops to sequential block io
+ * ddn target addresses need to be 32 bit
+ * can't get to addresses after 0xFFFF0000
+ */
+
+#define PTLBD_MAJOR 253
+#define PTLBD_MAX_MINOR 1
+
+#define MAJOR_NR PTLBD_MAJOR
+#define LOCAL_END_REQUEST
+#include <linux/blk.h>
+#include <linux/blkdev.h>
+#include <linux/devfs_fs_kernel.h>
+
+static int ptlbd_size_size[PTLBD_MAX_MINOR];
+static int ptlbd_size[PTLBD_MAX_MINOR];
+static int ptlbd_hardsect_size[PTLBD_MAX_MINOR];
+static int ptlbd_max_sectors[PTLBD_MAX_MINOR];
+//RHism static char ptlbd_dev_varyio[PTLBD_MAX_MINOR];
+
+/*
+ * per minor state, indexed by minor.
+ */
+
+static struct ptlbd_obd *one_for_now;
+
+/* Record ptlbd as the device instance that the block-device entry points
+ * in this file operate on.  Only one instance is tracked: each call simply
+ * overwrites one_for_now. */
+void ptlbd_blk_register(struct ptlbd_obd *ptlbd)
+{
+ ENTRY;
+ one_for_now = ptlbd;
+ EXIT;
+}
+
+/* Map a minor number to its ptlbd instance.
+ *
+ * Returns the registered instance, or ERR_PTR(-ENODEV) when the minor is
+ * out of range or when no instance has been registered yet.  Callers test
+ * the result with IS_ERR() only, so NULL must never be returned.
+ */
+static struct ptlbd_obd * ptlbd_get_minor(int minor)
+{
+        ENTRY;
+        if ( minor < 0 || minor >= PTLBD_MAX_MINOR )
+                RETURN( ERR_PTR(-ENODEV) );
+        /* ptlbd_blk_register() has not been called (or was given NULL) */
+        if ( one_for_now == NULL )
+                RETURN( ERR_PTR(-ENODEV) );
+        RETURN(one_for_now);
+}
+
+/* Resolve the ptlbd instance behind a block-device inode.
+ *
+ * Returns ERR_PTR(-EINVAL) for a NULL inode, otherwise whatever
+ * ptlbd_get_minor() returns for the inode's minor number.
+ */
+static struct ptlbd_obd * ptlbd_get_inode(struct inode *inode)
+{
+        ENTRY;
+
+        if ( inode == NULL ) /* can this really happen? */
+                RETURN( ERR_PTR(-EINVAL) );
+
+        /* RETURN, not return: keeps the ENTRY/RETURN trace balanced */
+        RETURN(ptlbd_get_minor(MINOR(inode->i_rdev)));
+}
+
+/* Block-device open: fails with the lookup error if the inode does not map
+ * to a device, with -ENODEV if the device has no import connection, and
+ * otherwise takes one reference on the device. */
+static int ptlbd_open(struct inode *inode, struct file *file)
+{
+        struct ptlbd_obd *dev = ptlbd_get_inode(inode);
+        int rc = 0;
+        ENTRY;
+
+        if ( IS_ERR(dev) )
+                rc = PTR_ERR(dev);
+        else if ( dev->bd_import.imp_connection == NULL )
+                rc = -ENODEV;
+        else
+                dev->refcount++;
+
+        RETURN(rc);
+}
+
+/* Block-device ioctl entry point.
+ *
+ * No command is implemented yet: after the CAP_SYS_ADMIN check (-EPERM) and
+ * the device lookup (lookup error), every request is rejected with -EINVAL.
+ */
+static int ptlbd_ioctl(struct inode *inode, struct file *file,
+                       unsigned int cmd, unsigned long arg)
+{
+        struct ptlbd_obd *ptlbd;
+        ENTRY;  /* pairs with the RETURN()s below */
+
+        if ( ! capable(CAP_SYS_ADMIN) )
+                RETURN(-EPERM);
+
+        ptlbd = ptlbd_get_inode(inode);
+        if ( IS_ERR(ptlbd) )
+                RETURN( PTR_ERR(ptlbd) );
+
+        /* XXX getattr{,64} */
+
+        RETURN(-EINVAL);
+}
+
+/* Block-device release: drops the reference taken by ptlbd_open(), or
+ * returns the lookup error if the inode does not map to a device. */
+static int ptlbd_release(struct inode *inode, struct file *file)
+{
+        struct ptlbd_obd *dev = ptlbd_get_inode(inode);
+        int rc = 0;
+        ENTRY;
+
+        if ( IS_ERR(dev) )
+                rc = PTR_ERR(dev);
+        else
+                dev->refcount--;
+
+        RETURN(rc);
+}
+
+/* Complete every buffer head chained on req and release the request.
+ * Buffers are reported up to date unless req->errors is non-zero.
+ * The "havelock" suffix suggests the caller holds io_request_lock, as
+ * ptlbd_request() does. */
+static void ptlbd_end_request_havelock(struct request *req)
+{
+        int uptodate = req->errors ? 0 : 1;
+        struct buffer_head *cur;
+
+        for ( cur = req->bh ; cur != NULL ; cur = req->bh ) {
+                blk_finished_io(cur->b_size >> 9);
+                /* unlink before completion; cur is not touched afterwards */
+                req->bh = cur->b_reqnext;
+                cur->b_reqnext = NULL;
+                cur->b_end_io(cur, uptodate);
+        }
+        blkdev_release_request(req);
+}
+
+#if 0
+static void ptlbd_end_request_getlock(struct request *req)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&io_request_lock, flags);
+ ptlbd_end_request_havelock(req);
+ spin_unlock_irqrestore(&io_request_lock, flags);
+}
+#endif
+
+/* Request function for the ptlbd queue.
+ *
+ * Drains the queue one request at a time.  A request for a device that is
+ * missing or not open is failed immediately.  Otherwise io_request_lock is
+ * dropped while the buffer chain is sent synchronously with
+ * ptlbd_send_req(), then retaken to complete the request.  A non-zero
+ * result from ptlbd_send_req() marks the request as failed so its buffers
+ * are not reported up to date.
+ */
+static void ptlbd_request(request_queue_t *q)
+{
+        struct ptlbd_obd *ptlbd;
+        struct request *req;
+        ptlbd_cmd_t cmd;
+        ENTRY;
+
+        while ( !QUEUE_EMPTY ) {
+                req = CURRENT;
+                ptlbd = ptlbd_get_minor(MINOR(req->rq_dev));
+
+                blkdev_dequeue_request(req);
+
+                /* no usable device: fail this request and keep draining,
+                 * so later requests are not stranded on the queue */
+                if ( ptlbd == NULL || IS_ERR(ptlbd) ||
+                     ptlbd->refcount <= 0 ) {
+                        req->errors++;
+                        ptlbd_end_request_havelock(req);
+                        continue;
+                }
+
+                spin_unlock_irq(&io_request_lock);
+
+                /* XXX dunno if we're supposed to get this or not.. */
+                LASSERT(req->cmd != READA);
+
+                if ( req->cmd == READ )
+                        cmd = PTLBD_READ;
+                else
+                        cmd = PTLBD_WRITE;
+
+                if ( ptlbd_send_req(ptlbd, cmd, req->bh) )
+                        req->errors++;
+
+                spin_lock_irq(&io_request_lock);
+
+                ptlbd_end_request_havelock(req);
+        }
+        EXIT;
+}
+
+/* Block-device method table handed to register_blkdev() in
+ * ptlbd_blk_init(). */
+static struct block_device_operations ptlbd_ops = {
+ .owner = THIS_MODULE,
+ .open = ptlbd_open,
+ .release = ptlbd_release,
+ .ioctl = ptlbd_ioctl,
+};
+
+/* Register the ptlbd block major and set up its request queue.
+ *
+ * Returns 0 on success or the negative error from register_blkdev().
+ */
+int ptlbd_blk_init(void)
+{
+        int ret;
+        int i;
+        ENTRY;
+
+        ret = register_blkdev(PTLBD_MAJOR, "ptlbd", &ptlbd_ops);
+        if ( ret < 0 )
+                RETURN(ret);
+
+        /* Fill the per-minor tables before they are hooked into the global
+         * arrays, so nobody can observe them zeroed. */
+        for ( i = 0 ; i < PTLBD_MAX_MINOR ; i++) {
+                ptlbd_size_size[i] = 4096;
+                ptlbd_size[i] = (4096*2048) >> BLOCK_SIZE_BITS;
+                ptlbd_hardsect_size[i] = 4096;
+                ptlbd_max_sectors[i] = 2;
+                //RHism ptlbd_dev_varyio[i] = 0;
+                /* XXX register_disk? */
+        }
+
+        blk_size[PTLBD_MAJOR] = ptlbd_size;
+        blksize_size[PTLBD_MAJOR] = ptlbd_size_size;
+        hardsect_size[PTLBD_MAJOR] = ptlbd_hardsect_size;
+        max_sectors[PTLBD_MAJOR] = ptlbd_max_sectors;
+        //RHism blkdev_varyio[PTLBD_MAJOR] = ptlbd_dev_varyio;
+
+        blk_init_queue(BLK_DEFAULT_QUEUE(PTLBD_MAJOR), ptlbd_request);
+        blk_queue_headactive(BLK_DEFAULT_QUEUE(PTLBD_MAJOR), 0);
+
+        RETURN(0);
+}
+
+/* Undo ptlbd_blk_init(): tear down the queue, unhook the per-minor tables
+ * and give the major number back.  A failure to unregister is only
+ * logged. */
+void ptlbd_blk_exit(void)
+{
+        int ret;
+        ENTRY;
+        blk_cleanup_queue(BLK_DEFAULT_QUEUE(PTLBD_MAJOR));
+
+        /* The global tables point at static arrays in this module; clear
+         * them so nothing dangles once the module text/data is gone. */
+        blk_size[PTLBD_MAJOR] = NULL;
+        blksize_size[PTLBD_MAJOR] = NULL;
+        hardsect_size[PTLBD_MAJOR] = NULL;
+        max_sectors[PTLBD_MAJOR] = NULL;
+
+        ret = unregister_blkdev(PTLBD_MAJOR, "ptlbd");
+        if ( ret ) /* XXX */
+                printk("unregister_blkdev() failed: %d\n", ret);
+        EXIT;
+}
+
+#undef MAJOR_NR
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#define DEBUG_SUBSYSTEM S_PTLBD
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_debug.h>
+#include <linux/lprocfs_status.h>
+#include <linux/obd_ptlbd.h>
+
+/* Set up a ptlbd client device.
+ *
+ * buf is a struct obd_ioctl_data whose first inline buffer carries the UUID
+ * of the PTLBD server.  The UUID is resolved to a connection, the import is
+ * initialised at LUSTRE_CONN_FULL, and the device is registered with the
+ * block layer glue.
+ *
+ * Returns 0 on success, -EALREADY if the import already has a connection,
+ * -EINVAL for a missing or over-long UUID, -ENOENT if the UUID cannot be
+ * resolved to a connection.
+ */
+static int ptlbd_cl_setup(struct obd_device *obddev, obd_count len, void *buf)
+{
+        struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
+        struct obd_import *imp = &ptlbd->bd_import;
+        struct obd_ioctl_data* data = buf;
+        obd_uuid_t server_uuid;
+        ENTRY;
+
+        if ( imp->imp_connection != NULL )
+                RETURN(-EALREADY);
+
+        if (data->ioc_inllen1 < 1) {
+                CERROR("requires a PTLBD server UUID\n");
+                RETURN(-EINVAL);
+        }
+
+        if (data->ioc_inllen1 > 37) {
+                CERROR("PTLBD server UUID must be less than 38 characters\n");
+                RETURN(-EINVAL);
+        }
+
+        /* server_uuid lives on the stack: clear it and force a terminator
+         * so a short or unterminated ioctl buffer cannot leave stale bytes
+         * in the UUID that is looked up below. */
+        memset(server_uuid, 0, sizeof(server_uuid));
+        memcpy(server_uuid, data->ioc_inlbuf1, MIN(data->ioc_inllen1,
+                                                   sizeof(server_uuid)));
+        server_uuid[sizeof(server_uuid) - 1] = '\0';
+
+        imp->imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+        if (!imp->imp_connection)
+                RETURN(-ENOENT);
+
+        INIT_LIST_HEAD(&imp->imp_replay_list);
+        INIT_LIST_HEAD(&imp->imp_sending_list);
+        INIT_LIST_HEAD(&imp->imp_delayed_list);
+        spin_lock_init(&imp->imp_lock);
+        /*
+         * from client_obd_connect.. *shrug*
+         */
+        INIT_LIST_HEAD(&imp->imp_chain);
+        imp->imp_last_xid = 0;
+        imp->imp_max_transno = 0;
+        imp->imp_peer_last_xid = 0;
+        imp->imp_peer_committed_transno = 0;
+        imp->imp_level = LUSTRE_CONN_FULL;
+
+        ptlrpc_init_client(PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL,
+                           "ptlbd", &ptlbd->bd_client);
+        imp->imp_client = &ptlbd->bd_client;
+        imp->imp_obd = obddev;
+
+        ptlbd_blk_register(ptlbd);
+
+        RETURN(0);
+}
+
+/* Tear down a ptlbd client device.
+ *
+ * Refuses with -EBUSY while the block device is still open.  Otherwise the
+ * connection reference taken in ptlbd_cl_setup() is dropped and the import
+ * is marked unconnected, which lets the device be set up again and makes
+ * ptlbd_open() fail with -ENODEV in the meantime.
+ *
+ * NOTE(review): requests possibly still queued on the import are not
+ * flushed here - confirm whether ptlrpc needs an explicit client cleanup
+ * before the connection is released.
+ */
+static int ptlbd_cl_cleanup(struct obd_device *obddev)
+{
+        struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
+        struct obd_import *imp = &ptlbd->bd_import;
+        ENTRY;
+
+        if ( ptlbd->refcount > 0 )
+                RETURN(-EBUSY);
+
+        if ( imp->imp_connection != NULL ) {
+                ptlrpc_put_connection(imp->imp_connection);
+                imp->imp_connection = NULL;
+        }
+
+        RETURN(0);
+}
+
+#if 0
+static int ptlbd_cl_connect(struct lustre_handle *conn, struct obd_device *obd,
+ obd_uuid_t cluuid, struct recovd_obd *recovd,
+ ptlrpc_recovery_cb_t recover)
+{
+ struct ptlbd_obd *ptlbd = &obd->u.ptlbd;
+ struct obd_import *imp = &ptlbd->bd_import;
+ int rc;
+ ENTRY;
+
+ rc = class_connect(conn, obd, cluuid);
+ if (rc)
+ RETURN(rc);
+
+ INIT_LIST_HEAD(&imp->imp_chain);
+ imp->imp_last_xid = 0;
+ imp->imp_max_transno = 0;
+ imp->imp_peer_last_xid = 0;
+ imp->imp_peer_committed_transno = 0;
+ imp->imp_level = LUSTRE_CONN_FULL;
+
+ RETURN(0);
+}
+#endif
+
+/* OBD method table for the ptlbd client type.  Only setup and cleanup are
+ * wired up; the connect/disconnect entries are compiled out. */
+static struct obd_ops ptlbd_cl_obd_ops = {
+ o_owner: THIS_MODULE,
+ o_setup: ptlbd_cl_setup,
+ o_cleanup: ptlbd_cl_cleanup,
+#if 0
+ o_connect: ptlbd_cl_connect,
+ o_disconnect: class_disconnect
+#endif
+};
+
+/* Register the ptlbd client OBD type under OBD_PTLBD_CL_DEVICENAME and
+ * hand back whatever class_register_type() reports. */
+int ptlbd_cl_init(void)
+{
+        extern struct lprocfs_vars status_class_var[];
+        int rc;
+
+        rc = class_register_type(&ptlbd_cl_obd_ops, status_class_var,
+                                 OBD_PTLBD_CL_DEVICENAME);
+        return rc;
+}
+
+/* Unregister the ptlbd client OBD type registered by ptlbd_cl_init(). */
+void ptlbd_cl_exit(void)
+{
+ class_unregister_type(OBD_PTLBD_CL_DEVICENAME);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/major.h>
+#include <linux/smp.h>
+
+#define DEBUG_SUBSYSTEM S_PTLBD
+
+#include <linux/lustre_lite.h>
+#include <linux/lustre_ha.h>
+#include <linux/obd_support.h>
+
+#include <linux/obd_ptlbd.h>
+
+/* Module init: bring up the client type, the server type and the block
+ * device glue, in that order.  If a later step fails, the steps that
+ * already succeeded are undone in reverse order and the error is
+ * returned. */
+static int __init ptlbd_init(void)
+{
+ int ret;
+ ENTRY;
+
+ ret = ptlbd_cl_init();
+ if ( ret < 0 )
+ RETURN(ret);
+
+ ret = ptlbd_sv_init();
+ if ( ret < 0 )
+ GOTO(out_cl, ret);
+
+ ret = ptlbd_blk_init();
+ if ( ret < 0 )
+ GOTO(out_sv, ret);
+
+ RETURN(0);
+
+/* unwind: each label undoes the step before the one that failed */
+out_sv:
+ ptlbd_sv_exit();
+out_cl:
+ ptlbd_cl_exit();
+ RETURN(ret);
+}
+
+/* Module exit: undo everything ptlbd_init() set up, in reverse order.
+ * The block device glue must be torn down as well, otherwise the major
+ * number and request queue stay registered against unloaded code. */
+static void __exit ptlbd_exit(void)
+{
+        ENTRY;
+        ptlbd_blk_exit();
+        ptlbd_sv_exit();
+        ptlbd_cl_exit();
+        EXIT;
+}
+
+module_init(ptlbd_init);
+module_exit(ptlbd_exit);
+MODULE_LICENSE("GPL");
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#define DEBUG_SUBSYSTEM S_PTLBD
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_debug.h>
+#include <linux/lprocfs_status.h>
+#include <linux/obd_ptlbd.h>
+
+/* Advance the import's xid counter under imp_lock (interrupts disabled)
+ * and return the new value. */
+static __u32 get_next_xid(struct obd_import *imp)
+{
+        unsigned long irq_state;
+        __u32 next;
+
+        spin_lock_irqsave(&imp->imp_lock, irq_state);
+        next = ++imp->imp_last_xid;
+        spin_unlock_irqrestore(&imp->imp_lock, irq_state);
+
+        return next;
+}
+
+/* brw_callback installed on every ptlbd brw set.  It does nothing and
+ * always returns 0; it exists so ptlbd_ptl_ev_hdlr() has a callback to
+ * invoke. */
+static int ptlbd_brw_callback(struct obd_brw_set *set, int phase)
+{
+ ENTRY;
+ RETURN(0);
+}
+
+/* Deferred-work callback: data is the bulk descriptor whose reference is
+ * dropped here (scheduled from ptlbd_ptl_ev_hdlr()). */
+static void decref_bulk_desc(void *data)
+{
+        ENTRY;
+        ptlrpc_bulk_decref((struct ptlrpc_bulk_desc *)data);
+        EXIT;
+}
+
+/* Bulk event handler installed on client-side descriptors (bd_ptl_ev_hdlr).
+ * It runs the brw set's callback with CB_PHASE_FINISH and then queues
+ * decref_bulk_desc() so the descriptor reference is dropped from deferred
+ * work rather than from this handler.
+ */
+static void ptlbd_ptl_ev_hdlr(struct ptlrpc_bulk_desc *desc)
+{
+        struct obd_brw_set *set;
+        ENTRY;
+
+        LASSERT(desc->bd_brw_set != NULL);
+        LASSERT(desc->bd_brw_set->brw_callback != NULL);
+
+        set = desc->bd_brw_set;
+        set->brw_callback(set, CB_PHASE_FINISH);
+
+        prepare_work(&desc->bd_queue, decref_bulk_desc, desc);
+        schedule_work(&desc->bd_queue);
+
+        EXIT;
+}
+
+
+/* Send a write for the buffer chain starting at first_bh.
+ *
+ * The request carries one ptlbd_niob per buffer (block number, offset,
+ * length).  The reply carries the niobs back with the n_xid to use for each
+ * bulk page; the data is then pushed with ptlrpc_send_bulk().
+ *
+ * first_bh is a request buffer chain linked through b_reqnext, and
+ * page_count must be the number of buffers on it.
+ *
+ * Returns 0 or a negative error (-ENOMEM on allocation failure, otherwise
+ * the error from ptlrpc_queue_wait()/ptlrpc_send_bulk()).
+ */
+int ptlbd_write_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
+                        struct buffer_head *first_bh, unsigned int page_count)
+{
+        struct obd_import *imp = &ptlbd->bd_import;
+        struct ptlbd_op *op;
+        struct ptlbd_niob *niob, *niobs;
+        struct ptlrpc_request *req;
+        struct ptlrpc_bulk_desc *desc;
+        struct buffer_head *bh;
+        int rc, size[2];
+        struct obd_brw_set *set;
+        ENTRY;
+
+        size[0] = sizeof(struct ptlbd_op);
+        size[1] = page_count * sizeof(struct ptlbd_niob);
+
+        req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+        /* XXX might not need these */
+        req->rq_request_portal = PTLBD_REQUEST_PORTAL;
+        req->rq_reply_portal = PTLBD_REPLY_PORTAL;
+
+        op = lustre_msg_buf(req->rq_reqmsg, 0);
+        niobs = lustre_msg_buf(req->rq_reqmsg, 1);
+
+        /* XXX pack */
+        op->op_cmd = cmd;
+        op->op_lun = 0;
+        op->op_niob_cnt = page_count;
+        op->op__padding = 0;
+        op->op_block_cnt = page_count;
+
+        desc = ptlrpc_prep_bulk(imp->imp_connection);
+        if ( desc == NULL )
+                GOTO(out_req, rc = -ENOMEM);
+        desc->bd_portal = PTLBD_BULK_PORTAL;
+        desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
+
+        /* XXX someone needs to free this */
+        set = obd_brw_set_new();
+        if (set == NULL)
+                GOTO(out_desc, rc = -ENOMEM);
+
+        set->brw_callback = ptlbd_brw_callback;
+
+        /* Describe each buffer; the xid is filled in from the reply, so
+         * no bulk pages are prepared yet.  Request buffers are chained via
+         * b_reqnext (b_next is a different list). */
+        for ( niob = niobs, bh = first_bh ; bh ;
+              bh = bh->b_reqnext, niob++ ) {
+                niob->n_block_nr = bh->b_blocknr;
+                niob->n_offset = bh_offset(bh);
+                niob->n_length = bh->b_size;
+        }
+
+        size[0] = sizeof(struct ptlbd_rsp);
+        size[1] = sizeof(struct ptlbd_niob) * page_count;
+        req->rq_replen = lustre_msg_size(2, size);
+
+        /* XXX find out how we're really supposed to manage levels */
+        req->rq_level = imp->imp_level;
+        rc = ptlrpc_queue_wait(req);
+        if (rc)
+                /* no usable reply: do not touch rq_repmsg */
+                GOTO(out_set, rc);
+
+        niob = lustre_msg_buf(req->rq_repmsg, 1);
+        /* XXX check that op->num matches ours */
+        for ( bh = first_bh ; bh ; bh = bh->b_reqnext, niob++ ) {
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_set, rc = -ENOMEM);
+
+                bulk->bp_xid = niob->n_xid;
+                bulk->bp_page = bh->b_page;
+                bulk->bp_buf = bh->b_data;
+                bulk->bp_buflen = bh->b_size;
+        }
+
+        obd_brw_set_add(set, desc);
+        rc = ptlrpc_send_bulk(desc);
+
+        /* if there's an error, no brw_finish called, just like
+         * osc_brw_read */
+
+        GOTO(out_req, rc);
+
+out_set:
+        obd_brw_set_free(set);
+out_desc:
+        ptlrpc_bulk_decref(desc);
+out_req:
+        ptlrpc_req_finished(req);
+out:
+        RETURN(rc);
+}
+
+/* Send a read for the buffer chain starting at first_bh.
+ *
+ * A single xid from get_next_xid() is used for every niob and bulk page.
+ * The bulk descriptor is registered before the request is queued, and the
+ * call then waits for the reply.
+ *
+ * first_bh is a request buffer chain linked through b_reqnext, and
+ * page_count must be the number of buffers on it.
+ *
+ * Returns 0 or a negative error (-ENOMEM on allocation failure, otherwise
+ * the error from ptlrpc_register_bulk()/ptlrpc_queue_wait()).
+ */
+int ptlbd_read_put_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
+                       struct buffer_head *first_bh, unsigned int page_count)
+{
+        struct obd_import *imp = &ptlbd->bd_import;
+        struct ptlbd_op *op;
+        struct ptlbd_niob *niob, *niobs;
+        struct ptlrpc_request *req;
+        struct ptlrpc_bulk_desc *desc;
+        struct buffer_head *bh;
+        int rc, rep_size, size[2];
+        struct obd_brw_set *set;
+        __u32 xid;
+        ENTRY;
+
+        size[0] = sizeof(struct ptlbd_op);
+        size[1] = page_count * sizeof(struct ptlbd_niob);
+
+        req = ptlrpc_prep_req(imp, cmd, 2, size, NULL);
+        if (!req)
+                GOTO(out, rc = -ENOMEM);
+        /* XXX might not need these? */
+        req->rq_request_portal = PTLBD_REQUEST_PORTAL;
+        req->rq_reply_portal = PTLBD_REPLY_PORTAL;
+
+        op = lustre_msg_buf(req->rq_reqmsg, 0);
+        niobs = lustre_msg_buf(req->rq_reqmsg, 1);
+
+        /* XXX pack */
+        op->op_cmd = cmd;
+        op->op_lun = 0;
+        op->op_niob_cnt = page_count;
+        op->op__padding = 0;
+        op->op_block_cnt = page_count;
+
+        desc = ptlrpc_prep_bulk(imp->imp_connection);
+        if ( desc == NULL )
+                GOTO(out_req, rc = -ENOMEM);
+        desc->bd_portal = PTLBD_BULK_PORTAL;
+        desc->bd_ptl_ev_hdlr = ptlbd_ptl_ev_hdlr;
+
+        /* XXX someone needs to free this */
+        set = obd_brw_set_new();
+        if (set == NULL)
+                GOTO(out_desc, rc = -ENOMEM);
+
+        set->brw_callback = ptlbd_brw_callback;
+
+        xid = get_next_xid(imp);
+
+        /* Request buffers are chained via b_reqnext (b_next is a different
+         * list). */
+        for ( niob = niobs, bh = first_bh ; bh ;
+              bh = bh->b_reqnext, niob++ ) {
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_set, rc = -ENOMEM);
+
+                niob->n_xid = xid;
+                niob->n_block_nr = bh->b_blocknr;
+                niob->n_offset = bh_offset(bh);
+                niob->n_length = bh->b_size;
+
+                bulk->bp_xid = xid;
+                bulk->bp_buf = bh->b_data;
+                bulk->bp_page = bh->b_page;
+                bulk->bp_buflen = bh->b_size;
+        }
+
+        /* XXX put in OBD_FAIL_CHECK for ptlbd? */
+        rc = ptlrpc_register_bulk(desc);
+        if (rc)
+                GOTO(out_set, rc);
+
+        obd_brw_set_add(set, desc);
+
+        rep_size = sizeof(struct ptlbd_rsp);
+        req->rq_replen = lustre_msg_size(1, &rep_size);
+
+        /* XXX find out how we're really supposed to manage levels */
+        req->rq_level = imp->imp_level;
+        rc = ptlrpc_queue_wait(req);
+
+        /* The reply body is not examined; only rc is reported. */
+
+        /* if there's an error, no brw_finish called, just like
+         * osc_brw_read */
+
+        GOTO(out_req, rc);
+
+out_set:
+        obd_brw_set_free(set);
+out_desc:
+        ptlrpc_bulk_decref(desc);
+out_req:
+        ptlrpc_req_finished(req);
+out:
+        RETURN(rc);
+}
+
+/* Entry point used by the block request function: count the buffers on the
+ * request chain (linked through b_reqnext) and dispatch to the read or
+ * write path.  Returns the path's result, or -EINVAL for any other cmd.
+ */
+int ptlbd_send_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd,
+                   struct buffer_head *first_bh)
+{
+        unsigned int page_count = 0;
+        struct buffer_head *bh;
+        int rc;
+        ENTRY;
+
+        for ( bh = first_bh ; bh ; bh = bh->b_reqnext )
+                page_count++;
+
+        switch (cmd) {
+        case PTLBD_READ:
+                rc = ptlbd_read_put_req(ptlbd, cmd,
+                                        first_bh, page_count);
+                break;
+        case PTLBD_WRITE:
+                rc = ptlbd_write_put_req(ptlbd, cmd,
+                                         first_bh, page_count);
+                break;
+        default:
+                rc = -EINVAL;
+                break;
+        }
+
+        RETURN(rc);
+}
+
+/* Timeout callback passed to LWI_TIMEOUT() for the server-side bulk waits.
+ * It only logs and returns 1; data (the bulk descriptor) is unused. */
+static int ptlbd_bulk_timeout(void *data)
+{
+/* struct ptlrpc_bulk_desc *desc = data;*/
+ ENTRY;
+
+ CERROR("ugh, timed out\n");
+
+ RETURN(1);
+}
+
+#define SILLY_MAX 2048
+/* Backing store for the server: one lazily allocated page per block. */
+static struct page *pages[SILLY_MAX] = {NULL,};
+
+/* Return the page standing in for block block_nr, allocating it on first
+ * use.  Returns NULL when block_nr is outside [0, SILLY_MAX) or when the
+ * page cannot be allocated.
+ */
+static struct page * fake_page(int block_nr)
+{
+        /* block_nr is signed: a negative value must not index the table */
+        if ( block_nr < 0 || block_nr >= SILLY_MAX )
+                return NULL;
+
+        if (pages[block_nr] == NULL) {
+                void *vaddr = (void *)get_free_page(GFP_KERNEL);
+                /* don't cache a bogus page for a failed allocation */
+                if ( vaddr == NULL )
+                        return NULL;
+                pages[block_nr] = virt_to_page(vaddr);
+        }
+        return pages[block_nr];
+}
+
+/* Server side of a write.
+ *
+ * Packs a reply holding a ptlbd_rsp plus a copy of the request niobs with a
+ * freshly assigned xid, registers a bulk descriptor for the matching fake
+ * pages, sends the reply and then waits (with timeout) for the bulk data to
+ * arrive.
+ *
+ * Returns 0 or a negative error.
+ */
+static int ptlbd_put_write(struct ptlrpc_request *req)
+{
+        struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
+        struct ptlbd_niob *reply_niob, *request_niob;
+        struct ptlbd_rsp *rsp;
+        struct ptlrpc_bulk_desc *desc;
+        struct ptlrpc_service *srv;
+        struct l_wait_info lwi;
+        int size[2];
+        int i, page_count, rc;
+        __u32 xid;
+        ENTRY;
+
+        request_niob = lustre_msg_buf(req->rq_reqmsg, 1);
+        page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
+
+        size[0] = sizeof(struct ptlbd_rsp);
+        size[1] = sizeof(struct ptlbd_niob) * page_count;
+        rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if (rc)
+                GOTO(out, rc);
+        reply_niob = lustre_msg_buf(req->rq_repmsg, 1);
+
+        desc = ptlrpc_prep_bulk(req->rq_connection);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+        desc->bd_ptl_ev_hdlr = NULL;
+        desc->bd_portal = PTLBD_BULK_PORTAL;
+        /* NOTE(review): this copies the pointer value, not the handle it
+         * points at - confirm what bd_conn is expected to hold. */
+        memcpy(&(desc->bd_conn), &conn, sizeof(conn)); /* XXX what? */
+
+        srv = req->rq_obd->u.ptlbd.ptlbd_service;
+        spin_lock(&srv->srv_lock);
+        xid = srv->srv_xid++; /* single xid for all pages */
+        spin_unlock(&srv->srv_lock);
+
+        for ( i = 0; i < page_count; i++) {
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_desc, rc = -ENOMEM);
+
+                reply_niob[i] = request_niob[i];
+                reply_niob[i].n_xid = xid;
+
+                bulk->bp_xid = xid;
+                bulk->bp_page = fake_page(request_niob[i].n_block_nr);
+                /* block out of range or no memory for its page */
+                if (bulk->bp_page == NULL)
+                        GOTO(out_desc, rc = -EINVAL);
+                bulk->bp_buf = page_address(bulk->bp_page);
+                bulk->bp_buflen = request_niob[i].n_length;
+        }
+
+        rc = ptlrpc_register_bulk(desc);
+        if ( rc )
+                GOTO(out_desc, rc);
+
+        /* The status goes into the reply message, not the request. */
+        rsp = lustre_msg_buf(req->rq_repmsg, 0);
+        rsp->r_status = 42;
+        rsp->r_error_cnt = 13;
+        ptlrpc_reply(req->rq_svc, req);
+
+        /* this synchronization probably isn't good enough */
+        lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
+        rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_RCVD,
+                          &lwi);
+
+out_desc:
+        ptlrpc_free_bulk(desc);
+out:
+        RETURN(rc);
+}
+
+/* Server side of a read.
+ *
+ * Builds a bulk descriptor from the request niobs (using the xid chosen by
+ * the client), sends the fake pages, waits (with timeout) for the send to
+ * complete and then replies with a ptlbd_rsp.
+ *
+ * Returns 0 or a negative error.  The bulk descriptor is released on every
+ * path once it has been allocated.
+ */
+static int ptlbd_put_read(struct ptlrpc_request *req)
+{
+        struct ptlbd_niob *niob, *niobs;
+        struct ptlbd_rsp *rsp;
+        struct ptlrpc_bulk_desc *desc;
+        struct l_wait_info lwi;
+        int size[1];
+        int i, page_count, rc;
+        ENTRY;
+
+        niobs = lustre_msg_buf(req->rq_reqmsg, 1);
+        page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob);
+
+        desc = ptlrpc_prep_bulk(req->rq_connection);
+        if (desc == NULL)
+                GOTO(out, rc = -ENOMEM);
+        desc->bd_portal = PTLBD_BULK_PORTAL;
+
+        for ( i = 0, niob = niobs ; i < page_count; niob++, i++) {
+                struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc);
+                if (bulk == NULL)
+                        GOTO(out_bulk, rc = -ENOMEM);
+
+                /*
+                 * XXX what about the block number?
+                 */
+                bulk->bp_xid = niob->n_xid;
+                bulk->bp_page = fake_page(niob->n_block_nr);
+                /* block out of range or no memory for its page */
+                if (bulk->bp_page == NULL)
+                        GOTO(out_bulk, rc = -EINVAL);
+                bulk->bp_buf = page_address(bulk->bp_page);
+                bulk->bp_buflen = niob->n_length;
+        }
+
+        rc = ptlrpc_send_bulk(desc);
+        if ( rc )
+                GOTO(out_bulk, rc);
+
+        /* this synchronization probably isn't good enough */
+        /* NOTE(review): the wait result is overwritten below, so a bulk
+         * timeout is still answered as success - confirm intended. */
+        lwi = LWI_TIMEOUT(obd_timeout * HZ, ptlbd_bulk_timeout, desc);
+        rc = l_wait_event(desc->bd_waitq, desc->bd_flags &PTL_BULK_FL_SENT,
+                          &lwi);
+
+        size[0] = sizeof(struct ptlbd_rsp);
+        rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg);
+        if ( rc )
+                GOTO(out_bulk, rc);     /* desc must still be freed */
+
+        rsp = lustre_msg_buf(req->rq_repmsg, 0);
+        if ( rsp == NULL )
+                GOTO(out_bulk, rc = -EINVAL);
+
+        rsp->r_error_cnt = 42;
+        rsp->r_status = 69;
+
+        req->rq_status = 0; /* XXX */
+        ptlrpc_reply(req->rq_svc, req);
+
+out_bulk:
+        ptlrpc_free_bulk(desc);
+out:
+        RETURN(rc);
+}
+
+
+/* Service handler for incoming ptlbd requests (passed to ptlrpc_init_svc()).
+ * Unpacks the message and dispatches on the op command.  Only an unpack
+ * failure is reported; the result of the read/write handlers is not
+ * propagated and an unknown command is merely logged.
+ */
+int ptlbd_parse_req(struct ptlrpc_request *req)
+{
+        struct ptlbd_op *op;
+        int rc;
+        ENTRY;
+
+        rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
+        if ( rc )
+                RETURN(rc);
+
+        op = lustre_msg_buf(req->rq_reqmsg, 0);
+
+        if ( op->op_cmd == PTLBD_READ )
+                ptlbd_put_read(req);
+        else if ( op->op_cmd == PTLBD_WRITE )
+                ptlbd_put_write(req);
+        else
+                CERROR("fix this %d\n", op->op_cmd);
+
+        RETURN(0);
+}
+
+
+#if 0
+int ptlbd_bh_req(int cmd, struct ptlbd_state *st, struct buffer_head *first_bh)
+{
+ struct obd_brw_set *set = NULL;
+ struct brw_page *pg = NULL;
+ struct buffer_head *bh;
+ int rc, i, pg_bytes = 0;
+ ENTRY;
+
+ for ( bh = first_bh ; bh ; bh = bh->b_reqnext )
+ pg_bytes += sizeof(struct brw_page);
+
+ OBD_ALLOC(pg, pg_bytes);
+ if ( pg == NULL )
+ GOTO(out, rc = -ENOMEM);
+
+ set = obd_brw_set_new();
+ if (set == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ for ( i = 0, bh = first_bh ; bh ; bh = bh->b_reqnext, i++) {
+ pg[i].pg = bh->b_page;
+ pg[i].off = bh_offset(bh);
+ pg[i].count = bh->b_size;
+ pg[i].flag = 0;
+ }
+
+ set->brw_callback = ll_brw_sync_wait;
+ rc = obd_brw(cmd, /* lsm */NULL, num_pages, pg, set);
+ if ( rc )
+ GOTO(out, rc);
+
+ rc = ll_brw_sync_wait(set, CB_PHASE_START);
+ if (rc)
+ CERROR("error from callback: rc = %d\n", rc);
+
+out:
+ if ( pg != NULL )
+ OBD_FREE(pg, pg_bytes);
+ if ( set != NULL )
+ obd_brw_set_free(set);
+
+ RETURN(rc);
+}
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#define DEBUG_SUBSYSTEM S_PTLBD
+
+#include <linux/obd_support.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_debug.h>
+#include <linux/lprocfs_status.h>
+#include <linux/obd_ptlbd.h>
+
+#if 0
+static int ptlbd_sv_callback(struct ptlrpc_request *req)
+{
+ int rc;
+ ENTRY;
+
+ rc = ptlbd_parse_request(req);
+
+ rc = lustre_unpack_msg(req->rq_reqmsg, req->rq_reqlen);
+ if ( rc )
+ GOTO(out, rc);
+
+ printk("callback got a friggin opc %d\n", req->rq_reqmsg->opc);
+
+out:
+ RETURN(rc);
+}
+#endif
+
+static int ptlbd_sv_already_setup = 1;
+
+/* Set up a ptlbd server device: create the ptlrpc service on the PTLBD
+ * portals with ptlbd_parse_req() as handler and start one service thread.
+ *
+ * len and buf are currently unused (no server UUID is parsed).
+ *
+ * Returns 0 on success, -ENOMEM if the service cannot be created, or the
+ * error from ptlrpc_start_thread(), in which case the service is torn down
+ * again.
+ */
+static int ptlbd_sv_setup(struct obd_device *obddev, obd_count len, void *buf)
+{
+        struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
+        int rc;
+        ENTRY;
+
+        ptlbd->ptlbd_service =
+                ptlrpc_init_svc(PTLBD_NEVENTS, PTLBD_NBUFS, PTLBD_BUFSIZE,
+                                PTLBD_MAXREQSIZE, PTLBD_REQUEST_PORTAL,
+                                PTLBD_REPLY_PORTAL, "self",
+                                ptlbd_parse_req, "ptlbd_sv");
+
+        if (!ptlbd->ptlbd_service) {
+                CERROR("failed to start service\n");
+                RETURN(-ENOMEM);
+        }
+
+        /* NOTE(review): thread name reads "ptldb", probably meant "ptlbd";
+         * left as is since it is visible at runtime. */
+        rc = ptlrpc_start_thread(obddev, ptlbd->ptlbd_service, "ptldb");
+        if (rc) {
+                /* recoverable: unwind below instead of LBUG()ing */
+                CERROR("cannot start PTLBD thread: rc %d\n", rc);
+                GOTO(out_thread, rc);
+        }
+
+        ptlbd_sv_already_setup = 1;
+
+        RETURN(0);
+
+ out_thread:
+        ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
+        ptlrpc_unregister_service(ptlbd->ptlbd_service);
+        /* don't leave a stale pointer behind for a later cleanup */
+        ptlbd->ptlbd_service = NULL;
+
+        RETURN(rc);
+}
+
+/* Tear down a ptlbd server device: stop the service threads, unregister the
+ * service and clear the setup flag.  Always returns 0. */
+static int ptlbd_sv_cleanup(struct obd_device *obddev)
+{
+ struct ptlbd_obd *ptlbd = &obddev->u.ptlbd;
+ ENTRY;
+
+ /* XXX check for state */
+
+ ptlrpc_stop_all_threads(ptlbd->ptlbd_service);
+ ptlrpc_unregister_service(ptlbd->ptlbd_service);
+
+ ptlbd_sv_already_setup = 0;
+ RETURN(0);
+}
+
+#if 0
+static int ptlbd_sv_connect(struct lustre_handle *conn, struct obd_device *src,
+ obd_uuid_t cluuid, struct recovd_obd *recovd,
+ ptlrpc_recovery_cb_t recover)
+{
+ return class_connect(conn, src, cluuid);
+}
+#endif
+
+/* OBD method table for the ptlbd server type.  Only setup and cleanup are
+ * wired up; iocontrol is commented out and connect/disconnect are compiled
+ * out. */
+static struct obd_ops ptlbd_sv_obd_ops = {
+ o_owner: THIS_MODULE,
+/* o_iocontrol: ptlbd_iocontrol,*/
+ o_setup: ptlbd_sv_setup,
+ o_cleanup: ptlbd_sv_cleanup,
+#if 0
+ o_connect: ptlbd_sv_connect,
+ o_disconnect: class_disconnect
+#endif
+};
+
+/* Register the ptlbd server OBD type under OBD_PTLBD_SV_DEVICENAME and
+ * hand back whatever class_register_type() reports. */
+int ptlbd_sv_init(void)
+{
+        extern struct lprocfs_vars status_class_var[];
+        int rc;
+
+        rc = class_register_type(&ptlbd_sv_obd_ops, status_class_var,
+                                 OBD_PTLBD_SV_DEVICENAME);
+        return rc;
+}
+
+/* Unregister the ptlbd server OBD type registered by ptlbd_sv_init(). */
+void ptlbd_sv_exit(void)
+{
+ class_unregister_type(OBD_PTLBD_SV_DEVICENAME);
+}
{
struct ptlrpc_connection *conn;
struct ptlrpc_request *request;
+ unsigned long flags;
int rc;
ENTRY;
INIT_LIST_HEAD(&request->rq_list);
atomic_set(&request->rq_refcount, 1);
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
request->rq_xid = HTON__u32(++imp->imp_last_xid);
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
request, request->rq_reqmsg->opc,
request->rq_connection->c_remote_uuid,
request->rq_import->imp_client->cli_request_portal,
- request->rq_refcount);
+ atomic_read (&request->rq_refcount));
/* LBUG(); */
}
}
if (request->rq_import) {
+ unsigned long flags = 0;
if (!locked)
- spin_lock(&request->rq_import->imp_lock);
+ spin_lock_irqsave(&request->rq_import->imp_lock, flags);
list_del_init(&request->rq_list);
if (!locked)
- spin_unlock(&request->rq_import->imp_lock);
+ spin_unlock_irqrestore(&request->rq_import->imp_lock,
+ flags);
}
ptlrpc_put_connection(request->rq_connection);
if (request == NULL)
RETURN(1);
+ if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
+ CERROR("dereferencing freed request (bug 575)\n");
+ LBUG();
+ RETURN(1);
+ }
+
DEBUG_REQ(D_INFO, request, "refcount now %u",
atomic_read(&request->rq_refcount) - 1);
ENTRY;
if (req->rq_repmsg != NULL) {
req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
+ /* Store transno in reqmsg for replay. */
+ req->rq_reqmsg->transno = req->rq_repmsg->transno;
req->rq_flags |= PTL_RPC_FL_REPLIED;
GOTO(out, rc = 1);
}
err = req->rq_repmsg->status;
if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
- DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)\n", err);
+ DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)", err);
RETURN(err ? err : -EINVAL);
}
{
/* First remove the ME for the reply; in theory, this means
* that we can tear down the buffer safely. */
- PtlMEUnlink(request->rq_reply_me_h);
+ if (PtlMEUnlink(request->rq_reply_me_h) != PTL_OK)
+ RETURN(0);
OBD_FREE(request->rq_reply_md.start, request->rq_replen);
+
+ memset(&request->rq_reply_me_h, 0, sizeof(request->rq_reply_me_h));
+ request->rq_reply_md.start = NULL;
request->rq_repmsg = NULL;
- request->rq_replen = 0;
return 0;
}
struct list_head *tmp, *saved;
struct ptlrpc_request *req;
struct ptlrpc_connection *conn = imp->imp_connection;
+ unsigned long flags;
ENTRY;
LASSERT(conn);
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
req->rq_import = NULL;
__ptlrpc_req_finished(req, 0);
}
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
EXIT;
return;
}
DEBUG_REQ(D_ERROR, req, "timeout");
+ ptlrpc_abort(req);
req->rq_flags |= PTL_RPC_FL_TIMEOUT;
if (!req->rq_import) {
if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
RETURN(1);
- req->rq_timeout = 0;
recovd_conn_fail(req->rq_import->imp_connection);
-#if 0
/* If this request is for recovery or other primordial tasks,
* don't go back to sleep.
*/
if (req->rq_level < LUSTRE_CONN_FULL)
RETURN(1);
-#endif
RETURN(0);
}
RETURN(1); /* ignored, as of this writing */
}
-/* If the import has been invalidated (such as by an OST failure), the
- * request must fail with -EIO.
- *
- * Must be called with imp_lock held, will drop it if it returns -EIO.
- */
-#define EIO_IF_INVALID(req) \
-if (req->rq_import->imp_flags & IMP_INVALID) { \
- DEBUG_REQ(D_ERROR, req, "IMP_INVALID:"); \
- spin_unlock(&imp->imp_lock); \
- RETURN(-EIO); \
-}
-
+/*
+ * Queue a request on its import, send it with ptl_send_rpc(), and wait for
+ * the reply.  Fails with -EIO when the import is flagged IMP_INVALID.
+ */
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
int rc = 0;
struct l_wait_info lwi;
struct obd_import *imp = req->rq_import;
struct ptlrpc_connection *conn = imp->imp_connection;
+ unsigned long flags; /* IRQ state for spin_lock_irqsave(); must be unsigned long */
ENTRY;
init_waitqueue_head(&req->rq_wait_for_rep);
NTOH__u32(req->rq_reqmsg->status), req->rq_xid,
conn->c_peer.peer_nid, NTOH__u32(req->rq_reqmsg->opc));
- spin_lock(&imp->imp_lock);
- EIO_IF_INVALID(req);
+ spin_lock_irqsave(&imp->imp_lock, flags);
+
+ /*
+ * If the import has been invalidated (such as by an OST failure), the
+ * request must fail with -EIO.
+ */
+ if (req->rq_import->imp_flags & IMP_INVALID) {
+ DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ RETURN(-EIO);
+ }
+
if (req->rq_level > imp->imp_level) {
list_del(&req->rq_list);
list_add_tail(&req->rq_list, &imp->imp_delayed_list);
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d < %d)",
current->comm, req->rq_level, imp->imp_level);
(req->rq_level <= imp->imp_level) ||
(req->rq_flags & PTL_RPC_FL_ERR), &lwi);
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
list_del_init(&req->rq_list);
- spin_unlock(&imp->imp_lock);
if (req->rq_flags & PTL_RPC_FL_ERR)
- RETURN(-EIO);
+ rc = -EIO;
- if (rc)
+ if (rc) {
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
RETURN(rc);
+ }
CERROR("process %d resumed\n", current->pid);
}
LASSERT(list_empty(&req->rq_list));
list_add_tail(&req->rq_list, &imp->imp_sending_list);
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
rc = ptl_send_rpc(req);
if (rc) {
CDEBUG(D_HA, "error %d, opcode %d, need recovery\n", rc,
interrupted_request, req);
} else {
DEBUG_REQ(D_NET, req, "-- sleeping");
- lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
+ lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, expired_request,
interrupted_request, req);
}
l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
DEBUG_REQ(D_NET, req, "-- done sleeping");
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
list_del_init(&req->rq_list);
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
if (req->rq_flags & PTL_RPC_FL_ERR) {
ptlrpc_abort(req);
req->rq_flags &= ~PTL_RPC_FL_RESEND;
lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT);
DEBUG_REQ(D_HA, req, "resending: ");
+ spin_lock_irqsave(&imp->imp_lock, flags);
goto resend;
}
GOTO(out, rc = -EINVAL);
}
#endif
- DEBUG_REQ(D_NET, req, "status %d\n", req->rq_repmsg->status);
+ DEBUG_REQ(D_NET, req, "status %d", req->rq_repmsg->status);
/* We're a rejected connection, need to invalidate and rebuild. */
if (req->rq_repmsg->status == -ENOTCONN) {
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
/* If someone else is reconnecting us (CONN_RECOVD) or has
* already completed it (handle mismatch), then we just need
* to get out.
if (imp->imp_level == LUSTRE_CONN_RECOVD ||
imp->imp_handle.addr != req->rq_reqmsg->addr ||
imp->imp_handle.cookie != req->rq_reqmsg->cookie) {
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
GOTO(out, rc = -EIO);
}
imp->imp_level = LUSTRE_CONN_RECOVD;
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
rc = imp->imp_recover(imp, PTLRPC_RECOVD_PHASE_NOTCONN);
if (rc)
LBUG();
GOTO(out, rc = -EIO);
}
+ rc = ptlrpc_check_status(req);
+
if (req->rq_import->imp_flags & IMP_REPLAYABLE) {
- spin_lock(&imp->imp_lock);
- if (req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0) {
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ if ((req->rq_flags & PTL_RPC_FL_REPLAY || req->rq_transno != 0)
+ && rc >= 0) {
/* Balanced in ptlrpc_free_committed, usually. */
atomic_inc(&req->rq_refcount);
list_add_tail(&req->rq_list, &imp->imp_replay_list);
imp->imp_peer_committed_transno =
req->rq_repmsg->last_committed;
ptlrpc_free_committed(imp);
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
}
- rc = ptlrpc_check_status(req);
-
EXIT;
out:
return rc;
}
-#undef EIO_IF_INVALID
-
int ptlrpc_replay_req(struct ptlrpc_request *req)
{
int rc = 0, old_level, old_status = 0;
init_waitqueue_head(&req->rq_wait_for_rep);
DEBUG_REQ(D_NET, req, "");
- req->rq_timeout = obd_timeout;
req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
/* XXX looks a lot like super.c:invalidate_request_list, don't it? */
void ptlrpc_abort_inflight(struct obd_import *imp)
{
+ unsigned long flags;
struct list_head *tmp, *n;
/* Make sure that no new requests get processed for this import.
* ptlrpc_queue_wait must (and does) hold imp_lock while testing this
* flag and then putting requests on sending_list or delayed_list.
*/
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_flags |= IMP_INVALID;
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
list_for_each_safe(tmp, n, &imp->imp_sending_list) {
struct ptlrpc_request *req =
iov[desc->bd_md.niov].iov_base = bulk->bp_buf;
iov[desc->bd_md.niov].iov_len = bulk->bp_buflen;
+ if (iov[desc->bd_md.niov].iov_len <= 0) {
+ CERROR("bad bp_buflen[%d] @ %p: %d\n", desc->bd_md.niov,
+ bulk->bp_buf, bulk->bp_buflen);
+ CERROR("desc: xid %u, pages %d, ptl %d, ref %d\n",
+ xid, desc->bd_page_count, desc->bd_portal,
+ atomic_read(&desc->bd_refcount));
+ LBUG();
+ }
desc->bd_md.niov++;
desc->bd_md.length += bulk->bp_buflen;
}
/* add a ref, which will be balanced in request_out_callback */
atomic_inc(&request->rq_refcount);
if (request->rq_replen != 0) {
- /* request->rq_repmsg is set only when the reply comes in, in
- * client_packet_callback() */
- if (request->rq_reply_md.start) {
+ if (request->rq_reply_md.start != NULL) {
rc = PtlMEUnlink(request->rq_reply_me_h);
- LASSERT (rc == PTL_OK);
- OBD_FREE(request->rq_reply_md.start,
- request->rq_replen);
- /* If we're resending, rq_repmsg needs to be NULLed out
- * again so that ptlrpc_check_reply doesn't trip early.
- */
+ if (rc != PTL_OK && rc != PTL_INV_ME) {
+ CERROR("rc %d\n", rc);
+ LBUG();
+ }
+ repbuf = (char *)request->rq_reply_md.start;
request->rq_repmsg = NULL;
- }
- OBD_ALLOC(repbuf, request->rq_replen);
- if (!repbuf) {
- LBUG();
- RETURN(ENOMEM);
+ } else {
+ OBD_ALLOC(repbuf, request->rq_replen);
+ if (!repbuf) {
+ LBUG();
+ RETURN(ENOMEM);
+ }
}
rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni,
/* dump_connection_list, but shorter for nicer debugging logs */
static void d_c_l(struct list_head *head)
{
- int sanity = 0;
struct list_head *tmp;
list_for_each(tmp, head) {
CDEBUG(D_HA, " %p = %s (%d/%d)\n", conn, conn->c_remote_uuid,
conn->c_recovd_data.rd_phase,
conn->c_recovd_data.rd_next_phase);
- if (sanity++ > 1000)
- LBUG();
}
}
static int recovd_main(void *arg)
{
struct recovd_obd *recovd = (struct recovd_obd *)arg;
-
+ unsigned long flags;
ENTRY;
lock_kernel();
sigfillset(¤t->blocked);
recalc_sigpending();
#else
- spin_lock_irq(¤t->sigmask_lock);
+ spin_lock_irqsave(¤t->sigmask_lock, flags);
sigfillset(¤t->blocked);
recalc_sigpending(current);
- spin_unlock_irq(¤t->sigmask_lock);
+ spin_unlock_irqrestore(¤t->sigmask_lock, flags);
#endif
sprintf(current->comm, "lustre_recovd");
#include <linux/lustre_net.h>
#include <linux/obd.h>
-int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc)
+int ptlrpc_reconnect_import(struct obd_import *imp, int rq_opc,
+ struct ptlrpc_request **reqptr)
{
struct obd_device *obd = imp->imp_obd;
struct client_obd *cli = &obd->u.cli;
int rc;
request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+ if (!request)
+ RETURN(-ENOMEM);
request->rq_level = LUSTRE_CONN_NEW;
request->rq_replen = lustre_msg_size(0, NULL);
/*
sizeof (old_hdl.addr)) &&
!memcmp(&old_hdl.cookie, &request->rq_repmsg->cookie,
sizeof (old_hdl.cookie))) {
- CERROR("%s@%s didn't like our handle %Lx/%Lx, failed\n",
+ CERROR("%s@%s didn't like our handle "LPX64"/"LPX64", failed\n",
cli->cl_target_uuid, conn->c_remote_uuid,
(__u64)(unsigned long)ldlmexp,
ldlmexp->exp_cookie);
old_hdl.addr = request->rq_repmsg->addr;
old_hdl.cookie = request->rq_repmsg->cookie;
if (memcmp(&imp->imp_handle, &old_hdl, sizeof(old_hdl))) {
- CERROR("%s@%s changed handle from %Lx/%Lx to %Lx/%Lx; "
+ CERROR("%s@%s changed handle from "LPX64"/"LPX64" to "LPX64"/"LPX64"; "
"copying, but this may foreshadow disaster\n",
cli->cl_target_uuid, conn->c_remote_uuid,
old_hdl.addr, old_hdl.cookie,
old_hdl = imp->imp_handle;
imp->imp_handle.addr = request->rq_repmsg->addr;
imp->imp_handle.cookie = request->rq_repmsg->cookie;
- CERROR("now connected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
+ CERROR("now connected to %s@%s ("LPX64"/"LPX64", was "LPX64"/"LPX64")!\n",
cli->cl_target_uuid, conn->c_remote_uuid,
imp->imp_handle.addr, imp->imp_handle.cookie,
old_hdl.addr, old_hdl.cookie);
}
out_disc:
- ptlrpc_req_finished(request);
+ *reqptr = request;
return rc;
}
RETURN(0);
}
-int ptlrpc_replay(struct obd_import *imp, int send_last_flag)
+int ptlrpc_replay(struct obd_import *imp)
{
int rc = 0;
struct list_head *tmp, *pos;
struct ptlrpc_request *req;
+ unsigned long flags;
__u64 committed = imp->imp_peer_committed_transno;
ENTRY;
/* It might have committed some after we last spoke, so make sure we
* get rid of them now.
*/
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
ptlrpc_free_committed(imp);
list_for_each_safe(tmp, pos, &imp->imp_replay_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
- if (req->rq_transno == imp->imp_max_transno &&
- send_last_flag) {
- req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
- DEBUG_REQ(D_HA, req, "LAST_REPLAY:");
- } else {
- DEBUG_REQ(D_HA, req, "REPLAY:");
- }
+ DEBUG_REQ(D_HA, req, "REPLAY:");
+ /* XXX locking WRT failure during replay? */
rc = ptlrpc_replay_req(req);
- req->rq_reqmsg->flags &= ~MSG_LAST_REPLAY;
if (rc) {
- CERROR("recovery replay error %d for req %Ld\n",
+ CERROR("recovery replay error %d for req "LPD64"\n",
rc, req->rq_xid);
GOTO(out, rc);
}
}
out:
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
return rc;
}
static int resend_type(struct ptlrpc_request *req, __u64 committed)
{
- if (req->rq_transno < committed) {
+ if (req->rq_transno && req->rq_transno < committed) {
if (req->rq_flags & PTL_RPC_FL_REPLIED) {
/* Saw the reply and it was committed, no biggie. */
DEBUG_REQ(D_HA, req, "NO_RESEND");
int rc = 0;
struct list_head *tmp, *pos;
struct ptlrpc_request *req;
+ unsigned long flags;
__u64 committed = imp->imp_peer_committed_transno;
ENTRY;
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
list_for_each(tmp, &imp->imp_sending_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
DEBUG_REQ(D_HA, req, "SENDING: ");
}
}
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
RETURN(rc);
}
void ptlrpc_wake_delayed(struct obd_import *imp)
{
+ unsigned long flags;
struct list_head *tmp, *pos;
struct ptlrpc_request *req;
- spin_lock(&imp->imp_lock);
+ spin_lock_irqsave(&imp->imp_lock, flags);
list_for_each_safe(tmp, pos, &imp->imp_delayed_list) {
req = list_entry(tmp, struct ptlrpc_request, rq_list);
DEBUG_REQ(D_HA, req, "waking:");
wake_up(&req->rq_wait_for_rep);
}
- spin_unlock(&imp->imp_lock);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
}
int err;
ENTRY;
- MOD_INC_USE_COUNT;
memset(recovd, 0, sizeof(*recovd));
err = recovd_setup(recovd);
- if (err) {
- MOD_DEC_USE_COUNT;
- RETURN(err);
- }
-
- RETURN(0);
+ RETURN(err);
}
int connmgr_cleanup(struct obd_device *dev)
int err;
err = recovd_cleanup(recovd);
- if (err)
- LBUG();
-
- MOD_DEC_USE_COUNT;
- RETURN(0);
+ RETURN(err);
}
-int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len, void *karg,
- void *uarg)
+int connmgr_iocontrol(unsigned int cmd, struct lustre_handle *hdl, int len,
+ void *karg, void *uarg)
{
struct ptlrpc_connection *conn = NULL;
struct obd_device *obd = class_conn2obd(hdl);
if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN)
RETURN(-EINVAL); /* XXX ENOSYS? */
-
+
/* Find the connection that's been rebuilt or has failed. */
spin_lock(&recovd->recovd_lock);
list_for_each(tmp, &recovd->recovd_troubled_items) {
list_for_each(tmp, &recovd->recovd_managed_items) {
conn = list_entry(tmp, struct ptlrpc_connection,
c_recovd_data.rd_managed_chain);
-
+
LASSERT(conn->c_recovd_data.rd_recovd == recovd);
-
+
if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1))
break;
conn = NULL;
}
ptlrpc_readdress_connection(conn, conn->c_remote_uuid);
spin_unlock(&conn->c_lock);
-
+
conn->c_recovd_data.rd_phase = RD_PREPARED;
wake_up(&recovd->recovd_waitq);
out:
{
return lprocfs_dereg_obd(dev);
}
+
/* use obd ops to offer management infrastructure */
static struct obd_ops recovd_obd_ops = {
- o_attach: connmgr_attach,
- o_detach: conmgr_detach,
- o_setup: connmgr_setup,
- o_cleanup: connmgr_cleanup,
- o_iocontrol: connmgr_iocontrol,
- o_connect: connmgr_connect,
- o_disconnect: class_disconnect
+ o_owner: THIS_MODULE,
+ o_attach: connmgr_attach,
+ o_detach: conmgr_detach,
+ o_setup: connmgr_setup,
+ o_cleanup: connmgr_cleanup,
+ o_iocontrol: connmgr_iocontrol,
+ o_connect: connmgr_connect,
+ o_disconnect: class_disconnect
};
static int __init ptlrpc_init(void)
{
- int rc;
+ int rc;
rc = ptlrpc_init_portals();
- if (rc)
+ if (rc)
RETURN(rc);
ptlrpc_init_connection();
rc = class_register_type(&recovd_obd_ops, status_class_var,
LUSTRE_HA_NAME);
- if (rc)
+ if (rc)
RETURN(rc);
ptlrpc_put_connection_superhack = ptlrpc_put_connection;
return 0;
ptlrpc_link_svc_me(rqbd);
}
- CDEBUG(D_NET, "Starting service listening on portal %d (eq: %p)\n",
+ CDEBUG(D_NET, "Starting service listening on portal %d (eq: %lu)\n",
service->srv_req_portal, service->srv_eq_h.handle_idx);
RETURN(service);
goto out;
}
- CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"
- LPX64":%x:%d\n",
+ CDEBUG(D_RPCTRACE, "Handling RPC pid:xid:nid:opc %d:"LPX64":"LPX64":%d\n",
NTOH__u32(request->rq_reqmsg->status),
request->rq_xid,
event->initiator.nid,
struct ptlrpc_request *request;
ptl_event_t *event;
int rc = 0;
-
+ unsigned long flags;
ENTRY;
lock_kernel();
sigfillset(¤t->blocked);
recalc_sigpending();
#else
- spin_lock_irq(¤t->sigmask_lock);
+ spin_lock_irqsave(¤t->sigmask_lock, flags);
sigfillset(¤t->blocked);
recalc_sigpending(current);
- spin_unlock_irq(¤t->sigmask_lock);
+ spin_unlock_irqrestore(¤t->sigmask_lock, flags);
#endif
#ifdef __arch_um__
list_add(&thread->t_link, &svc->srv_threads);
spin_unlock(&svc->srv_lock);
+ /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
+ * just drop the VM and FILES in ptlrpc_daemonize() right away.
+ */
rc = kernel_thread(ptlrpc_main, (void *) &d, CLONE_VM | CLONE_FILES);
if (rc < 0) {
CERROR("cannot start thread\n");
openclose
createdestroy
createmany
+statmany
mkdirmany
lovstripe
*.xml
setuid
multifstat
checkstat
+wantedi
rundbench \
elan-client.cfg mds.cfg trivial.sh
pkgexampledir = '${exec_prefix}/usr/lib/$(PACKAGE)/examples'
-pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh local.sh uml.sh lov.sh
+pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh
noinst_SCRIPTS = llsetup.sh llrsetup.sh llcleanup.sh
noinst_DATA = lustre.cfg
noinst_SCRIPTS += fs.sh intent-test.sh intent-test2.sh leak_finder.pl \
runtests runvmstat snaprun.sh tbox.sh common.sh
noinst_PROGRAMS = openunlink testreq truncate directio openme writeme mcreate
noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
-noinst_PROGRAMS += lovstripe stat createmany mkdirmany multifstat
+noinst_PROGRAMS += stat createmany statmany mkdirmany multifstat
# noinst_PROGRAMS += ldaptest
-noinst_PROGRAMS += checkstat
+noinst_PROGRAMS += checkstat wantedi
# ldaptest_SOURCES = ldaptest.c
tchmod_SOURCES = tchmod.c
test_brw_SOURCES = test_brw.c
openclose_SOURCES = openclose.c
createdestroy_SOURCES = createdestroy.c
-lovstripe_SOURCES = lovstripe.c
stat_SOURCES = stat.c
createmany_SOURCES = createmany.c
+statmany_SOURCES = statmany.c
mkdirmany_SOURCES = mkdirmany.c
multifstat_SOURCES = multifstat.c
checkstat_SOURCES = checkstat.c
+wantedi_SOURCES = wantedi.c
include $(top_srcdir)/Rules
+/*
+ * Create files named <filenamebase><N> with open() (-o) or mknod() (-m):
+ * either <count> files, or for <time> seconds when the last argument is
+ * negative.  Returns 0 on success, 1 on a usage error, or the errno of the
+ * create call that failed.
+ */
int main(int argc, char ** argv)
{
- int i, rc, count;
+ int i, rc = 0, do_open;
char filename[4096];
+ long int start, last, end, count, elapsed;
- if (argc < 3) {
- printf("Usage %s filenamebase count\n", argv[0]);
+ if (argc != 4) {
+ printf("Usage %s <-o|-m> filenamebase <count|-time>\n",
+ argv[0]);
return 1;
}
- if (strlen(argv[1]) > 4080) {
+ if (strcmp(argv[1], "-o") == 0) {
+ do_open = 1;
+ } else if (strcmp(argv[1], "-m") == 0) {
+ do_open = 0;
+ } else {
+ printf("Usage %s {-o|-m} filenamebase <count|-time>\n",
+ argv[0]);
+ return 1;
+ }
+
+ if (strlen(argv[2]) > 4080) {
printf("name too long\n");
return 1;
}
- count = strtoul(argv[2], NULL, 0);
+ start = last = time(0);
+
+ end = strtol(argv[3], NULL, 0);
- for (i=0 ; i < count ; i++) {
- sprintf(filename, "%s-%d", argv[1], i);
- rc = mknod(filename, S_IFREG| 0444, 0);
- if (rc) {
- printf("mknod(%s) error: %s\n",
- filename, strerror(errno));
- break;
+ /* positive argument: a file count; otherwise: run for -<arg> seconds */
+ if (end > 0) {
+ count = end;
+ end = -1UL >> 1;
+ } else {
+ end = start - end;
+ count = -1UL >> 1;
+ }
+
+ for (i = 0; i < count && time(0) < end; i++) {
+ sprintf(filename, "%s%d", argv[2], i);
+ if (do_open) {
+ int fd = open(filename, O_CREAT|O_RDWR, 0644);
+ if (fd < 0) {
+ /* save errno first: printf() may overwrite it */
+ rc = errno;
+ printf("open(%s) error: %s\n", filename,
+ strerror(rc));
+ break;
+ }
+ close(fd);
+ } else {
+ rc = mknod(filename, S_IFREG| 0444, 0);
+ if (rc) {
+ /* save errno first: printf() may overwrite it */
+ rc = errno;
+ printf("mknod(%s) error: %s\n",
+ filename, strerror(rc));
+ break;
+ }
+ }
+ if ((i % 10000) == 0) {
+ printf(" - created %d (time %ld ; total %ld ; last %ld)\n",
+ i, time(0), time(0) - start, time(0) - last);
+ last = time(0);
}
- if ((i % 10000) == 0)
- printf(" - created %d (time %ld)\n", i, time(0));
}
+ elapsed = time(0) - start;
+ /* treat a sub-second run as one second so the rate never divides by 0 */
+ printf("total: %d creates in %ld seconds: %f creates/second\n", i,
+ elapsed, ((float)i / (elapsed > 0 ? elapsed : 1)));
+
return rc;
}
--- /dev/null
+#!/bin/bash
+# Generate the lmc XML configuration for an obdecho test setup.
+# Usage: echo.sh [config.xml] [--lov]
+
+# The first argument names the config file unless it is missing or an option,
+# in which case the name is derived from this script's own name.
+case "$1" in
+        ""|--*) config=$(basename $0 .sh).xml ;;
+        *)      config=$1; shift ;;
+esac
+LMC=${LMC:-../utils/lmc -m $config}
+# Default TMP so MDSDEV does not end up in the root directory.
+TMP=${TMP:-/tmp}
+
+SERVER=localhost
+CLIENT=localhost
+
+# FIXME: make LMC not require MDS for obdecho LOV
+MDSDEV=$TMP/mds1
+MDSSIZE=10000
+
+STRIPE_BYTES=65536
+STRIPES_PER_OBJ=2       # 0 means stripe over all OSTs
+
+LOV=0
+while [ "$1" ]; do
+        case $1 in
+                --lov) LOV="1" ;;
+                *) OPTS="$OPTS $1" ;;
+        esac
+        shift
+done
+
+rm -f $config
+# create nodes
+$LMC --add node --node $SERVER || exit 1
+$LMC --add net --node $SERVER --nid $SERVER --nettype tcp || exit 2
+
+# Plain test instead of (( )) so the script also works when run via "sh".
+if [ "$LOV" -ne 0 ]; then
+        $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
+        $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
+        $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 12
+        $LMC --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 13
+        OBD_NAME=lov1
+else
+        $LMC --add ost --obd obd1 --node $SERVER --obdtype=obdecho || exit 2
+        OBD_NAME=obd1
+fi
+
+if [ "$SERVER" != "$CLIENT" ]; then
+        $LMC --add node --node $CLIENT || exit 1
+        $LMC --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 2
+fi
+
+$LMC --add echo_client --node $CLIENT --obd ${OBD_NAME} || exit 3
+
#!/bin/sh
-config=echo.xml
LCONF=${LCONF:-../utils/lconf}
-LMC=${LMC:-../utils/lmc}
+NAME=${NAME:-echo}
-SERVER=localhost
-CLIENT=cfs4
+config=$NAME.xml
+mkconfig=./$NAME.sh
-# FIXME: make LMC not require MDS for obdecho LOV
-MDSDEV=$TMP/mds1
-MDSSIZE=10000
-
-STRIPE_BYTES=65536
-STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs
-
-LOV=0
-while [ "$1" ]; do
- case $1 in
- --lov) LOV="1" ;;
- *) OPTS="$OPTS $1" ;;
- esac
- shift
-done
-
-rm -f $config
-# create nodes
-$LMC -o $config --add node --node $SERVER || exit 1
-$LMC -m $config --add net --node $SERVER --nid $SERVER --nettype tcp || exit 2
-
-if (($LOV)); then
- $LMC -m $config --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10
- $LMC -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11
- $LMC -m $config --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 12
- $LMC -m $config --add ost --node $SERVER --lov lov1 --obdtype=obdecho || exit 13
- OBD_NAME=lov1
-else
- $LMC -m $config --add ost --obd obd1 --node $SERVER --obdtype=obdecho || exit 2
- OBD_NAME=obd1
-fi
-
-if [ "$SERVER" != "$CLIENT" ]; then
- $LMC -m $config --add node --node $CLIENT || exit 1
- $LMC -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 2
-fi
-
-$LMC -m $config --add echo_client --node $CLIENT --obd ${OBD_NAME} || exit 3
+sh $mkconfig $config || exit 1
$LCONF --reformat --gdb $OPTS $config || exit 4
#!/bin/sh
LCONF=../utils/lconf
+NAME=${NAME:-echo}
+TMP=${TMP:-/tmp}
-if [ -f echo.xml ]; then
- ${LCONF} --cleanup echo.xml
-else
- echo "no echo.xml found"
+config=$NAME.xml
+mkconfig=./$NAME.sh
+
+# Regenerate the config if it is missing so cleanup can still run.
+if [ ! -f $config ]; then
+ sh $mkconfig $config || exit 1
fi
+# Clean up the config selected by $NAME, not a hard-coded echo.xml.
+${LCONF} --cleanup $config
+
config=$NAME.xml
mkconfig=./$NAME.sh
-if [ ! -f $config -o $mkconfig -nt $config ]; then
- sh $mkconfig $config || exit 1
-fi
+sh $mkconfig $config || exit 1
${LCONF} --reformat --gdb $config || exit 2
sync; sleep 2; sync
${LCONF} --cleanup --dump $TMP/debug $config
-LEAK=`dmesg | grep -v " 0 bytes" | grep leaked`
-if [ "$LEAK" ]; then
- echo "$LEAK" 1>&2
- mv $TMP/debug $TMP/debug.`date +%s`
- #exit -1
-fi
BUSY=`dmesg | grep -i destruct`
if [ "$BUSY" ]; then
echo "$BUSY" 1>&2
- #exit -2
+ mv $TMP/debug $TMP/debug-busy.`date +%s`
+ exit -1
+fi
+LEAK_LUSTRE=`dmesg | tail -20 | grep -v "leaked: 0" | grep leaked`
+LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"`
+if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+ echo "$LEAK_LUSTRE" 1>&2
+ echo "$LEAK_PORTALS" 1>&2
+ mv $TMP/debug $TMP/debug-leak.`date +%s`
+ exit -2
fi
LMC="${LMC:-../utils/lmc} -m $config"
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
-OSTDEV=$TMP/ost1
-OSTSIZE=200000
+OSTDEV=${OSTDEV:-$TMP/ost1}
+OSTSIZE=${OSTSIZE:-200000}
kver=`uname -r | cut -d "." -f 1,2`
LMC=${LMC:-../utils/lmc}
TMP=${TMP:-/tmp}
-MDSDEV=$TMP/mds1
-MDSSIZE=50000
+MDSDEV=${MDSDEV:-$TMP/mds1}
+MDSSIZE=${MDSSIZE:-50000}
-OSTDEV1=$TMP/ost1
-OSTDEV2=$TMP/ost2
-OSTDEV3=$TMP/ost3
-OSTSIZE=100000
+OSTDEV1=${OSTDEV1:-$TMP/ost1}
+OSTDEV2=${OSTDEV2:-$TMP/ost2}
+OSTDEV3=${OSTDEV3:-$TMP/ost3}
+OSTSIZE=${OSTSIZE:-100000}
STRIPE_BYTES=65536
STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs
+++ /dev/null
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <string.h>
-#include <errno.h>
-
-
-/****************** Custom includes ********************/
-#include <linux/lustre_lite.h>
-#include <linux/lustre_idl.h>
-
-
-/****************** Functions ******************/
-int write_file(char *name, struct lov_mds_md *striping, int bufsize,
- char *buf1, char *buf2);
-
-
-/************************ Main **********************/
-
-#define STRIPE_SIZE 128 * 1024
-
-int main(int argc, char *argv[])
-{
- struct lov_mds_md a_striping;
- long bufsize = sizeof(long) * STRIPE_SIZE;
- char *rbuf, *wbuf;
- int data, *dp;
- int result;
-
- rbuf = malloc(bufsize);
- wbuf = malloc(bufsize);
- if (!rbuf || !wbuf) {
- fprintf(stderr, "%s: unable to allocate buffers\n", argv[0]);
- return 1;
- }
-
- /* Initialize to an easily-verified pattern */
- for (data = 0, dp = (int *)wbuf; data < STRIPE_SIZE; data++, dp++)
- *dp = data;
-
- /* Init defaults on striping info */
- a_striping.lmm_magic = LOV_MAGIC;
- a_striping.lmm_stripe_size = STRIPE_SIZE;
- a_striping.lmm_stripe_pattern = 0;
-
- /* Write file for OST1 only */
- /* Start at OST 0, and use only 1 OST */
- a_striping.lmm_stripe_offset = 0;
- a_striping.lmm_stripe_count = 1;
-
- result = write_file("/mnt/lustre/ost1", &a_striping, bufsize,
- wbuf, rbuf);
-
- if (result < 0)
- goto out;
-
- /* Write file for OST2 only */
- /* Start at OST 1, and use only 1 OST */
- a_striping.lmm_stripe_offset = 1;
- a_striping.lmm_stripe_count = 1;
-
- result = write_file("/mnt/lustre/ost2", &a_striping, bufsize,
- wbuf, rbuf);
-
- if (result < 0)
- goto out;
-
- /* Write file across both OST1 and OST2 */
- /* Start at OST 0, and use only 2 OSTs */
- a_striping.lmm_stripe_offset = 0;
- a_striping.lmm_stripe_count = 2;
-
- result = write_file("/mnt/lustre/ost1and2", &a_striping, bufsize,
- wbuf, rbuf);
-
- if (result < 0)
- goto out;
-
-out:
- free(rbuf);
- free(wbuf);
- return result;
-}
-
-
-int write_file(char *name, struct lov_mds_md *striping, int bufsize,
- char *wbuf, char *rbuf)
-{
- int fd, result;
-
- printf("opening %s\n", name);
- fd = open(name, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, 0644);
- if (fd < 0) {
- fprintf(stderr, "\nUnable to open '%s': %s\n",
- name, strerror(errno));
- return -errno;
- }
-
- printf("setting stripe data on %s\n", name);
- result = ioctl(fd, LL_IOC_LOV_SETSTRIPE, striping);
- if (result < 0) {
- fprintf(stderr, "\nError on ioctl for '%s' (%d): %s\n",
- name, fd, strerror(errno));
- close(fd);
- return -errno;
- }
-
- /* Write bogus data */
- printf("writing data to %s\n", name);
- result = write(fd, wbuf, bufsize);
- if (result < 0) {
- fprintf(stderr, "\nerror: writing data to '%s' (%d): %s\n",
- name, fd, strerror(errno));
- close(fd);
- return -errno;
- }
-
- if (result != bufsize) {
- fprintf(stderr, "\nerror: short write to '%s' (%d): %d != %d\n",
- name, fd, result, bufsize);
- close(fd);
- return -1;
- }
-
- /* Seek to beginning again */
- printf("seeking in %s\n", name);
- result = lseek(fd, 0, SEEK_SET);
- if (result < 0) {
- fprintf(stderr, "\nerror: seeking to beginning '%s' (%d): %s\n",
- name, fd, strerror(errno));
- close(fd);
- return -errno;
- }
-
- /* Read bogus data back */
- printf("reading data from %s\n", name);
- result = read(fd, rbuf, bufsize);
- if (result < 0) {
- fprintf(stderr, "\nerror: reading data from '%s' (%d): %s\n",
- name, fd, strerror(errno));
- close(fd);
- return -errno;
- }
-
- if (result != bufsize) {
- fprintf(stderr,"\nerror: short read from '%s' (%d): %d != %d\n",
- name, fd, result, bufsize);
- close(fd);
- return -1;
- }
-
- if (memcmp(wbuf, rbuf, bufsize)) {
- fprintf(stderr, "\nerror: comparing data in '%s' (%d): %s\n",
- name, fd, strerror(errno));
- close(fd);
- return -1;
- }
-
- close(fd);
-
- return 0;
-}
$CLEAN
$START
-echo '== cleanup ========================================='
+echo '== stripe sanity ================================= test27'
+# Sub-test labels use the 27.x prefix to match the test number and d27.
+echo "--test 27.1 create one stripe"
+mkdir $MOUNT/d27
+../utils/lstripe $MOUNT/d27/f0 4096 0 1
+$CHECKSTAT -t file $MOUNT/d27/f0
+echo "--test 27.2 write to one stripe file"
+cp /etc/hosts $MOUNT/d27/f0
+pass
+$CLEAN
+$START
+
+echo "--test 27.3 create two stripes"
+../utils/lstripe $MOUNT/d27/f01 4096 0 2
+echo "--test 27.4 write to two stripe file"
+cp /etc/hosts $MOUNT/d27/f01
+pass
+$CLEAN
+$START
+
+echo "--test 27.5 lstripe existing file (should return error)"
+# The first call creates the file; the second must fail on the existing file.
+../utils/lstripe $MOUNT/d27/f12 4096 1 2
+! ../utils/lstripe $MOUNT/d27/f12 4096 1 2
+pass
+$CLEAN
+$START
+
+echo "--test 27.6 lfind "
+../utils/lfind $MOUNT/d27
+pass
+$CLEAN
+$START
+
+
+echo '== cleanup ============================================='
rm -r $MOUNT/[Rdfs][1-9]*
echo '======================= finished ======================='
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+
+#if 0
+#include <linux/extN_fs.h>
+#endif
+#include <linux/lustre_lib.h>
+#include <linux/obd.h>
+
+struct option longopts[] = {
+ {"ea", 0, 0, 'e'},
+ {"lookup", 0, 0, 'l'},
+ {"random", 0, 0, 'r'},
+ {"stat", 0, 0, 's'},
+ {NULL, 0, 0, 0},
+};
+char *shortopts = "ehlr:s0123456789";
+
+static int usage(char *prog, FILE *out)
+{
+ fprintf(out,
+ "Usage: %s [-r rand_seed] {-s|-e|-l} filenamebase total_files iterations\n"
+ "-r : random seed\n"
+ "-s : regular stat() calls\n"
+ "-e : open then GET_EA ioctl\n"
+ "-l : lookup ioctl only\n", prog);
+ exit(out == stderr);
+}
+
+#ifndef LONG_MAX
+#define LONG_MAX (1 << ((8 * sizeof(long)) - 1))
+#endif
+
+/*
+ * Repeatedly pick a random file <filenamebase><N>, with N in
+ * [0, total_files), and stat() it (-s) or look it up through the
+ * IOC_MDC_LOOKUP ioctl on the parent directory (-l).  The -e mode is
+ * currently compiled out.  Runs for <iterations> passes, or for the number
+ * of seconds given as a bare -<digits> option.
+ */
+int main(int argc, char ** argv)
+{
+        long i, count, iter = LONG_MAX, mode = 0, offset;
+        long int start, length = LONG_MAX, last, elapsed, rc = 0;
+        char parent[4096], *t;
+        char *prog = argv[0], *base;
+        int c;          /* int, not char: getopt_long() returns -1 at the end */
+        int seed = 0;
+        int fd = -1;
+
+        while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) {
+                char *e;
+                switch (c) {
+                case 'r':
+                        seed = strtoul(optarg, &e, 0);
+                        if (*e) {
+                                fprintf(stderr, "bad -r option %s\n", optarg);
+                                usage(prog, stderr);
+                        }
+                        break;
+                case 'e':
+                case 'l':
+                case 's':
+                        mode = c;
+                        break;
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                        /* a bare -<digits> option is a run time in seconds */
+                        if (length == LONG_MAX)
+                                length = c - '0';
+                        else
+                                length = length * 10 + (c - '0');
+                        break;
+                case 'h':
+                        usage(prog, stdout);
+                case '?':
+                        usage(prog, stderr);
+                }
+        }
+
+        /* exactly one of -s/-e/-l must have selected a mode */
+        if (mode == 0) {
+                fprintf(stderr, "one of -s, -e, or -l is required\n");
+                usage(prog, stderr);
+        }
+
+        if (optind + 2 + (length == LONG_MAX) != argc) {
+                fprintf(stderr, "missing filenamebase, total_files, or iterations\n");
+                usage(prog, stderr);
+        }
+
+        base = argv[optind];
+        if (strlen(base) > 4080) {
+                fprintf(stderr, "filenamebase too long\n");
+                exit(1);
+        }
+
+        if (seed == 0) {
+                int f = open("/dev/urandom", O_RDONLY);
+
+                /* cast so a -1 from read() is not compared as unsigned */
+                if (f < 0 || read(f, &seed, sizeof(seed)) < (long)sizeof(seed))
+                        seed = time(0);
+                if (f >= 0)
+                        close(f);
+        }
+
+        printf("using seed %u\n", seed);
+        /* random() is used below, so seed it with srandom(), not srand() */
+        srandom(seed);
+
+        count = strtoul(argv[optind + 1], NULL, 0);
+        if (count <= 0) {
+                /* random() % count below would divide by zero */
+                fprintf(stderr, "total_files must be greater than zero\n");
+                exit(1);
+        }
+        if (length == LONG_MAX) {
+                iter = strtoul(argv[optind + 2], NULL, 0);
+                printf("running for %ld iterations\n", iter);
+        } else
+                printf("running for %ld seconds\n", length);
+
+        start = last = time(0);
+
+        /* split base into the parent directory and the offset of the name */
+        t = strrchr(base, '/');
+        if (t == NULL) {
+                strcpy(parent, ".");
+                offset = -1;
+        } else {
+                size_t dirlen = t - base;
+
+                if (dirlen == 0) {
+                        /* base is directly under the root directory */
+                        strcpy(parent, "/");
+                } else {
+                        memcpy(parent, base, dirlen);
+                        parent[dirlen] = '\0';
+                }
+                offset = t - base + 1;
+        }
+
+        if (mode == 'l') {
+                fd = open(parent, O_RDONLY);
+                if (fd < 0) {
+                        /* save errno first: printf() may overwrite it */
+                        int err = errno;
+
+                        printf("open(%s) error: %s\n", parent,
+                               strerror(err));
+                        exit(err);
+                }
+        }
+
+        for (i = 0; i < iter && time(0) - start < length; i++) {
+                char filename[4096];
+                long tmp;
+
+                tmp = random() % count;
+                sprintf(filename, "%s%ld", base, tmp);
+
+                if (mode == 'e') {
+#if 0
+                        fd = open(filename, O_RDWR|O_LARGEFILE);
+                        if (fd < 0) {
+                                printf("open(%s) error: %s\n", filename,
+                                       strerror(errno));
+                                break;
+                        }
+                        rc = ioctl(fd, EXTN_IOC_GETEA, NULL);
+                        if (rc < 0) {
+                                printf("ioctl(%s) error: %s\n", filename,
+                                       strerror(errno));
+                                break;
+                        }
+                        close(fd);
+                        break;
+#endif
+                } else if (mode == 's') {
+                        struct stat buf;
+
+                        rc = stat(filename, &buf);
+                        if (rc < 0) {
+                                printf("stat(%s) error: %s\n", filename,
+                                       strerror(errno));
+                                break;
+                        }
+                } else if (mode == 'l') {
+                        struct obd_ioctl_data data;
+                        char rawbuf[8192];
+                        char *buf = rawbuf;
+                        int max = sizeof(rawbuf);
+
+                        memset(&data, 0, sizeof(data));
+                        data.ioc_version = OBD_IOCTL_VERSION;
+                        data.ioc_len = sizeof(data);
+                        /* pass only the name relative to the parent directory */
+                        if (offset >= 0)
+                                data.ioc_inlbuf1 = filename + offset;
+                        else
+                                data.ioc_inlbuf1 = filename;
+                        data.ioc_inllen1 = strlen(data.ioc_inlbuf1) + 1;
+
+                        if (obd_ioctl_pack(&data, &buf, max)) {
+                                printf("ioctl_pack failed.\n");
+                                break;
+                        }
+
+                        rc = ioctl(fd, IOC_MDC_LOOKUP, buf);
+                        if (rc < 0) {
+                                printf("ioctl(%s) error: %s\n", filename,
+                                       strerror(errno));
+                                break;
+                        }
+                }
+                if ((i % 10000) == 0) {
+                        printf(" - stat %ld (time %ld ; total %ld ; last %ld)\n",
+                               i, (long)time(0), (long)(time(0) - start),
+                               (long)(time(0) - last));
+                        last = time(0);
+                }
+        }
+
+        if (mode == 'l')
+                close(fd);
+
+        elapsed = time(0) - start;
+        /* treat a sub-second run as one second so the rate never divides by 0 */
+        printf("total: %ld stats in %ld seconds: %f stats/second\n", i,
+               elapsed, ((float)i / (elapsed > 0 ? elapsed : 1)));
+
+        exit(rc);
+}
# Three separate systems
MDSNODE=uml1
-OSTNODE=uml2
+OSTNODES="uml2 uml2"
CLIENTS="uml3"
# Single system with additional clients
#MDSNODE=uml1
-#OSTNODE=uml1
+#OSTNODES="uml1 uml1"
#CLIENTS="$MDSNODE client"
# Two systems with client on MDS, and additional clients (set up OST first)
#MDSNODE=uml1
-#OSTNODE=uml2
+#OSTNODES="uml2 uml2"
#CLIENTS="$MDSNODE client"
# Two systems with client on OST, and additional clients (set up MDS first)
#MDSNODE=uml1
-#OSTNODE=uml2
-#CLIENTS="$OSTNODE client"
+#OSTNODES="uml2 uml2"
+#CLIENTS="$OSTNODES client"
rm -f $config
# create nodes
-for NODE in $MDSNODE $OSTNODE $CLIENTS; do
+for NODE in $MDSNODE $OSTNODES $CLIENTS; do
eval [ \$$NODE ] && continue
${LMC} -m $config --add net --node $NODE --nid $NODE --nettype tcp || exit 1
eval "$NODE=done"
# configure ost
${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21
-${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22
+COUNT=1
+for NODE in $OSTNODES; do
+ eval OSTDEV=\$OSTDEV$COUNT
+ ${LMC} -m $config --add ost --node $NODE --lov lov1 --dev $OSTDEV --size $OSTSIZE || exit 21
+ COUNT=`expr $COUNT + 1`
+done
# create client config(s)
for NODE in $CLIENTS; do
${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30
done
-
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <sys/ioctl.h>
+#include <linux/lustre_lib.h>
+#include <linux/obd.h>
+
+/* Print the command-line synopsis to OUT and terminate the program.
+ * The exit status is 1 when OUT is stderr and 0 otherwise. */
+static int usage(char *prog, FILE *out)
+{
+        int status = (out == stderr);
+
+        fprintf(out, "Usage: %s <dir> <desired child ino>\n", prog);
+        exit(status);
+}
+
+#define EXTN_IOC_CREATE_INUM _IOW('f', 5, long)
+
+/* Open the directory named by argv[1] and ask the filesystem, through the
+ * EXTN_IOC_CREATE_INUM ioctl, to create a child using the inode number
+ * given in argv[2].
+ *
+ * Exit status: 0 on success, 1 on usage or open() failure, 2 when the
+ * ioctl fails. */
+int main(int argc, char ** argv)
+{
+        int dirfd, wantedi, rc;
+
+        /* Both arguments are mandatory: argv[2] is read unconditionally
+         * below, so accepting argc == 2 would pass NULL to atoi(). */
+        if (argc != 3)
+                usage(argv[0], stderr);
+
+        dirfd = open(argv[1], O_RDONLY);
+        if (dirfd < 0) {
+                perror("open");
+                exit(1);
+        }
+
+        wantedi = atoi(argv[2]);
+        printf("Creating %s/%d with ino %d\n", argv[1], wantedi, wantedi);
+
+        rc = ioctl(dirfd, EXTN_IOC_CREATE_INUM, wantedi);
+        if (rc < 0) {
+                perror("ioctl(EXTN_IOC_CREATE_INUM)");
+                exit(2);
+        }
+
+        return 0;
+}
--- /dev/null
+#!/bin/sh
+#
+# Lustre recovery upcall.
+#
+# Usage: <this script> <OST UUID>
+#
+# Waits until the OST host answers a ping, then drives lctl twice: first to
+# drop and re-create the network connection and UUID mapping for the OST,
+# then to probe the RPC device and request a new connection for that UUID.
+
+if [ -z "$1" ]; then
+	echo "No UUID given to Lustre upcall!" | wall
+	exit 1
+fi
+
+# FIXME: OSTHOST can't be hard-coded!
+OST=$1
+OSTHOST=dev7
+LUSTRE=/home/pschwan/lustre/lustre
+
+# Retry every 2 seconds until one ping (3 second deadline) succeeds.
+while ! ping -c 1 -w 3 "$OSTHOST"; do
+	sleep 2
+done
+
+# printf rather than "echo -n": the -n option is not portable under /bin/sh.
+printf 'OST %s UUID %s responding to pings : ' "$OSTHOST" "$OST"
+date
+
+"$LUSTRE/utils/lctl" <<EOF
+network tcp
+close_uuid $OST
+del_uuid $OST
+connect $OSTHOST 988
+add_uuid $OST $OSTHOST
+quit
+EOF
+
+# \$RPCDEV is escaped so the shell passes it to lctl literally.
+"$LUSTRE/utils/lctl" <<EOF
+device \$RPCDEV
+probe
+newconn $OST
+quit
+EOF
#
# Based in part on the XML obdctl modifications done by Brian Behlendorf
-import sys, getopt
-import string, os, stat, popen2, socket, time, random
+import sys, getopt, types
+import string, os, stat, popen2, socket, time, random, fcntl, FCNTL, select
import re, exceptions
import xml.dom.minidom
print """usage: lconf config.xml
config.xml Lustre configuration in xml format.
---get <url> URL to fetch a config file
+--ldapurl LDAP server URL, eg. ldap://localhost
+--config Cluster config name used for LDAP query
--node <nodename> Load config for <nodename>
+--select service=nodeA,service2=nodeB U
-d | --cleanup Cleans up config. (Shutdown)
-f | --force Forced unmounting and/or obd detach during cleanup
-v | --verbose Print system commands as they are run
30 - obd, mdd
40 - mds, ost
50 - mdc, osc
- 60 - lov, lovconfig
+ 60 - lov
70 - mountpoint, echo_client
--lustre=src_dir Base directory of lustre sources. This parameter will cause lconf
to load modules from a source tree.
self._portals_dir = ''
self._minlevel = 0
self._maxlevel = 100
- self._timeout = -1
+ self._timeout = 0
self._recovery_upcall = ''
+ self._ldapurl = ''
+ self._config_name = ''
+ self._select = {}
def verbose(self, flag = None):
if flag: self._verbose = flag
if val: self._node = val
return self._node
- def url(self, val = None):
- if val: self._url = val
- return self._url
-
def gdb_script(self):
if os.path.isdir('/r'):
return '/r' + self._gdb_script
def dump_file(self, val = None):
if val: self._dump_file = val
return self._dump_file
-
def minlevel(self, val = None):
if val: self._minlevel = int(val)
return self._minlevel
if val: self._recovery_upcall = val
return self._recovery_upcall
+    def ldapurl(self, val = None):
+        """Return the stored LDAP URL; a true VAL replaces it first."""
+        if val:
+            self._ldapurl = val
+        return self._ldapurl
+
+    def config_name(self, val = None):
+        """Return the stored config name; a true VAL replaces it first."""
+        if val:
+            self._config_name = val
+        return self._config_name
+
+    def init_select(self, arg):
+        """Record service-to-node overrides in self._select.
+
+        ARG has the form "service=nodeA,service2=nodeB".  Empty entries
+        (for example from a trailing comma) are skipped, and only the
+        first '=' of an entry separates the service from the node, so a
+        node value may itself contain '='.  An entry with no '=' at all
+        still raises ValueError.
+        """
+        for entry in arg.split(','):
+            if not entry:
+                continue
+            srv, node = entry.split('=', 1)
+            self._select[srv] = node
+
+    def select(self, srv):
+        """Return the node selected for service SRV, or None if unset."""
+        return self._select.get(srv)
+
+
config = Config()
# ============================================================
else:
raise CommandError('lctl', "unable to find lctl binary.")
+    def set_nonblock(self, fd):
+        """Add os.O_NDELAY to the file status flags of descriptor FD."""
+        flags = fcntl.fcntl(fd, FCNTL.F_GETFL)
+        flags = flags | os.O_NDELAY
+        fcntl.fcntl(fd, FCNTL.F_SETFL, flags)
+
def run(self, cmds):
"""
run lctl
"""
debug("+", self.lctl, cmds)
if config.noexec(): return (0, [])
- p = popen2.Popen3(self.lctl, 1)
- p.tochild.write(cmds + "\n")
- p.tochild.close()
- out = p.fromchild.readlines()
- err = p.childerr.readlines()
- ret = p.wait()
+
+ child = popen2.Popen3(self.lctl, 1) # Capture stdout and stderr from command
+ child.tochild.write(cmds + "\n")
+ child.tochild.close()
+
+ # From "Python Cookbook" from O'Reilly
+ outfile = child.fromchild
+ outfd = outfile.fileno()
+ self.set_nonblock(outfd)
+ errfile = child.childerr
+ errfd = errfile.fileno()
+ self.set_nonblock(errfd)
+
+ outdata = errdata = ''
+ outeof = erreof = 0
+ while 1:
+ ready = select.select([outfd,errfd],[],[]) # Wait for input
+ if outfd in ready[0]:
+ outchunk = outfile.read()
+ if outchunk == '': outeof = 1
+ outdata = outdata + outchunk
+ if errfd in ready[0]:
+ errchunk = errfile.read()
+ if errchunk == '': erreof = 1
+ errdata = errdata + errchunk
+ if outeof and erreof: break
+ # end of "borrowed" code
+
+ ret = child.wait()
if os.WIFEXITED(ret):
rc = os.WEXITSTATUS(ret)
else:
rc = 0
- if rc or len(err):
- raise CommandError(self.lctl, err, rc)
- return rc, out
+ if rc or len(errdata):
+ raise CommandError(self.lctl, errdata, rc)
+ return rc, outdata
def runcmd(self, *args):
"""
return dev
if config.reformat() or not os.access(file, os.R_OK | os.W_OK):
if size < 8000:
- error(file, "size must be larger than 8MB")
- run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, file))
+ panic(file, "size must be larger than 8MB, currently set to:", size)
+ (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size,
+ file))
+ if ret:
+ panic("Unable to create backing store:", file)
+
loop = loop_base()
# find next free loop
for n in xrange(0, MAX_LOOP_DEVICES):
""" Base class for the rest of the modules. The default cleanup method is
defined here, as well as some utilitiy funcs.
"""
- def __init__(self, module_name, dom_node):
- self.dom_node = dom_node
+ def __init__(self, module_name, db):
+ self.db = db
self.module_name = module_name
- self.name = get_attr(dom_node, 'name')
- self.uuid = get_attr(dom_node, 'uuid')
+ self.name = self.db.getName()
+ self.uuid = self.db.getUUID()
self.kmodule_list = []
self._server = None
self._connected = 0
msg = string.join(map(str,args))
print self.module_name + ":", self.name, self.uuid, msg
-
def lookup_server(self, srv_uuid):
""" Lookup a server's network information """
- net = get_ost_net(self.dom_node.parentNode, srv_uuid)
+ net = self.db.get_ost_net(srv_uuid)
if not net:
panic ("Unable to find a server for:", srv_uuid)
self._server = Network(net)
class Network(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'NETWORK', dom_node)
- self.net_type = get_attr(dom_node,'type')
- self.nid = get_text(dom_node, 'server', '*')
- self.port = get_text_int(dom_node, 'port', 0)
- self.send_mem = get_text_int(dom_node, 'send_mem', DEFAULT_TCPBUF)
- self.recv_mem = get_text_int(dom_node, 'recv_mem', DEFAULT_TCPBUF)
+ def __init__(self,db):
+ Module.__init__(self, 'NETWORK', db)
+ self.net_type = self.db.get_val('nettype')
+ self.nid = self.db.get_val('nid', '*')
+ self.port = self.db.get_val_int('port', 0)
+ self.send_mem = self.db.get_val_int('send_mem', DEFAULT_TCPBUF)
+ self.recv_mem = self.db.get_val_int('recv_mem', DEFAULT_TCPBUF)
if '*' in self.nid:
self.nid = get_local_address(self.net_type, self.nid)
if not self.nid:
ret, out = run(TCP_ACCEPTOR, '-s', self.send_mem, '-r', self.recv_mem, nal_id, self.port)
if ret:
raise CommandError(TCP_ACCEPTOR, out, ret)
- ret = self.dom_node.getElementsByTagName('route_tbl')
- for a in ret:
- for r in a.getElementsByTagName('route'):
- net_type = get_attr(r, 'type')
- gw = get_attr(r, 'gw')
- lo = get_attr(r, 'lo')
- hi = get_attr(r,'hi', '')
- lctl.add_route(net_type, gw, lo, hi)
- if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
- srv = nid2server(self.dom_node.parentNode.parentNode, lo)
- if not srv:
- panic("no server for nid", lo)
- else:
- lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
+        # Install every configured route.  For a tcp/toe route to a single
+        # nid (no 'hi' bound) on our own net type, also connect to the
+        # server that owns that nid.
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            lctl.add_route(net_type, gw, lo, hi)
+            if net_type in ('tcp', 'toe') and net_type == self.net_type and hi == '':
+                srvdb = self.db.nid2server(lo)
+                # Test the lookup result: 'srv' is not assigned until the
+                # else branch below.
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
lctl.network(self.net_type, self.nid)
def cleanup(self):
self.info(self.net_type, self.nid, self.port)
- ret = self.dom_node.getElementsByTagName('route_tbl')
- for a in ret:
- for r in a.getElementsByTagName('route'):
- lo = get_attr(r, 'lo')
- hi = get_attr(r,'hi', '')
- if self.net_type in ('tcp', 'toe') and hi == '':
- srv = nid2server(self.dom_node.parentNode.parentNode, lo)
- if not srv:
- panic("no server for nid", lo)
- else:
- try:
- lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
- except CommandError, e:
- print "disconnect failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
- try:
- lctl.del_route(self.net_type, self.nid, lo, hi)
- except CommandError, e:
- print "del_route failed: ", self.name
- e.dump()
- cleanup_error(e.rc)
+        # Undo what prepare() set up for each route: disconnect from the
+        # server behind a single-nid tcp/toe route, then delete the route.
+        # Failures are reported through cleanup_error() and do not stop
+        # the loop.
+        for net_type, gw, lo, hi in self.db.get_route_tbl():
+            if self.net_type in ('tcp', 'toe') and hi == '':
+                srvdb = self.db.nid2server(lo)
+                # Test the lookup result: 'srv' is not assigned until the
+                # else branch below.
+                if not srvdb:
+                    panic("no server for nid", lo)
+                else:
+                    srv = Network(srvdb)
+                    try:
+                        lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid)
+                    except CommandError, e:
+                        print "disconnect failed: ", self.name
+                        e.dump()
+                        cleanup_error(e.rc)
+            try:
+                lctl.del_route(self.net_type, self.nid, lo, hi)
+            except CommandError, e:
+                print "del_route failed: ", self.name
+                e.dump()
+                cleanup_error(e.rc)
try:
lctl.cleanup("RPCDEV", "RPCDEV_UUID")
run("killall acceptor")
class LDLM(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LDLM', dom_node)
+ def __init__(self,db):
+ Module.__init__(self, 'LDLM', db)
self.add_lustre_module('ldlm', 'ldlm')
def prepare(self):
if is_prepared(self.uuid):
setup ="")
class LOV(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LOV', dom_node)
- self.mds_uuid = get_first_ref(dom_node, 'mds')
- mds= lookup(dom_node.parentNode, self.mds_uuid)
- self.mds_name = getName(mds)
- devs = dom_node.getElementsByTagName('devices')
- if len(devs) > 0:
- dev_node = devs[0]
- self.stripe_sz = get_attr_int(dev_node, 'stripesize', 65536)
- self.stripe_off = get_attr_int(dev_node, 'stripeoffset', 0)
- self.pattern = get_attr_int(dev_node, 'pattern', 0)
- self.devlist = get_all_refs(dev_node, 'obd')
- self.stripe_cnt = get_attr_int(dev_node, 'stripecount', len(self.devlist))
+ def __init__(self,db):
+ Module.__init__(self, 'LOV', db)
+ self.mds_uuid = self.db.get_first_ref('mds')
+ mds= self.db.lookup(self.mds_uuid)
+ self.mds_name = mds.getName()
+ self.stripe_sz = self.db.get_val_int('stripesize', 65536)
+ self.stripe_off = self.db.get_val_int('stripeoffset', 0)
+ self.pattern = self.db.get_val_int('stripepattern', 0)
+ self.devlist = self.db.get_refs('obd')
+ self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
self.add_lustre_module('mdc', 'mdc')
self.add_lustre_module('lov', 'lov')
if is_prepared(self.uuid):
return
for obd_uuid in self.devlist:
- obd = lookup(self.dom_node.parentNode, obd_uuid)
+ obd = self.db.lookup(obd_uuid)
osc = get_osc(obd)
if osc:
try:
print "Error preparing OSC %s (inactive)\n" % osc_uuid
else:
panic('osc not found:', osc_uuid)
- mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+ mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
self.stripe_off, self.pattern, self.devlist, self.mds_name)
lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
if not is_prepared(self.uuid):
return
for obd_uuid in self.devlist:
- obd = lookup(self.dom_node.parentNode, obd_uuid)
+ obd = self.db.lookup(obd_uuid)
osc = get_osc(obd)
if osc:
osc.cleanup()
else:
panic('osc not found:', osc_uuid)
Module.cleanup(self)
- cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+ cleanup_mdc(self.db, self.mds_uuid)
def load_module(self):
for obd_uuid in self.devlist:
- obd = lookup(self.dom_node.parentNode, obd_uuid)
+ obd = self.db.lookup(obd_uuid)
osc = get_osc(obd)
if osc:
osc.load_module()
def cleanup_module(self):
Module.cleanup_module(self)
for obd_uuid in self.devlist:
- obd = lookup(self.dom_node.parentNode, obd_uuid)
+ obd = self.db.lookup(obd_uuid)
osc = get_osc(obd)
if osc:
osc.cleanup_module()
panic('osc not found:', osc_uuid)
class LOVConfig(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'LOVConfig', dom_node)
- self.lov_uuid = get_first_ref(dom_node, 'lov')
- l = lookup(dom_node.parentNode, self.lov_uuid)
+ def __init__(self,db):
+ Module.__init__(self, 'LOVConfig', db)
+
+ self.lov_uuid = self.db.get_first_ref('lov')
+ l = self.db.lookup(self.lov_uuid)
self.lov = LOV(l)
def prepare(self):
#nothing to do here
pass
-
-class MDS(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'MDS', dom_node)
- self.devname, self.size = get_device(dom_node)
- self.fstype = get_text(dom_node, 'fstype')
+class MDSDEV(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'MDSDEV', db)
+ self.devname = self.db.get_val('devpath','')
+ self.size = self.db.get_val_int('devsize', 0)
+ self.fstype = self.db.get_val('fstype', '')
+ # overwrite the original MDSDEV name and uuid with the MDS name and uuid
+ self.uuid = self.db.get_first_ref('mds')
+ mds = self.db.lookup(self.uuid)
+ self.name = mds.getName()
+ self.lovconfig_uuids = mds.get_refs('lovconfig')
# FIXME: if fstype not set, then determine based on kernel version
- self.format = get_text(dom_node, 'autoformat', "no")
+ self.format = self.db.get_val('autoformat', "no")
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
self.add_lustre_module('mds', 'mds')
- self.add_lustre_module('obdclass', 'fsfilt_%s'%(self.fstype))
+ if self.fstype:
+ self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype))
def prepare(self):
if is_prepared(self.uuid):
setup ="")
lctl.newdev(attach="mds %s %s" % (self.name, self.uuid),
setup ="%s %s" %(blkdev, self.fstype))
+ for uuid in self.lovconfig_uuids:
+ db = self.db.lookup(uuid)
+ lovconfig = LOVConfig(db)
+ lovconfig.prepare()
+
def cleanup(self):
if is_prepared('MDT_UUID'):
try:
# Very unusual case, as there is no MDC element in the XML anymore
# Builds itself from an MDS node
class MDC(Module):
- def __init__(self,dom_node):
- self.mds = MDS(dom_node)
- self.dom_node = dom_node
+ def __init__(self,db):
+ self.mds_uuid = db.getUUID()
+ self.mds_name = db.getName()
+ self.db = db
+ node_name = config.select(self.mds_name)
+ if node_name:
+ self.mdd_uuid = self.db.get_mdd(node_name, self.mds_uuid)
+ else:
+ self.mdd_uuid = db.get_first_ref('active')
+ if not self.mdd_uuid:
+ panic("No MDSDEV found for MDS service:", self.mds_name)
self.module_name = 'MDC'
self.kmodule_list = []
self._server = None
self._connected = 0
host = socket.gethostname()
- self.name = 'MDC_%s' % (self.mds.name)
+ self.name = 'MDC_%s' % (self.mds_name)
self.uuid = '%s_%05x_%05x' % (self.name, int(random.random() * 1048576),
int(random.random() * 1048576))
- self.lookup_server(self.mds.uuid)
+ self.lookup_server(self.mdd_uuid)
self.add_lustre_module('mdc', 'mdc')
def prepare(self):
if is_prepared(self.uuid):
return
- self.info(self.mds.uuid)
+ self.info(self.mds_uuid)
srv = self.get_server()
lctl.connect(srv.net_type, srv.nid, srv.port, srv.uuid, srv.send_mem, srv.recv_mem)
lctl.newdev(attach="mdc %s %s" % (self.name, self.uuid),
- setup ="%s %s" %(self.mds.uuid, srv.uuid))
+ setup ="%s %s" %(self.mds_uuid, srv.uuid))
class OBD(Module):
- def __init__(self, dom_node):
- Module.__init__(self, 'OBD', dom_node)
- self.obdtype = get_attr(dom_node, 'type')
- self.devname, self.size = get_device(dom_node)
- self.fstype = get_text(dom_node, 'fstype')
- self.active_target = get_text(dom_node, 'active_target')
+ def __init__(self, db):
+ Module.__init__(self, 'OBD', db)
+ self.obdtype = self.db.get_val('obdtype')
+ self.devname = self.db.get_val('devpath', '')
+ self.size = self.db.get_val_int('devsize', 0)
+ self.fstype = self.db.get_val('fstype', '')
+ self.active_target = self.db.get_first_ref('active')
# FIXME: if fstype not set, then determine based on kernel version
- self.format = get_text(dom_node, 'autoformat', 'yes')
+ self.format = self.db.get_val('autoformat', 'yes')
if self.fstype == 'extN':
self.add_lustre_module('extN', 'extN')
self.add_lustre_module(self.obdtype, self.obdtype)
clean_loop(self.devname)
class COBD(Module):
- def __init__(self, dom_node):
- Module.__init__(self, 'COBD', dom_node)
- self.real_uuid = get_first_ref(dom_node, 'real_obd')
- self.cache_uuid = get_first_ref(dom_node, 'cache_obd')
+ def __init__(self, db):
+ Module.__init__(self, 'COBD', db)
+ self.real_uuid = self.db.get_first_ref('realobd')
+ self.cache_uuid = self.db.get_first_ref('cacheobd')
self.add_lustre_module('cobd' , 'cobd')
# need to check /proc/mounts and /etc/mtab before
setup ="%s %s" %(self.real_uuid, self.cache_uuid))
class OST(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'OST', dom_node)
- self.obd_uuid = get_first_ref(dom_node, 'obd')
+ def __init__(self,db):
+ Module.__init__(self, 'OST', db)
+ self.obd_uuid = self.db.get_first_ref('obd')
self.add_lustre_module('ost', 'ost')
def prepare(self):
# virtual interface for OSC and LOV
class VOSC(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'VOSC', dom_node)
- if dom_node.nodeName == 'lov':
- self.osc = LOV(dom_node)
+ def __init__(self,db):
+ Module.__init__(self, 'VOSC', db)
+ if db.get_class() == 'lov':
+ self.osc = LOV(db)
else:
- self.osc = get_osc(dom_node)
+ self.osc = get_osc(db)
def get_uuid(self):
return self.osc.uuid
def prepare(self):
class OSC(Module):
- def __init__(self, dom_node, obd_name, obd_uuid, ost_uuid):
- self.dom_node = dom_node
+ def __init__(self, db, obd_name, obd_uuid, ost_uuid):
+ self.db = db
self.module_name = 'OSC'
self.name = 'OSC_%s' % (obd_name)
self.uuid = '%s_%05x' % (self.name, int(random.random() * 1048576))
self.obd_uuid = obd_uuid
self.ost_uuid = ost_uuid
+ debug("OSC:", obd_uuid, ost_uuid)
self.lookup_server(self.ost_uuid)
self.add_lustre_module('osc', 'osc')
class ECHO_CLIENT(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'ECHO_CLIENT', dom_node)
+ def __init__(self,db):
+ Module.__init__(self, 'ECHO_CLIENT', db)
self.add_lustre_module('obdecho', 'obdecho')
- self.obd_uuid = get_first_ref(dom_node, 'obd')
- obd = lookup(self.dom_node.parentNode, self.obd_uuid)
+ self.obd_uuid = self.db.get_first_ref('obd')
+ obd = self.db.lookup(self.obd_uuid)
self.osc = VOSC(obd)
def prepare(self):
return
self.osc.prepare() # XXX This is so cheating. -p
self.info(self.obd_uuid)
-
+
lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
- setup = self.obd_uuid)
+ setup = self.osc.get_uuid())
def cleanup(self):
if not is_prepared(self.uuid):
class Mountpoint(Module):
- def __init__(self,dom_node):
- Module.__init__(self, 'MTPT', dom_node)
- self.path = get_text(dom_node, 'path')
- self.mds_uuid = get_first_ref(dom_node, 'mds')
- self.obd_uuid = get_first_ref(dom_node, 'obd')
+ def __init__(self,db):
+ Module.__init__(self, 'MTPT', db)
+ self.path = self.db.get_val('path')
+ self.mds_uuid = self.db.get_first_ref('mds')
+ self.obd_uuid = self.db.get_first_ref('obd')
self.add_lustre_module('mdc', 'mdc')
self.add_lustre_module('llite', 'llite')
- obd = lookup(self.dom_node.parentNode, self.obd_uuid)
+ obd = self.db.lookup(self.obd_uuid)
self.osc = VOSC(obd)
def prepare(self):
self.osc.prepare()
- mdc_uuid = prepare_mdc(self.dom_node.parentNode, self.mds_uuid)
+ mdc_uuid = prepare_mdc(self.db, self.mds_uuid)
self.info(self.path, self.mds_uuid, self.obd_uuid)
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
(self.osc.get_uuid(), mdc_uuid, self.path)
panic("fs is still mounted:", self.path)
self.osc.cleanup()
- cleanup_mdc(self.dom_node.parentNode, self.mds_uuid)
+ cleanup_mdc(self.db, self.mds_uuid)
def load_module(self):
self.osc.load_module()
osc = OSC(obd_dom, obd.name, obd.uuid, obd.active_target)
return osc
+class LustreDB:
+    def lookup(self, uuid):
+        """Find the object with this UUID; the result is a new LustreDB
+        instance produced by the backend's _lookup_by_uuid()."""
+        found = self._lookup_by_uuid(uuid)
+        return found
+
+    def lookup_name(self, name, class_name = ""):
+        """Find an object by NAME (and CLASS_NAME); the result is a new
+        LustreDB instance produced by the backend's _lookup_by_name()."""
+        found = self._lookup_by_name(name, class_name)
+        return found
+
+    def lookup_class(self, class_name):
+        """Find objects of CLASS_NAME via the backend's
+        _lookup_by_class()."""
+        found = self._lookup_by_class(class_name)
+        return found
+
+    def get_val(self, tag, default=None):
+        """Return the backend value for TAG.
+
+        A false value falls back to DEFAULT when one was given; with no
+        default the miss is reported through debug() and None is returned.
+        """
+        value = self._get_val(tag)
+        if not value:
+            if default != None:
+                return default
+            debug("LustreDB", self.getName(), " no value for:", tag)
+            return None
+        return value
-def get_device(obd):
- list = obd.getElementsByTagName('device')
- if len(list) > 0:
- dev = list[0]
- dev.normalize();
- size = get_attr_int(dev, 'size', 0)
- return dev.firstChild.data, size
- return '', 0
-
-# Get the text content from the first matching child
-# If there is no content (or it is all whitespace), return
-# the default
-def get_text(dom_node, tag, default=""):
- list = dom_node.getElementsByTagName(tag)
- if len(list) > 0:
- dom_node = list[0]
- dom_node.normalize()
- if dom_node.firstChild:
- txt = string.strip(dom_node.firstChild.data)
- if txt:
- return txt
- return default
-
-def get_text_int(dom_node, tag, default=0):
- list = dom_node.getElementsByTagName(tag)
- n = default
- if len(list) > 0:
- dom_node = list[0]
- dom_node.normalize()
- if dom_node.firstChild:
- txt = string.strip(dom_node.firstChild.data)
- if txt:
- try:
- n = int(txt)
- except ValueError:
- panic("text value is not integer:", txt)
- return n
-
-def get_attr(dom_node, attr, default=""):
- v = dom_node.getAttribute(attr)
- if v:
- return v
- return default
-
-def get_attr_int(dom_node, attr, default=0):
- n = default
- v = dom_node.getAttribute(attr)
- if v:
+    def get_class(self):
+        """Return the class of this object as reported by the backend's
+        _get_class()."""
+        cls = self._get_class()
+        return cls
+
+ def get_val_int(self, tag, default=0):
+ str = self._get_val(tag)
try:
- n = int(v)
+ if str:
+ return int(str)
+ return default
except ValueError:
- panic("attr value is not integer", v)
- return n
-
-def get_first_ref(dom_node, tag):
- """ Get the first uuidref of the type TAG. Used one only
- one is expected. Returns the uuid."""
- uuid = None
- refname = '%s_ref' % tag
- list = dom_node.getElementsByTagName(refname)
- if len(list) > 0:
- uuid = getRef(list[0])
- return uuid
+ panic("text value is not integer:", str)
+
+    def get_first_ref(self, tag):
+        """Return the first uuid referenced under TAG, or None when there
+        are no such references.  Meant for tags where only one reference
+        is expected."""
+        refs = self._get_refs(tag)
+        if len(refs) == 0:
+            return None
+        return refs[0]
-def get_all_refs(dom_node, tag):
- """ Get all the refs of type TAG. Returns list of uuids. """
- uuids = []
- refname = '%s_ref' % tag
- list = dom_node.getElementsByTagName(refname)
- if len(list) > 0:
- for i in list:
- uuids.append(getRef(i))
- return uuids
-
-def get_ost_net(dom_node, uuid):
- ost = lookup(dom_node, uuid)
- uuid = get_first_ref(ost, 'network')
- if not uuid:
+    def get_refs(self, tag):
+        """Return the list of uuids referenced under TAG."""
+        return self._get_refs(tag)
+
+    def get_all_refs(self):
+        """Return every reference of this object, as produced by the
+        backend's _get_all_refs()."""
+        return self._get_all_refs()
+
+    def get_ost_net(self, uuid):
+        """Look up the object UUID and return the db entry of its first
+        'network' reference, or None when it has none."""
+        ost = self.lookup(uuid)
+        net_uuid = ost.get_first_ref('network')
+        if net_uuid:
+            return ost.lookup(net_uuid)
+        return None
+
+    def nid2server(self, nid):
+        # Return the db entry of the 'network' object whose nid equals NID.
+        # Candidates come from lookup_class(), which every backend
+        # implements, and the matching entry itself is returned.
+        netlist = self.lookup_class('network')
+        for net_db in netlist:
+            if net_db.get_val('nid') == nid:
+                return net_db
return None
- return lookup(dom_node, uuid)
-
-def nid2server(dom_node, nid):
- netlist = dom_node.getElementsByTagName('network')
- for net_node in netlist:
- if get_text(net_node, 'server') == nid:
- return Network(net_node)
- return None
-def lookup(dom_node, uuid):
- for n in dom_node.childNodes:
- if n.nodeType == n.ELEMENT_NODE:
- if getUUID(n) == uuid:
- return n
+ # the tag name is the service type
+ # fixme: this should do some checks to make sure the dom_node is a service
+ #
+ # determine what "level" a particular node is at.
+
+ # the order of initialization is based on level.
+    def getServiceLevel(self):
+        """Return the start-up level for this object's class.
+
+        Unknown classes, and levels outside the range given by
+        config.minlevel() and config.maxlevel(), yield 0.
+        """
+        levels = (
+            (('network',), 10),
+            (('device', 'ldlm'), 20),
+            (('obd', 'mdd', 'cobd'), 30),
+            (('mdsdev', 'ost'), 40),
+            (('mdc', 'osc'), 50),
+            (('lov',), 60),
+            (('mountpoint', 'echoclient'), 70),
+        )
+        kind = self.get_class()
+        level = 0
+        for names, value in levels:
+            if kind in names:
+                level = value
+                break
+
+        if level < config.minlevel() or level > config.maxlevel():
+            return 0
+        return level
+
+ #
+ # return list of services in a profile. list is a list of tuples
+ # [(level, db_object),]
+    def getServices(self):
+        """Return the referenced services as a sorted list of
+        (level, db_object) tuples.  Services whose level is 0 are left
+        out; an unresolvable reference is reported through panic()."""
+        services = []
+        for ref_class, ref_uuid in self.get_all_refs():
+            servdb = self.lookup(ref_uuid)
+            if not servdb:
+                panic('service not found: ' + ref_uuid)
+            else:
+                level = servdb.getServiceLevel()
+                if level > 0:
+                    services.append((level, servdb))
+
+        services.sort()
+        return services
+
+ # Find the mdsdev attached to node_name that points to
+ # mds_uuid
+ # node->profiles->mdsdev_refs->mds
+    def get_mdd(self, node_name, mds_uuid):
+        """Return the uuid of the mdsdev reachable from node NODE_NAME
+        (node -> profile refs -> mdsdev refs) whose first 'mds' reference
+        is MDS_UUID.  Returns None when the node or a match is missing."""
+        node_db = self.lookup_name(node_name)
+        if not node_db:
+            return None
+        for prof_uuid in node_db.get_refs('profile'):
+            prof_db = node_db.lookup(prof_uuid)
+            for mdd_uuid in prof_db.get_refs('mdsdev'):
+                candidate = self.lookup(mdd_uuid)
+                if candidate.get_first_ref('mds') == mds_uuid:
+                    return mdd_uuid
+        return None
+
+
+class LustreDB_XML(LustreDB):
+    """LustreDB backend that reads the configuration from an XML DOM.
+
+    dom_node is the element this instance describes; root_node is the
+    element from which uuid, name and class lookups start.
+    """
+    def __init__(self, dom, root_node):
+        # init xmlfile
+        self.dom_node = dom
+        self.root_node = root_node
+
+    def xmltext(self, dom_node, tag):
+        """Return the stripped text of the first TAG element below
+        DOM_NODE, or None when there is no such element or no text."""
+        elems = dom_node.getElementsByTagName(tag)
+        if len(elems) > 0:
+            dom_node = elems[0]
+            dom_node.normalize()
+            if dom_node.firstChild:
+                txt = string.strip(dom_node.firstChild.data)
+                if txt:
+                    return txt
+        return None
+
+    def xmlattr(self, dom_node, attr, default=''):
+        """Return attribute ATTR of DOM_NODE, or DEFAULT when it is empty.
+
+        DEFAULT is optional because get_routes() and get_route_tbl() pass
+        it explicitly while the other callers do not.
+        """
+        value = dom_node.getAttribute(attr)
+        if value:
+            return value
+        return default
+
+    def _get_val(self, tag):
+        """a value could be an attribute of the current node
+        or the text value in a child node"""
+        ret = self.xmlattr(self.dom_node, tag)
+        if not ret:
+            ret = self.xmltext(self.dom_node, tag)
+        return ret
+
+    def _get_class(self):
+        """The class of an XML object is its element name."""
+        return self.dom_node.nodeName
+
+    #
+    # [(ref_class, ref_uuid),]
+    def _get_all_refs(self):
+        """Return a sorted list of (element name, uuidref) tuples, one per
+        child element of this node."""
+        refs = []
+        for n in self.dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                ref_uuid = self.xml_get_ref(n)
+                ref_class = n.nodeName
+                refs.append((ref_class, ref_uuid))
+
+        refs.sort()
+        return refs
+
+    def _get_refs(self, tag):
+        """ Get all the refs of type TAG.  Returns list of uuids. """
+        uuids = []
+        refname = '%s_ref' % tag
+        reflist = self.dom_node.getElementsByTagName(refname)
+        for r in reflist:
+            uuids.append(self.xml_get_ref(r))
+        return uuids
+
+    def xmllookup_by_uuid(self, dom_node, uuid):
+        """Depth-first search below DOM_NODE for the element whose 'uuid'
+        attribute equals UUID; returns the element or None."""
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_uuid(n) == uuid:
+                    return n
+                else:
+                    found = self.xmllookup_by_uuid(n, uuid)
+                    if found: return found
+        return None
+
+    def _lookup_by_uuid(self, uuid):
+        """Return a LustreDB_XML for the element with UUID, or None."""
+        dom = self.xmllookup_by_uuid(self.root_node, uuid)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+        return None
+
+    def xmllookup_by_name(self, dom_node, name):
+        """Depth-first search below DOM_NODE for the element whose 'name'
+        attribute equals NAME; returns the element or None."""
+        for n in dom_node.childNodes:
+            if n.nodeType == n.ELEMENT_NODE:
+                if self.xml_get_name(n) == name:
+                    return n
+                else:
+                    found = self.xmllookup_by_name(n, name)
+                    if found: return found
+        return None
+
+    def _lookup_by_name(self, name, class_name):
+        """Return a LustreDB_XML for the element named NAME, or None.
+        CLASS_NAME is accepted but not used by this backend."""
+        dom = self.xmllookup_by_name(self.root_node, name)
+        if dom:
+            return LustreDB_XML(dom, self.root_node)
+        return None
+
+    def xmllookup_by_class(self, dom_node, class_name):
+        return dom_node.getElementsByTagName(class_name)
+
+    def _lookup_by_class(self, class_name):
+        """Return a list with one LustreDB_XML per CLASS_NAME element
+        found below the root node."""
+        ret = []
+        domlist = self.xmllookup_by_class(self.root_node, class_name)
+        for node in domlist:
+            ret.append(LustreDB_XML(node, self.root_node))
+        return ret
+
+    def xml_get_name(self, n):
+        return n.getAttribute('name')
+
+    def getName(self):
+        return self.xml_get_name(self.dom_node)
+
+    def xml_get_ref(self, n):
+        return n.getAttribute('uuidref')
+
+    def xml_get_uuid(self, dom_node):
+        return dom_node.getAttribute('uuid')
+
+    def getUUID(self):
+        return self.xml_get_uuid(self.dom_node)
+
+    def get_routes(self, type, gw):
+        """ Return the routes as a list of tuples of the form:
+        [(type, gw, lo, hi),]"""
+        res = []
+        tbl = self.dom_node.getElementsByTagName('route_tbl')
+        for t in tbl:
+            routes = t.getElementsByTagName('route')
+            for r in routes:
+                lo = self.xmlattr(r, 'lo')
+                hi = self.xmlattr(r, 'hi', '')
+                res.append((type, gw, lo, hi))
+        return res
+
+    def get_route_tbl(self):
+        """Return every route below this node as a list of
+        (net_type, gw, lo, hi) tuples; a missing 'hi' is ''."""
+        ret = []
+        tbls = self.dom_node.getElementsByTagName('route_tbl')
+        for tbl in tbls:
+            for r in tbl.getElementsByTagName('route'):
+                net_type = self.xmlattr(r, 'type')
+                gw = self.xmlattr(r, 'gw')
+                lo = self.xmlattr(r, 'lo')
+                hi = self.xmlattr(r,'hi', '')
+                ret.append((net_type, gw, lo, hi))
+        return ret
+
+
+# ================================================================
+# LDAP Support
+class LustreDB_LDAP(LustreDB):
+    def __init__(self, name, attrs,
+                 base = "fs=lustre",
+                 parent = None,
+                 url = "ldap://localhost",
+                 user = "cn=Manager, fs=lustre",
+                 pw = "secret"
+                 ):
+        """Wrap one LDAP entry (NAME, ATTRS).
+
+        With a PARENT the connection and search base are taken from it;
+        without one BASE is used and a new connection is opened.
+        """
+        self._name, self._attrs = name, attrs
+        self._parent = parent
+        self._url, self._user, self._pw = url, user, pw
+        self._base = base
+        if not parent:
+            self.open()
+        else:
+            self.l = parent.l
+            self._base = parent._base
+
+    def open(self):
+        """Connect to the LDAP server at self._url and do an anonymous
+        simple bind, storing the connection in self.l.  Any LDAP error is
+        handed to panic()."""
+        import ldap
+        try:
+            self.l = ldap.initialize(self._url)
+            # Set LDAP protocol version used
+            self.l.protocol_version=ldap.VERSION3
+            # user and pw only needed if modifying db
+            self.l.bind_s("", "", ldap.AUTH_SIMPLE);
+        except ldap.LDAPError, e:
+            panic(e)
+            # FIXME, do something useful here
+
+ def close(self):
+ """Unbind the LDAP connection held in self.l."""
+ self.l.unbind_s()
+
+    def ldap_search(self, filter):
+        """Search one level below the base DN with FILTER and return a
+        list with the _lookup_by_uuid() result for every uuid found.  A
+        missing base object yields an empty list; other LDAP errors are
+        printed."""
+        import ldap
+        found_uuids = []
+        try:
+            entries = self.l.search_s(self._base, ldap.SCOPE_ONELEVEL,
+                                      filter, ["uuid"])
+            for name, attrs in entries:
+                for value in attrs['uuid']:
+                    found_uuids.append(value)
+        except ldap.NO_SUCH_OBJECT, e:
+            pass
+        except ldap.LDAPError, e:
+            print e # FIXME: die here?
+        results = []
+        for uuid in found_uuids:
+            results.append(self._lookup_by_uuid(uuid))
+        return results
+
+    def _lookup_by_name(self, name, class_name):
+        """Return the single entry whose lustreName is NAME; when the
+        search finds zero or several entries an empty list is returned.
+        CLASS_NAME is not used."""
+        matches = self.ldap_search("lustreName=%s" %(name))
+        if len(matches) != 1:
+            return []
+        return matches[0]
+
+    def _lookup_by_class(self, class_name):
+        """Search for entries whose objectclass is CLASS_NAME in upper
+        case."""
+        query = "objectclass=%s" %(class_name.upper())
+        return self.ldap_search(query)
+
+ def _lookup_by_uuid(self, uuid):
+ """Fetch the entry uuid=UUID,<self._base> and wrap it in a
+ LustreDB_LDAP created with parent=self.
+
+ Returns None when the entry does not exist or the search fails."""
+ import ldap
+ dn = "uuid=%s,%s" % (uuid, self._base)
+ ret = None
+ try:
+ # SCOPE_BASE searches only the entry named by dn.
+ for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE,
+ "objectclass=*"):
+ ret = LustreDB_LDAP(name, attrs, parent = self)
+
+ except ldap.NO_SUCH_OBJECT, e:
+ debug("NO_SUCH_OBJECT:", uuid)
+ pass # ret stays None
+ except ldap.LDAPError, e:
+ print e # FIXME: die here?
+ return ret
+
+
+ def _get_val(self, k):
+ ret = None
+ if self._attrs.has_key(k):
+ v = self._attrs[k]
+ if type(v) == types.ListType:
+ ret = str(v[0])
else:
- n = lookup(n, uuid)
- if n: return n
- return None
-
-# Get name attribute of dom_node
-def getName(dom_node):
- return dom_node.getAttribute('name')
+ ret = str(v)
+ return ret
-def getRef(dom_node):
- return dom_node.getAttribute('uuidref')
+ def _get_class(self):
+ return string.lower(self._attrs['objectClass'][0])
-# Get name attribute of dom_node
-def getUUID(dom_node):
- return dom_node.getAttribute('uuid')
+ #
+ # [(ref_class, ref_uuid),]
+ def _get_all_refs(self):
+ list = []
+ for k in self._attrs.keys():
+ if re.search('.*Ref', k):
+ for uuid in self._attrs[k]:
+ list.append((k, uuid))
+ return list
-# the tag name is the service type
-# fixme: this should do some checks to make sure the dom_node is a service
-def getServiceType(dom_node):
- return dom_node.nodeName
+ def _get_refs(self, tag):
+ """ Get all the refs of type TAG. Returns list of uuids.
+
+ The uuids are the values of the '<TAG>Ref' attribute; an entry
+ without that attribute yields an empty list. """
+ refname = '%sRef' % tag
+ if self._attrs.has_key(refname):
+ return self._attrs[refname]
+ return []
-#
-# determine what "level" a particular node is at.
-# the order of iniitailization is based on level.
-def getServiceLevel(dom_node):
- type = getServiceType(dom_node)
- ret=0;
- if type in ('network',):
- ret = 10
- elif type in ('device', 'ldlm'):
- ret = 20
- elif type in ('obd', 'mdd', 'cobd'):
- ret = 30
- elif type in ('mds','ost'):
- ret = 40
- elif type in ('mdc','osc'):
- ret = 50
- elif type in ('lov', 'lovconfig'):
- ret = 60
- elif type in ('mountpoint', 'echo_client'):
- ret = 70
-
- if ret < config.minlevel() or ret > config.maxlevel():
- ret = 0
- return ret
+ def getName(self):
+ return self._get_val('lustreName')
-#
-# return list of services in a profile. list is a list of tuples
-# [(level, dom_node),]
-def getServices(lustreNode, profileNode):
- list = []
- for n in profileNode.childNodes:
- if n.nodeType == n.ELEMENT_NODE:
- servNode = lookup(lustreNode, getRef(n))
- if not servNode:
- print n
- panic('service not found: ' + getRef(n))
- level = getServiceLevel(servNode)
- if level > 0:
- list.append((level, servNode))
- list.sort()
- return list
-
-def getByName(lustreNode, name, tag):
- ndList = lustreNode.getElementsByTagName(tag)
- for nd in ndList:
- if getName(nd) == name:
- return nd
- return None
-
+ def getUUID(self):
+ return self._get_val('uuid')
+
+ def get_route_tbl(self):
+ return []
############################################################
# MDC UUID hack -
# FIXME: clean this mess up!
#
saved_mdc = {}
-def prepare_mdc(dom_node, mds_uuid):
+def prepare_mdc(db, mds_uuid):
global saved_mdc
- mds_node = lookup(dom_node, mds_uuid);
- if not mds_node:
+ mds_db = db.lookup(mds_uuid);
+ if not mds_db:
panic("no mds:", mds_uuid)
if saved_mdc.has_key(mds_uuid):
return saved_mdc[mds_uuid]
- mdc = MDC(mds_node)
+ mdc = MDC(mds_db)
mdc.prepare()
saved_mdc[mds_uuid] = mdc.uuid
return mdc.uuid
-def cleanup_mdc(dom_node, mds_uuid):
+def cleanup_mdc(db, mds_uuid):
global saved_mdc
- mds_node = lookup(dom_node, mds_uuid);
- if not mds_node:
+ mds_db = db.lookup(mds_uuid);
+ if not mds_db:
panic("no mds:", mds_uuid)
if not saved_mdc.has_key(mds_uuid):
- mdc = MDC(mds_node)
+ mdc = MDC(mds_db)
mdc.cleanup()
saved_mdc[mds_uuid] = mdc.uuid
local_node = []
router_flag = 0
-def init_node(dom_node):
+def init_node(node_db):
global local_node, router_flag
- netlist = dom_node.getElementsByTagName('network')
- for dom_net in netlist:
- type = get_attr(dom_net, 'type')
- gw = get_text(dom_net, 'server')
+ netlist = node_db.lookup_class('network')
+ for db in netlist:
+ type = db.get_val('nettype')
+ gw = db.get_val('nid')
local_node.append((type, gw))
def node_needs_router():
return router_flag
-def get_routes(type, gw, dom_net):
- """ Return the routes as a list of tuples of the form:
- [(type, gw, lo, hi),]"""
- res = []
- tbl = dom_net.getElementsByTagName('route_tbl')
- for t in tbl:
- routes = t.getElementsByTagName('route')
- for r in routes:
- lo = get_attr(r, 'lo')
- hi = get_attr(r, 'hi', '')
- res.append((type, gw, lo, hi))
- return res
-
-
def init_route_config(lustre):
""" Scan the lustre config looking for routers. Build list of
routes. """
global routes, router_flag
routes = []
- list = lustre.getElementsByTagName('node')
- for node in list:
- if get_attr(node, 'router'):
+ list = lustre.lookup_class('node')
+ for node_db in list:
+ if node_db.get_val_int('router', 0):
router_flag = 1
for (local_type, local_nid) in local_node:
gw = None
- netlist = node.getElementsByTagName('network')
- for dom_net in netlist:
- if local_type == get_attr(dom_net, 'type'):
- gw = get_text(dom_net, 'server')
+ netlist = node_db.lookup_class('network')
+ # Network entries carry 'nettype' and 'nid' (the names init_node
+ # reads and lmc writes); the old 'type'/'server' names never
+ # match, so no gateway would be found.
+ for db in netlist:
+ if local_type == db.get_val('nettype'):
+ gw = db.get_val('nid')
break
if not gw:
continue
- for dom_net in netlist:
- if local_type != get_attr(dom_net, 'type'):
- for route in get_routes(local_type, gw, dom_net):
+ # Collect the routes of the router's other networks, reached
+ # through the gateway found above.
+ for db in netlist:
+ if local_type != db.get_val('nettype'):
+ for route in db.get_routes(local_type, gw):
routes.append(route)
def local_net(net):
global local_node
for iface in local_node:
+ #debug("local_net a:", net.net_type, "b:", iface[0])
if net.net_type == iface[0]:
return 1
return 0
return None
-
############################################################
# lconf level logic
# Start a service.
-def startService(dom_node, module_flag):
- type = getServiceType(dom_node)
- debug('Service:', type, getName(dom_node), getUUID(dom_node))
+def startService(db, module_flag):
+ type = db.get_class()
+ debug('Service:', type, db.getName(), db.getUUID())
# there must be a more dynamic way of doing this...
n = None
if type == 'ldlm':
- n = LDLM(dom_node)
+ n = LDLM(db)
elif type == 'lov':
- n = LOV(dom_node)
- elif type == 'lovconfig':
- n = LOVConfig(dom_node)
+ n = LOV(db)
elif type == 'network':
- n = Network(dom_node)
+ n = Network(db)
elif type == 'obd':
- n = OBD(dom_node)
+ n = OBD(db)
elif type == 'cobd':
- n = COBD(dom_node)
+ n = COBD(db)
elif type == 'ost':
- n = OST(dom_node)
- elif type == 'mds':
- n = MDS(dom_node)
+ n = OST(db)
+ elif type == 'mdsdev':
+ n = MDSDEV(db)
elif type == 'osc':
- n = VOSC(dom_node)
+ n = VOSC(db)
elif type == 'mdc':
- n = MDC(dom_node)
+ n = MDC(db)
elif type == 'mountpoint':
- n = Mountpoint(dom_node)
- elif type == 'echo_client':
- n = ECHO_CLIENT(dom_node)
+ n = Mountpoint(db)
+ elif type == 'echoclient':
+ n = ECHO_CLIENT(db)
else:
panic ("unknown service type:", type)
# * make sure partitions are in place and prepared
# * initialize devices with lctl
# Levels is important, and needs to be enforced.
-def startProfile(lustreNode, profileNode, module_flag):
- if not profileNode:
+def startProfile(prof_db, module_flag):
+ if not prof_db:
panic("profile:", profile, "not found.")
- services = getServices(lustreNode, profileNode)
+ services = prof_db.getServices()
if config.cleanup():
services.reverse()
for s in services:
#
# Load profile for
-def doHost(lustreNode, hosts):
+def doHost(lustreDB, hosts):
global routes
global router_flag
- dom_node = None
+ node_db = None
for h in hosts:
- dom_node = getByName(lustreNode, h, 'node')
- if dom_node:
+ node_db = lustreDB.lookup_name(h, 'node')
+ if node_db:
break
- if not dom_node:
+ if not node_db:
print 'No host entry found.'
return
- if get_attr(dom_node, 'router'):
- router_flag = 1
- else:
- router_flag = 0
- recovery_upcall = get_attr(dom_node, 'recovery_upcall')
- timeout = get_attr_int(dom_node, 'timeout')
+ router_flag = node_db.get_val_int('router', 0)
+ recovery_upcall = node_db.get_val('recovery_upcall', '')
+ timeout = node_db.get_val_int('timeout', 0)
if not router_flag:
- init_node(dom_node)
- init_route_config(lustreNode)
+ init_node(node_db)
+ init_route_config(lustreDB)
# Two step process: (1) load modules, (2) setup lustre
# if not cleaning, load modules first.
module_flag = not config.cleanup()
- reflist = dom_node.getElementsByTagName('profile')
- for profile in reflist:
- startProfile(lustreNode, profile, module_flag)
+ prof_list = node_db.get_refs('profile')
+ for prof_uuid in prof_list:
+ prof_db = node_db.lookup(prof_uuid)
+ startProfile(prof_db, module_flag)
if not config.cleanup():
sys_set_debug_path()
sys_set_timeout(timeout)
sys_set_recovery_upcall(recovery_upcall)
-
module_flag = not module_flag
- for profile in reflist:
- startProfile(lustreNode, profile, module_flag)
+ for prof_uuid in prof_list:
+ prof_db = node_db.lookup(prof_uuid)
+ startProfile(prof_db, module_flag)
############################################################
# Command line processing
"portals=", "makeldiff", "cleanup", "noexec",
"help", "node=", "nomod", "nosetup",
"dump=", "force", "minlevel=", "maxlevel=",
- "timeout=", "recovery_upcall="]
+ "timeout=", "recovery_upcall=",
+ "ldapurl=", "config=", "select="]
opts = []
args = []
config.dump_file(a)
if o in ("-f", "--force"):
config.force(1)
- if o in ("--minlevel",):
+ if o == "--minlevel":
config.minlevel(a)
- if o in ("--maxlevel",):
+ if o == "--maxlevel":
config.maxlevel(a)
- if o in ("--timeout",):
+ if o == "--timeout":
config.timeout(a)
- if o in ("--recovery_upcall",):
+ if o == "--recovery_upcall":
config.recovery_upcall(a)
+ if o == "--ldapurl":
+ config.ldapurl(a)
+ if o == "--config":
+ config.config_name(a)
+ if o == "--select":
+ config.init_select(a)
+
return args
def fetch(url):
def sys_set_timeout(timeout):
# the command overrides the value in the node config
- if config.timeout() >= 0:
+ if config.timeout() > 0:
timeout = config.timeout()
- if timeout >= 0:
+ if timeout > 0:
debug("setting timeout:", timeout)
sysctl('lustre/timeout', timeout)
if not os.access(args[0], os.R_OK):
print 'File not found or readable:', args[0]
sys.exit(1)
- dom = xml.dom.minidom.parse(args[0])
- elif config.url():
- xmldata = fetch(config.url())
- dom = xml.dom.minidom.parseString(xmldata)
+ try:
+ dom = xml.dom.minidom.parse(args[0])
+ except Exception:
+ panic("%s does not appear to be a config file." % (args[0]))
+ sys.exit(1) # make sure to die here, even in debug mode.
+ db = LustreDB_XML(dom.documentElement, dom.documentElement)
+ elif config.ldapurl():
+ if not config.config_name():
+ panic("--ldapurl requires --config name")
+ dn = "config=%s,fs=lustre" % (config.config_name())
+ db = LustreDB_LDAP('', {}, base=dn, url = config.ldapurl())
else:
usage()
sys_make_devices()
sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
- doHost(dom.documentElement, node_list)
+
+ doHost(db, node_list)
if __name__ == "__main__":
try:
"usage: lov_set_osc_active <OSC UUID> <1|0 (active|inactive)>"},
{"newconn", jt_obd_newconn, 0, "newconn <olduuid> [newuuid]"},
{"failconn", jt_obd_failconn, 0, "failconn <uuid>"},
+ {"lookup", jt_obd_mdc_lookup, 0, "usage: lookup <directory> <file>"},
/* Debug commands */
{"======== debug =========", jt_noop, 0, "debug"},
#include <linux/lustre_lite.h>
#include <linux/obd_lov.h>
-#warning Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c
+/* XXX Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c */
#define MAX_LOV_UUID_COUNT 1000
#define OBD_NOT_FOUND ((__u32)-1)
else
buflen = lmmlen;
-#warning max ioctl buffer size currently hardcoded to 8192
+ /* XXX max ioctl buffer size currently hardcoded to 8192 */
if (buflen > 8192) {
int nuuids, remaining, nluoinfos;
if (flag != FTW_F)
return 0;
- if ((obdcount == 0) && (getobdindex(path) == OBD_NOT_FOUND)) {
+ if (getobdindex(path) == OBD_NOT_FOUND && obdcount == 0) {
/* terminate nftw walking this tree */
return(1);
}
- if ((fd = open(path, O_RDONLY)) < 0) {
+ if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
errMsg("open \"%.20s\" failed.", path);
perror("open");
exit(1);
if ((rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm)) < 0) {
errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed.");
perror("ioctl");
- exit(1);
+ return 0;
}
close(fd);
- if (query || verbose || lmm->lmm_objects[obdindex].l_object_id)
+ if (query || verbose ||
+ (obdindex != OBD_NOT_FOUND &&
+ lmm->lmm_objects[obdindex].l_object_id))
printf("%s\n", path);
if (verbose) {
printf("lmm_magic: 0x%x\n", lmm->lmm_magic);
printf("lmm_object_id: "LPX64"\n", lmm->lmm_object_id);
- printf("lmm_stripe_offset: %d\n", lmm->lmm_stripe_offset);
- printf("lmm_stripe_count: %d\n", lmm->lmm_stripe_count);
- printf("lmm_ost_count: %d\n", lmm->lmm_ost_count);
- printf("lmm_stripe_pattern: %d\n", lmm->lmm_stripe_pattern);
+ printf("lmm_stripe_offset: %u\n", (int)lmm->lmm_stripe_offset);
+ printf("lmm_stripe_count: %u\n", (int)lmm->lmm_stripe_count);
+ printf("lmm_stripe_size: %u\n", (int)lmm->lmm_stripe_size);
+ printf("lmm_ost_count: %u\n", lmm->lmm_ost_count);
+ printf("lmm_stripe_pattern: %d\n", lmm->lmm_magic & 0xf);
}
count = lmm->lmm_ost_count;
--node node_name
--mds mds_name
--dev path
+ --fstype extN|ext3
--size size
--add lov
--lov lov_name
--dev path
--size size
+ --fstype extN|ext3
--obduuid uuid
--add mtpt - Mountpoint
def network(self, name, uuid, hostname, net, port=0, tcpbuf=0):
"""create <network> node"""
network = self.newService("network", name, uuid)
- network.setAttribute("type", net);
- self.addElement(network, "server", hostname)
+ network.setAttribute("nettype", net);
+ self.addElement(network, "nid", hostname)
if port:
self.addElement(network, "port", "%d" %(port))
if tcpbuf:
- self.addElement(network, "send_mem", "%d" %(tcpbuf))
- self.addElement(network, "recv_mem", "%d" %(tcpbuf))
+ self.addElement(network, "sendmem", "%d" %(tcpbuf))
+ self.addElement(network, "recvmem", "%d" %(tcpbuf))
return network
ref.setAttribute("hi", hi)
return ref
- def node(self, name, uuid):
+ def profile(self, name, uuid):
+ """ create a profile """
+ profile = self.newService("profile", name, uuid)
+ return profile
+
+ def node(self, name, uuid, prof_uuid):
""" create a host """
node = self.newService("node", name, uuid)
- self.addElement(node, 'profile')
+ node.appendChild(self.ref("profile", prof_uuid))
return node
def ldlm(self, name, uuid):
def obd(self, name, uuid, fs, obdtype, devname, format, ost_uuid, dev_size=0):
obd = self.newService("obd", name, uuid)
- obd.setAttribute('type', obdtype)
- self.addElement(obd, 'active_target', ost_uuid)
+ obd.setAttribute('obdtype', obdtype)
+ obd.appendChild(self.ref("active", ost_uuid))
if fs:
self.addElement(obd, "fstype", fs)
if devname:
- dev = self.addElement(obd, "device", devname)
- if (dev_size):
- dev.setAttribute("size", "%s" % (dev_size))
+ dev = self.addElement(obd, "devpath", devname)
self.addElement(obd, "autoformat", format)
+ if dev_size:
+ self.addElement(obd, "devsize", "%s" % (dev_size))
return obd
-# def osc(self, name, uuid, obd_uuid, net_uuid):
-# osc = self.newService("osc", name, uuid)
-# osc.appendChild(self.ref("ost", net_uuid))
-# osc.appendChild(self.ref("obd", obd_uuid))
-# return osc
-
def cobd(self, name, uuid, real_uuid, cache_uuid):
cobd = self.newService("cobd", name, uuid)
- cobd.appendChild(self.ref("real_obd",real_uuid))
- cobd.appendChild(self.ref("cache_obd",cache_uuid))
+ cobd.appendChild(self.ref("realobd",real_uuid))
+ cobd.appendChild(self.ref("cacheobd",cache_uuid))
return cobd
def ost(self, name, uuid, obd_uuid, net_uuid):
def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern):
lov = self.newService("lov", name, uuid)
lov.appendChild(self.ref("mds", mds_uuid))
- devs = self.addElement(lov, "devices" )
- devs.setAttribute("stripesize", stripe_sz)
- devs.setAttribute("stripecount", stripe_cnt)
- devs.setAttribute("pattern", pattern)
+ lov.setAttribute("stripesize", stripe_sz)
+ lov.setAttribute("stripecount", stripe_cnt)
+ lov.setAttribute("stripepattern", pattern)
return lov
def lovconfig(self, name, uuid, lov_uuid):
lovconfig.appendChild(self.ref("lov", lov_uuid))
return lovconfig
- def mds(self, name, uuid, fs, devname, format, net_uuid, node_uuid,
- failover_uuid = "", dev_size=0 ):
+ def mds(self, name, uuid, mdd_uuid):
mds = self.newService("mds", name, uuid)
- self.addElement(mds, "fstype", fs)
- dev = self.addElement(mds, "device", devname)
- if dev_size:
- dev.setAttribute("size", "%s" % (dev_size))
- self.addElement(mds, "autoformat", format)
- mds.appendChild(self.ref("network", net_uuid))
- mds.appendChild(self.ref("node", node_uuid))
- if failover_uuid:
- mds.appendChild(self.ref("failover", failover_uuid))
+ mds.appendChild(self.ref("active",mdd_uuid))
return mds
+ def mdsdev(self, name, uuid, fs, devname, format, net_uuid, node_uuid,
+ mds_uuid, dev_size=0 ):
+ mdd = self.newService("mdsdev", name, uuid)
+ self.addElement(mdd, "fstype", fs)
+ dev = self.addElement(mdd, "devpath", devname)
+ self.addElement(mdd, "autoformat", format)
+ if dev_size:
+ self.addElement(mdd, "devsize", "%s" % (dev_size))
+ mdd.appendChild(self.ref("network", net_uuid))
+ mdd.appendChild(self.ref("mds", mds_uuid))
+ return mdd
+
def mountpoint(self, name, uuid, mds_uuid, osc_uuid, path):
mtpt = self.newService("mountpoint", name, uuid)
mtpt.appendChild(self.ref("mds", mds_uuid))
return mtpt
def echo_client(self, name, uuid, osc_uuid):
- ec = self.newService("echo_client", name, uuid)
+ ec = self.newService("echoclient", name, uuid)
ec.appendChild(self.ref("obd", osc_uuid))
return ec
n = lookup(n, uuid)
if n: return n
return None
-
-
-def mds2node(lustre, mds_name):
- """ Find the node a MDS is configured on """
- mds = findByName(lustre, mds_name, 'mds')
- ref = mds.getElementsByTagName('node_ref')
- if not ref:
- error("mds2node:", "no node_ref found for", '"'+mds_name+'"')
- node_uuid = ref[0].getAttribute('uuidref')
- node = lookup(lustre, node_uuid)
- if not node:
- error('mds2node:', "no node found for :", '"'+mds_name+'"')
- return node
def name2uuid(lustre, name, tag="", fatal=1):
def lov_add_obd(gen, lov, osc_uuid):
- devs = lov.getElementsByTagName('devices')
- if len(devs) == 1:
- devs[0].appendChild(gen.ref("obd", osc_uuid))
- else:
- error("No devices element found for LOV:", lov)
-
+ lov.appendChild(gen.ref("obd", osc_uuid))
def node_add_profile(gen, node, ref, uuid):
- ret = node.getElementsByTagName('profile')
+ refname = "%s_ref" % "profile"
+ ret = node.getElementsByTagName(refname)
if not ret:
- error('node has no profile:', node)
- ret[0].appendChild(gen.ref(ref, uuid))
+ error('node has no profile ref:', node)
+ prof_uuid = ret[0].getAttribute('uuidref')
+ profile = lookup(node.parentNode, prof_uuid)
+ profile.appendChild(gen.ref(ref, uuid))
def get_attr(dom_node, attr, default=""):
v = dom_node.getAttribute(attr)
#
def do_add_node(gen, lustre, options, node_name):
uuid = new_uuid(node_name)
- node = gen.node(node_name, uuid)
+ prof_name = new_name("PROFILE_" + node_name)
+ prof_uuid = new_uuid(prof_name)
+ profile = gen.profile(prof_name, prof_uuid)
+ node = gen.node(node_name, uuid, prof_uuid)
+ lustre.appendChild(node)
+ lustre.appendChild(profile)
+
node_add_profile(gen, node, 'ldlm', ldlm_uuid)
if has_option(options, 'router'):
node.setAttribute('router', '1')
node.setAttribute('timeout', get_option(options, 'timeout'))
if has_option(options, 'recovery_upcall'):
node.setAttribute('recovery_upcall', get_option(options, 'recovery_upcall'))
- lustre.appendChild(node)
return node
""" create a node with a network config """
node_name = get_option(options, 'node')
-
ret = findByName(lustre, node_name, "node")
if ret:
print "Node:", node_name, "exists."
netlist = node.getElementsByTagName('network')
net = netlist[0]
- rlist = net.getElementsByTagName('route_tbl')
+ rlist = net.getElementsByTagName('routetbl')
if len(rlist) > 0:
rtbl = rlist[0]
else:
- rtbl = gen.addElement(net, 'route_tbl')
+ rtbl = gen.addElement(net, 'routetbl')
rtbl.appendChild(gen.route(net_type, gw, lo, hi))
def add_mds(gen, lustre, options):
node_name = get_option(options, 'node')
- mds_orig = get_option(options, 'mds')
- mds_name = new_name(mds_orig)
- if mds_name != mds_orig:
- warning("name:", mds_orig, "already used. using:", mds_name)
+ mds_name = get_option(options, 'mds')
+ mdd_name = new_name("MDD_" + mds_name +"_" + node_name)
+ mdd_uuid = new_uuid(mdd_name)
+
+ mds_uuid = name2uuid(lustre, mds_name, fatal=0)
+ if not mds_uuid:
+ mds_uuid = new_uuid(mds_name)
+ mds = gen.mds(mds_name, mds_uuid, mdd_uuid)
+ lustre.appendChild(mds)
+
devname = get_option(options, 'dev')
size = get_option(options, 'size', 0)
fstype = get_option(options, 'fstype', 'extN')
- mds_uuid = new_uuid(mds_name)
-
node_uuid = name2uuid(lustre, node_name, 'node')
node = findByName(lustre, node_name, "node")
- node_add_profile(gen, node, "mds", mds_uuid)
+ node_add_profile(gen, node, "mdsdev", mdd_uuid)
net_uuid = get_net_uuid(lustre, node_name)
if not net_uuid:
error("NODE: ", node_name, "not found")
- mds = gen.mds(mds_name, mds_uuid, fstype, devname, get_format_flag(options),
- net_uuid, node_uuid, dev_size=size)
- lustre.appendChild(mds)
+ mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, get_format_flag(options),
+ net_uuid, node_uuid, mds_uuid, dev_size=size)
+ lustre.appendChild(mdd)
def add_ost(gen, lustre, options):
echoname = new_name('ECHO_'+ node_name)
echo_uuid = new_uuid(echoname)
- node_add_profile(gen, node, 'echo_client', echo_uuid)
+ node_add_profile(gen, node, 'echoclient', echo_uuid)
lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0)
if not lov_uuid:
lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern)
lustre.appendChild(lov)
- # add an lovconfig entry to the mds profile
+ # add an lovconfig entry to the active mdsdev profile
lovconfig_name = new_name('LVCFG_' + name)
lovconfig_uuid = new_uuid(lovconfig_name)
- node = mds2node(lustre, mds_name)
- node_add_profile(gen, node, "lovconfig", lovconfig_uuid)
+ mds = findByName(lustre, mds_name)
+ mds.appendChild(gen.ref("lovconfig", lovconfig_uuid))
lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid)
lustre.appendChild(lovconfig)
if __name__ == "__main__":
main()
-
-
void usage(char *pgm)
{
- fprintf(stderr, "\nIncorrect parameters! Correct usage:\n\n" );
- fprintf(stderr, "%s <output filename> <stripe size> <OST #> <stripe #>\n", pgm);
+ fprintf(stderr, "usage: %s <filename> <stripe size> <start stripe> <stripe count>\n", pgm);
- fprintf(stderr, "\n\nArgument explanations:\n---------------------\n\n");
- fprintf(stderr, "<output filename> = the full name and path of the output file to create\n");
- fprintf(stderr, "<stripe size> = the number of bytes to have in each stripe.\n");
- fprintf(stderr, "<OST #> = the OST number to start the striping on.\n");
- fprintf(stderr, "<stripe #> = the number of stripes to use.\n");
-
- fprintf(stderr, "\n\nExamples:\n---------\n\n");
-
- fprintf(stderr, "%s /mnt/lustre/ost1 131072 0 1\n", pgm);
- fprintf(stderr, "\t\tcreates a file only on ost1.\n\n");
-
- fprintf(stderr, "%s /mnt/lustre/ost2 131072 1 1\n", pgm);
- fprintf(stderr, "\t\tcreates a file only on ost2.\n\n");
-
- fprintf(stderr, "%s /mnt/lustre/ost1and2 131072 0 2\n", pgm);
- fprintf(stderr, "\t\tcreates a 128k file with 2 stripes, on ost1 and ost2.\n");
-
- fprintf(stderr, "%s /mnt/lustre/ost1and2 131072 1 2\n", pgm);
- fprintf(stderr, "\t\tcreates a 128k file with 2 stripes, on ost2 and ost1.\n");
+ fprintf(stderr, "\tstripe size: number of bytes in each stripe\n");
+ fprintf(stderr, "\tstripe start: OST index which holds first stripe\n");
+ fprintf(stderr, "\tstripe count: number of OSTs to stripe over\n");
}
int create_file(char *name, long stripe_size, int stripe_offset,
/* Initialize IOCTL striping pattern structure */
a_striping.lmm_magic = LOV_MAGIC;
- a_striping.lmm_stripe_pattern = 0;
a_striping.lmm_stripe_size = stripe_size;
a_striping.lmm_stripe_offset = stripe_offset;
a_striping.lmm_stripe_count = stripe_count;
+++ /dev/null
-<!-- Lustre Management DTD -->\r
-\r
-<!-- basic entities -->\r
-<!ENTITY % tag.content "(#PCDATA)">\r
-<!ENTITY % tag.ref "\r
- num CDATA #IMPLIED\r
- name CDATA #IMPLIED\r
- uuidref CDATA #REQUIRED">\r
-<!ENTITY % tag.attr "\r
- name CDATA #REQUIRED\r
- uuid CDATA #REQUIRED">\r
-\r
-<!-- main elements -->\r
-<!ELEMENT lustre (node | mountpoint | ldlm | echo_client |\r
- mds | mdc | obd | ost | osc | lov | lovconfig)*>\r
-\r
-<!ELEMENT node (network | profile)*>\r
-<!ATTLIST node router CDATA #IMPLIED\r
- %tag.attr;>\r
-<!ELEMENT network (server | port | route_tbl | send_mem | recv_mem)*>\r
-<!ATTLIST network type (tcp | elan | gm) 'tcp'\r
- %tag.attr;>\r
-\r
-<!ELEMENT route_tbl (route)*>\r
-<!ELEMENT route %tag.content;>\r
-<!ATTLIST route type (elan | tcp | gm) #REQUIRED\r
- gw CDATA #REQUIRED\r
- lo CDATA #REQUIRED\r
- hi CDATA #IMPLIED >\r
-\r
-<!ELEMENT profile (ldlm_ref | network_ref | obd_ref | ost_ref | osc_ref |\r
- echo_client_ref | mds_ref | mdc_ref | lov_ref |\r
- lovconfig_ref| mountpoint_ref)*>\r
-<!ATTLIST profile >\r
-\r
-<!ELEMENT mountpoint (path | fileset | mds_ref | osc_ref)*>\r
-<!ATTLIST mountpoint %tag.attr;>\r
-<!ELEMENT echo_client (osc_ref)*>\r
-<!ATTLIST echo_client %tag.attr;>\r
-<!ELEMENT ldlm EMPTY>\r
-<!ATTLIST ldlm %tag.attr;>\r
-\r
-<!ELEMENT obd (fstype | device | autoformat)*>\r
-<!ATTLIST obd %tag.attr; type (obdfilter | obdecho) 'obdfilter'>\r
-<!ELEMENT ost (network_ref | obd_ref | failover_ref)*>\r
-<!ATTLIST ost %tag.attr;>\r
-<!ELEMENT mds (network_ref | fstype | device | autoformat | \r
- server_ref | failover_ref | node_ref )*>\r
-<!ATTLIST mds %tag.attr;>\r
-\r
-<!ELEMENT osc (ost_ref | obd_ref)*>\r
-<!ATTLIST osc %tag.attr;>\r
-<!ELEMENT mdc (network_ref | mds_ref)*>\r
-<!ATTLIST mdc %tag.attr;>\r
-<!ELEMENT lov (devices | mds_ref)*>\r
-<!ATTLIST lov %tag.attr;>\r
-<!ELEMENT lovconfig (lov_ref)>\r
-<!ATTLIST lovconfig %tag.attr;>\r
-<!ELEMENT devices (osc_ref)+>\r
-<!ATTLIST devices stripesize CDATA #REQUIRED\r
- stripecount CDATA #REQUIRED\r
- stripeoffset CDATA #IMPLIED\r
- pattern CDATA #REQUIRED>\r
-\r
-<!-- basic elements -->\r
-\r
-<!ELEMENT fstype %tag.content;>\r
-<!ELEMENT device %tag.content;>\r
-<!ATTLIST device size CDATA #IMPLIED>\r
-<!ELEMENT server %tag.content;>\r
-<!ELEMENT port %tag.content;>\r
-<!ELEMENT send_mem %tag.content;>\r
-<!ELEMENT recv_mem %tag.content;>\r
-<!ELEMENT autoformat %tag.content;>\r
-<!ELEMENT path %tag.content;>\r
-<!ELEMENT fileset %tag.content;>\r
-\r
-<!-- id tag elements -->\r
-<!ELEMENT network_ref %tag.content;>\r
-<!ATTLIST network_ref %tag.ref;>\r
-<!ELEMENT node_ref %tag.content;>\r
-<!ATTLIST node_ref %tag.ref;>\r
-<!ELEMENT profile_ref %tag.content;>\r
-<!ATTLIST profile_ref %tag.ref;>\r
-<!ELEMENT obd_ref %tag.content;>\r
-<!ATTLIST obd_ref %tag.ref;>\r
-<!ELEMENT mds_ref %tag.content;>\r
-<!ATTLIST mds_ref %tag.ref;>\r
-<!ELEMENT osc_ref %tag.content;>\r
-<!ATTLIST osc_ref %tag.ref;>\r
-<!ELEMENT ost_ref %tag.content;>\r
-<!ATTLIST ost_ref %tag.ref;>\r
-<!ELEMENT lov_ref %tag.content;>\r
-<!ATTLIST lov_ref %tag.ref;>\r
-<!ELEMENT lovconfig_ref %tag.content;>\r
-<!ATTLIST lovconfig_ref %tag.ref;>\r
-<!ELEMENT mdc_ref %tag.content;>\r
-<!ATTLIST mdc_ref %tag.ref;>\r
-<!ELEMENT mountpoint_ref %tag.content;>\r
-<!ATTLIST mountpoint_ref %tag.ref;>\r
-<!ELEMENT echo_client_ref %tag.content;>\r
-<!ATTLIST echo_client_ref %tag.ref;>\r
-<!ELEMENT server_ref %tag.content;>\r
-<!ATTLIST server_ref %tag.ref;>\r
-<!ELEMENT failover_ref %tag.content;>\r
-<!ATTLIST failover_ref %tag.ref;>\r
-<!ELEMENT ldlm_ref %tag.content;>\r
-<!ATTLIST ldlm_ref %tag.ref;>\r
-\r
-\r
if (strlen(argv[1]) > sizeof(desc.ld_uuid) - 1) {
fprintf(stderr,
- "error: %s: LOV uuid '%s' longer than %zd characters\n",
+ "error: %s: LOV uuid '%s' longer than "LPSZ" characters\n",
cmdname(argv[0]), argv[1], sizeof(desc.ld_uuid) - 1);
return -EINVAL;
}
struct obd_ioctl_data data;
struct lov_desc desc;
obd_uuid_t *uuidarray;
- int rc;
+ char *path;
+ int rc, tmpfd;
+ /* FIXME: ug. IOCINIT checks fd. */
+ tmpfd = fd;
+ fd = 1;
IOCINIT(data);
+ fd = tmpfd;
if (argc != 2)
return CMD_HELP;
- if (strlen(argv[1]) > sizeof(desc.ld_uuid) - 1) {
- fprintf(stderr,
- "error: %s: LOV uuid '%s' longer than %zd characters\n",
- cmdname(argv[0]), argv[1], sizeof(desc.ld_uuid) - 1);
- return -EINVAL;
+ path = argv[1];
+ tmpfd = open(path, O_RDONLY);
+ if (tmpfd < 0) {
+ fprintf(stderr, "open \"%s\" failed: %s\n", path,
+ strerror(errno));
+ return -1;
}
memset(&desc, 0, sizeof(desc));
if (!uuidarray) {
fprintf(stderr, "error: %s: no memory for %d uuid's\n",
cmdname(argv[0]), desc.ld_tgt_count);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out;
}
data.ioc_inllen1 = sizeof(desc);
rc = -EINVAL;
goto out;
}
- rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+ rc = ioctl(tmpfd, OBD_IOC_LOV_GET_CONFIG, buf);
if (rc == -ENOSPC) {
free(uuidarray);
goto repeat;
}
out:
free(uuidarray);
+ close(tmpfd);
return rc;
}
return rc;
}
+/*
+ * lctl "lookup <directory> <file> [verbose]": issue IOC_MDC_LOOKUP on the
+ * open directory for the name <file> and, when verbose, print the mode,
+ * uid and gid from the reply.
+ *
+ * Returns CMD_HELP on bad usage, -1 if the directory cannot be opened,
+ * the errno value if the ioctl fails, otherwise the ioctl's return value.
+ */
+int jt_obd_mdc_lookup(int argc, char **argv)
+{
+ struct obd_ioctl_data data;
+ char *parent, *child;
+ int rc, tmpfd, verbose = 1;
+
+ if (argc < 3 || argc > 4)
+ return CMD_HELP;
+
+ parent = argv[1];
+ child = argv[2];
+ if (argc == 4)
+ verbose = get_verbose(argv[0], argv[3]);
+
+ /* FIXME: ug. IOCINIT checks fd. */
+ tmpfd = fd;
+ fd = 1;
+ IOCINIT(data);
+ fd = tmpfd;
+
+ data.ioc_inllen1 = strlen(child) + 1;
+ data.ioc_inlbuf1 = child;
+
+ IOC_PACK(argv[0], data);
+
+ tmpfd = open(parent, O_RDONLY);
+ if (tmpfd < 0) {
+ fprintf(stderr, "open \"%s\" failed: %s\n", parent,
+ strerror(errno));
+ return -1;
+ }
+
+ rc = ioctl(tmpfd, IOC_MDC_LOOKUP, buf);
+ if (rc < 0) {
+ /* Save errno before any other call can overwrite it. */
+ rc = errno;
+ fprintf(stderr, "error: %s: ioctl error: %s\n",
+ cmdname(argv[0]), strerror(rc));
+ close(tmpfd);
+ /* The reply buffer is not valid after a failed ioctl, so
+ * do not unpack or print it. */
+ return rc;
+ }
+ close(tmpfd);
+
+ if (verbose) {
+ IOC_UNPACK(argv[0], data);
+ printf("%s: mode %o uid %d gid %d\n", child,
+ data.ioc_obdo1.o_mode, data.ioc_obdo1.o_uid,
+ data.ioc_obdo1.o_gid);
+ }
+
+ return rc;
+}
+
static void signal_server(int sig)
{
if (sig == SIGINT) {
int jt_obd_lov_set_osc_active(int argc, char **argv);
int jt_obd_newconn(int argc, char **argv);
int jt_obd_failconn(int argc, char **argv);
+int jt_obd_mdc_lookup(int argc, char **argv);
int jt_get_version(int argc, char **argv);
#endif