return (-EINVAL);
err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+ kportal_put_ni (data->ioc_nal);
break;
}
return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val);
}
-static int
-cobd_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
{
- struct obd_device *obd = class_conn2obd(conn);
- struct cache_obd *cobd;
+ struct obd_export *cobd_exp;
+ int rc;
- if (obd == NULL) {
- CERROR("invalid client cookie "LPX64"\n", conn->cookie);
+ if (exp->exp_obd == NULL)
return -EINVAL;
- }
- cobd = &obd->u.cobd;
- return (obd_statfs (&cobd->cobd_target, osfs));
+ cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
+ rc = obd_statfs(cobd_exp, osfs);
+ class_export_put(cobd_exp);
+ return rc;
}
static int
return (obd_close (&cobd->cobd_target, oa, lsm, oti));
}
-static int cobd_preprw(int cmd, struct obd_export *exp,
+static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
struct niobuf_local *res, void **desc_private,
return -EOPNOTSUPP;
cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target);
- rc = obd_preprw(cmd, cobd_exp, objcount, obj, niocount, nb, res,
+ rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res,
desc_private, oti);
class_export_put(cobd_exp);
return rc;
DESCRIPTION
\layout Standard
-This program configures a node following directives in the <XML-config-file>..
+This program configures a node following directives in the <XML-config-file>.
There will be a single configuration file for all the nodes in a single cluster.
This file should be distributed to all the nodes in the cluster or kept
in a location accessible to all the nodes.
\layout Description
--group\SpecialChar ~
-<arg> The group of devices tol cleanup/configure.
+<arg> The group of devices to clean up/configure.
\layout Description
--nomod Only setup devices and services, do not load modules.
\layout Description
--noexec,-n Don't do anything, but print what would happen.
+ Useful for debugging purposes.
\layout Description
--verbose,-v Be verbose and show actions while going along.
will cause lconf to load the lustre modules from this source tree.
\layout Description
---portals=src_dir Portals source directory.
- If this is a relative path, it is assumed to be relative to Lustre source
- tree location.
-
-\layout Description
-
--ptldebug\SpecialChar ~
debug\SpecialChar ~
level This option can be used to set the required debug
- level
+ level.
\layout Description
--subsystem\SpecialChar ~
-<arg> Set the portals debug subsystem
+<arg> Set the portals debug subsystem.
\layout Subsection
EXAMPLES
void ptlrpc_run_failed_import_upcall(struct obd_import *imp);
void ptlrpc_run_recovery_over_upcall(struct obd_device *obd);
-int ptlrpc_reconnect_import(struct obd_import *imp,
- struct ptlrpc_request **reqptr);
+int ptlrpc_reconnect_import(struct obd_import *imp);
int ptlrpc_replay(struct obd_import *imp);
int ptlrpc_resend(struct obd_import *imp);
void ptlrpc_free_committed(struct obd_import *imp);
#define LUSTRE_CONN_NEW 1
#define LUSTRE_CONN_CON 2
#define LUSTRE_CONN_NOTCONN 3
-#define LUSTRE_CONN_RECOVD 4
+#define LUSTRE_CONN_RECOVER 4
#define LUSTRE_CONN_FULL 5
/* packet types */
obd_time o_mtime;
obd_time o_ctime;
obd_size o_size;
- obd_blocks o_blocks;
- obd_rdev o_rdev;
+ obd_blocks o_blocks; /* brw: clients sent cached bytes */
+ obd_rdev o_rdev; /* brw: clients/servers sent grant */
obd_blksize o_blksize; /* optimal IO blocksize */
obd_mode o_mode;
obd_uid o_uid;
obd_gid o_gid;
obd_flag o_flags;
- obd_count o_nlink;
+ obd_count o_nlink; /* brw: checksum */
obd_count o_generation;
obd_flag o_valid; /* hot fields in this obdo */
obd_flag o_obdflags;
/* ost_body.data values for OST_BRW */
-#define OBD_BRW_READ 0x01
-#define OBD_BRW_WRITE 0x02
-#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE)
-#define OBD_BRW_CREATE 0x04
-#define OBD_BRW_SYNC 0x08
-#define OBD_BRW_CHECK 0x10
+#define OBD_BRW_READ 0x01
+#define OBD_BRW_WRITE 0x02
+#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE)
+#define OBD_BRW_CREATE 0x04
+#define OBD_BRW_SYNC 0x08
+#define OBD_BRW_CHECK 0x10
+#define OBD_BRW_FROM_GRANT 0x20
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
__u64 imp_peer_committed_transno;
struct obd_uuid imp_target_uuid; /* XXX -> lustre_name */
struct lustre_handle imp_remote_handle;
-
+ unsigned long imp_next_ping;
+
/* Protects flags, level, generation, *_list */
spinlock_t imp_lock;
#define CHECKSUM_BULK 0
#if CHECKSUM_BULK
-static inline void ost_checksum(__u64 *cksum, void *addr, int len)
+static inline void ost_checksum(obd_count *cksum, void *addr, int len)
{
unsigned char *ptr = (unsigned char *)addr;
- __u64 sum = 0;
+ obd_count sum = 0;
/* very stupid, but means I don't have to think about byte order */
while (len-- > 0)
#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata)
-struct ll_dirty_offsets {
- rb_root_t do_root;
- spinlock_t do_lock;
- unsigned long do_num_dirty;
-};
-
-void ll_lldo_init(struct ll_dirty_offsets *lldo);
-void ll_record_dirty(struct inode *inode, unsigned long offset);
-void ll_remove_dirty(struct inode *inode, unsigned long start,
- unsigned long end);
-int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
- unsigned long *end);
-int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest);
extern struct file_operations ll_pgcache_seq_fops;
struct ll_inode_info {
struct list_head lli_read_extents;
loff_t lli_maxbytes;
spinlock_t lli_read_extent_lock;
- struct ll_dirty_offsets lli_dirty;
unsigned long lli_flags;
#define LLI_F_HAVE_SIZE_LOCK 0
down(&ll_d2d(de)->lld_it_sem); \
LASSERT(de->d_it == NULL); \
de->d_it = it; \
- CDEBUG(D_DENTRY, "D_IT DOWN dentry %p fsdata %p intent: %s sem %d\n", \
- de, ll_d2d(de), ldlm_it2str(de->d_it->it_op), \
+ CDEBUG(D_DENTRY, \
+ "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n", \
+ de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op), \
atomic_read(&(ll_d2d(de)->lld_it_sem.count))); \
} while(0)
LASSERT(it); \
LASSERT(it->it_op != IT_RELEASED_MAGIC); \
\
- CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %s\n", \
- de, ll_d2d(de), ldlm_it2str(de->d_it->it_op)); \
+ CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n", \
+ de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op)); \
de->d_it = NULL; \
it->it_op = IT_RELEASED_MAGIC; \
up(&ll_d2d(de)->lld_it_sem); \
#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
enum {
- LPROC_LL_DIRTY_PAGES = 0,
- LPROC_LL_DIRTY_HITS,
+ LPROC_LL_DIRTY_HITS = 0,
LPROC_LL_DIRTY_MISSES,
LPROC_LL_WB_WRITEPAGE,
LPROC_LL_WB_PRESSURE,
/* Spare the preprocessor, spoil the bugs. */
#define FLAG(field, str) (field ? str : "")
-#define DEBUG_REQ_FLAGS(req) \
- ((req->rq_phase == RQ_PHASE_NEW) ? "New" : \
- (req->rq_phase == RQ_PHASE_RPC) ? "Rpc" : \
- (req->rq_phase == RQ_PHASE_INTERPRET) ? "Interpret" : \
- (req->rq_phase == RQ_PHASE_COMPLETE) ? "Complete" : "?phase?"), \
- FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
- FLAG(req->rq_want_ack, "A"), FLAG(req->rq_err, "E"), \
- FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
- FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
- FLAG(req->rq_no_resend, "N"), FLAG(req->rq_resent, "s"), \
+#define DEBUG_REQ_FLAGS(req) \
+ ((req->rq_phase == RQ_PHASE_NEW) ? "New" : \
+ (req->rq_phase == RQ_PHASE_RPC) ? "RPC" : \
+ (req->rq_phase == RQ_PHASE_INTERPRET) ? "Interpret" : \
+ (req->rq_phase == RQ_PHASE_COMPLETE) ? "Complete" : \
+ (req->rq_phase == RQ_PHASE_BULK) ? "Bulk" : "?phase?"), \
+ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
+ FLAG(req->rq_want_ack, "A"), FLAG(req->rq_err, "E"), \
+ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
+ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
+ FLAG(req->rq_no_resend, "N"), FLAG(req->rq_resent, "s"), \
FLAG(req->rq_no_recov, "n"), FLAG(req->rq_waiting, "W")
-#define REQ_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
#define DEBUG_REQ(level, req, fmt, args...) \
do { \
struct obd_uuid *cluuid);
int client_import_disconnect(struct lustre_handle *conn, int failover);
+/* ptlrpc/pinger.c */
+int ptlrpc_pinger_add_import(struct obd_import *imp);
+int ptlrpc_pinger_del_import(struct obd_import *imp);
+
#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Declares struct otree and the ot_* functions that operate on it.
+ * Judging by the names alone, an otree tracks a set of "marked" offsets
+ * and can report them as start/end extents -- confirm against the
+ * implementation, which is not part of this header.
+ */
+#ifndef _LUSTRE_OTREE_H
+#define _LUSTRE_OTREE_H
+
+/* XXX ok, I can't make sense of our header nest right now.. */
+#ifdef __KERNEL__
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+
+struct otree {
+ rb_root_t ot_root; /* red-black tree root (type from linux/rbtree.h) */
+ spinlock_t ot_lock; /* presumably guards the other two fields -- TODO confirm */
+ unsigned long ot_num_marked; /* presumably the number of marked offsets -- TODO confirm */
+};
+#else /* !__KERNEL__: the rbtree/spinlock headers above are not included */
+struct otree {
+ unsigned long lalala; /* dummy member; nothing in this header reads it */
+};
+#endif
+
+int ot_mark_offset(struct otree *ot, unsigned long offset); /* presumably marks a single offset -- TODO confirm */
+int ot_clear_extent(struct otree *ot, unsigned long start, unsigned long end); /* presumably unmarks offsets in start..end; inclusiveness of the bounds not visible here */
+int ot_find_marked_extent(struct otree *ot, unsigned long *start,
+ unsigned long *end); /* start/end are pointers, so presumably in/out -- TODO confirm */
+int ot_last_marked(struct otree *ot, unsigned long *last); /* result goes through *last; meaning of the int return not visible here */
+unsigned long ot_num_marked(struct otree *ot); /* shares its name with the ot_num_marked field of the kernel struct */
+void ot_init(struct otree *ot); /* presumably must run before any other ot_* call -- TODO confirm */
+
+#endif
#ifndef __OBD_H
#define __OBD_H
+#include <linux/lustre_otree.h>
+
struct lov_oinfo { /* per-child structure */
__u64 loi_id; /* object ID on the target OST */
struct lustre_handle *loi_handle; /* open file handle for obj on OST */
int loi_ost_idx; /* OST stripe index in lmd_objects array */
+ /* tracking offsets per file, per stripe.. */
+ struct otree *loi_dirty_ot; /* lets lov stack on osc */
+ struct otree loi_dirty_ot_inline;
};
struct lov_stripe_md {
struct file_operations *fo_fop;
struct inode_operations *fo_iop;
struct address_space_operations *fo_aops;
+
struct list_head fo_export_list;
spinlock_t fo_fddlock; /* protect setting dentry->d_fsdata */
int fo_subdir_count;
+ spinlock_t fo_grant_lock; /* protects tot_granted */
+ obd_size fo_tot_granted;
+ obd_size fo_tot_cached;
};
struct mds_server_data;
int cl_max_mds_easize;
struct obd_device *cl_containing_lov;
kdev_t cl_sandev;
+ struct semaphore cl_dirty_sem;
+ obd_size cl_dirty; /* both in bytes */
+ obd_size cl_dirty_granted;
+ /* this is just to keep existing infinitely caching behaviour between
+ * clients and OSTs that don't have the grant code in yet.. it can
+ * be yanked once everything speaks grants */
+ char cl_ost_can_grant;
};
struct mds_obd {
wait_queue_head_t obd_refcount_waitq;
struct proc_dir_entry *obd_proc_entry;
struct list_head obd_exports;
+ int obd_num_exports;
struct list_head obd_imports;
struct ldlm_namespace *obd_namespace;
struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */
struct obd_uuid *cluuid);
int (*o_disconnect)(struct lustre_handle *conn, int failover);
- int (*o_statfs)(struct lustre_handle *conn, struct obd_statfs *osfs);
+ int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs);
int (*o_syncfs)(struct obd_export *);
int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt,
struct lov_stripe_md *mem_src);
int (*o_iterate)(struct lustre_handle *conn,
int (*)(obd_id, obd_gr, void *),
obd_id *startid, obd_gr group, void *data);
- int (*o_preprw)(int cmd, struct obd_export *,
+ int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
struct niobuf_local *local, void **desc_private,
int (*o_san_preprw)(int cmd, struct lustre_handle *conn,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote);
+ int (*o_mark_page_dirty)(struct lustre_handle *conn,
+ struct lov_stripe_md *ea,
+ unsigned long offset);
+ int (*o_clear_dirty_pages)(struct lustre_handle *conn,
+ struct lov_stripe_md *ea,
+ unsigned long start,
+ unsigned long end,
+ unsigned long *cleared);
+ int (*o_last_dirty_offset)(struct lustre_handle *conn,
+ struct lov_stripe_md *ea,
+ unsigned long *offset);
void (*o_destroy_export)(struct obd_export *export);
};
int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm);
int class_unregister_type(char *nm);
int class_name2dev(char *name);
+struct obd_device *class_name2obd(char *name);
int class_uuid2dev(struct obd_uuid *uuid);
struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
EXIT;
}
-static inline int obd_statfs(struct lustre_handle *conn,struct obd_statfs *osfs)
+static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs)
{
- struct obd_export *exp;
int rc;
ENTRY;
- OBD_CHECK_ACTIVE(conn, exp);
OBD_CHECK_OP(exp->exp_obd, statfs);
OBD_COUNTER_INCREMENT(exp->exp_obd, statfs);
- rc = OBP(exp->exp_obd, statfs)(conn, osfs);
- class_export_put(exp);
+ rc = OBP(exp->exp_obd, statfs)(exp, osfs);
RETURN(rc);
}
RETURN(rc);
}
-static inline int obd_preprw(int cmd, struct obd_export *exp,
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *remote,
struct niobuf_local *local, void **desc_private,
OBD_CHECK_OP(exp->exp_obd, preprw);
OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
- rc = OBP(exp->exp_obd, preprw)(cmd, exp, objcount, obj, niocount,
+ rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount,
remote, local, desc_private, oti);
RETURN(rc);
}
RETURN(rc);
}
+static inline int obd_mark_page_dirty(struct lustre_handle *conn,
+ struct lov_stripe_md *lsm,
+ unsigned long offset)
+{ /* Forward (conn, lsm, offset) to the mark_page_dirty method of
+   * exp->exp_obd and return that method's result.
+   * NOTE(review): RETURN() is used without a matching ENTRY, unlike
+   * obd_statfs() -- confirm that is intended. */
+ struct obd_export *exp;
+ int rc;
+
+ OBD_CHECK_SETUP(conn, exp); /* presumably resolves conn to exp and takes a reference -- TODO confirm */
+ OBD_CHECK_OP(exp->exp_obd, mark_page_dirty); /* NOTE(review): if this macro returns early, exp is not put -- verify */
+
+ rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset);
+ class_export_put(exp); /* release exp before handing rc back */
+ RETURN(rc);
+}
+
+static inline int obd_clear_dirty_pages(struct lustre_handle *conn,
+ struct lov_stripe_md *lsm,
+ unsigned long start,
+ unsigned long end,
+ unsigned long *cleared)
+{ /* Forward (conn, lsm, start, end, cleared) to the clear_dirty_pages
+   * method of exp->exp_obd and return that method's result.  cleared
+   * is passed through untouched; presumably the method stores a count
+   * there -- TODO confirm.
+   * NOTE(review): RETURN() is used without a matching ENTRY, unlike
+   * obd_statfs() -- confirm that is intended. */
+ struct obd_export *exp;
+ int rc;
+
+ OBD_CHECK_SETUP(conn, exp); /* presumably resolves conn to exp and takes a reference -- TODO confirm */
+ OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages); /* NOTE(review): if this macro returns early, exp is not put -- verify */
+
+ rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end,
+ cleared);
+ class_export_put(exp); /* release exp before handing rc back */
+ RETURN(rc);
+}
+
+static inline int obd_last_dirty_offset(struct lustre_handle *conn,
+ struct lov_stripe_md *lsm,
+ unsigned long *offset)
+{ /* Forward (conn, lsm, offset) to the last_dirty_offset method of
+   * exp->exp_obd and return that method's result; the offset itself
+   * is delivered through *offset by the method.
+   * NOTE(review): RETURN() is used without a matching ENTRY, unlike
+   * obd_statfs() -- confirm that is intended. */
+ struct obd_export *exp;
+ int rc;
+
+ OBD_CHECK_SETUP(conn, exp); /* presumably resolves conn to exp and takes a reference -- TODO confirm */
+ OBD_CHECK_OP(exp->exp_obd, last_dirty_offset); /* NOTE(review): if this macro returns early, exp is not put -- verify */
+
+ rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset);
+ class_export_put(exp); /* release exp before handing rc back */
+ RETURN(rc);
+}
/* OBD Metadata Support */
extern int obd_memmax;
extern unsigned long obd_fail_loc;
extern unsigned long obd_timeout;
+extern unsigned long obd_max_dirty_pages;
extern char obd_lustre_upcall[128];
extern unsigned long obd_sync_filter;
--- /dev/null
+ 0 files changed
+
+--- linux-2.4.20-rh/drivers/net/netconsole.c~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800
++++ linux-2.4.20-rh-root/drivers/net/netconsole.c 2003-07-01 11:10:26.000000000 +0800
+@@ -988,7 +988,15 @@ static void netconsole_netdump (struct p
+ reply.info = 0;
+ send_netdump_skb(dev, tmp, strlen(tmp), &reply);
+ break;
+-
++ case COMM_SYSRQ:
++ Dprintk("got SYSRQ command.\n");
++ printk("netdump: got SYSRQ command %d \n", req->from);
++ handle_sysrq(req->from, regs, NULL, NULL);
++ reply.code = REPLY_SYSRQ;
++ reply.nr = req->nr;
++ reply.info = req->from;
++ send_netdump_skb(dev, tmp, strlen(tmp), &reply);
++ break;
+ default:
+ reply.code = REPLY_ERROR;
+ reply.nr = req->nr;
+--- linux-2.4.20-rh/drivers/net/netconsole.h~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800
++++ linux-2.4.20-rh-root/drivers/net/netconsole.h 2003-07-01 11:11:29.000000000 +0800
+@@ -42,6 +42,7 @@ enum netdump_commands {
+ COMM_START_NETDUMP_ACK = 7,
+ COMM_GET_REGS = 8,
+ COMM_SHOW_STATE = 9,
++ COMM_SYSRQ=10,
+ };
+
+ #define NETDUMP_REQ_SIZE (8+4*4)
+@@ -69,6 +70,7 @@ enum netdump_replies {
+ REPLY_REGS = 10,
+ REPLY_MAGIC = 11,
+ REPLY_SHOW_STATE = 12,
++ REPLY_SYSRQ=13,
+ };
+
+ typedef struct netdump_reply_s {
+
+_
--- /dev/null
+ 0 files changed
+
+--- linux-2.4.20-rh/drivers/net/tg3.c~tg3_netconsole 2003-04-11 14:04:56.000000000 +0800
++++ linux-2.4.20-rh-root/drivers/net/tg3.c 2003-07-01 11:27:46.000000000 +0800
+@@ -170,6 +170,10 @@ static void tg3_write_indirect_reg32(str
+ }
+ }
+
++#ifdef HAVE_POLL_CONTROLLER
++static void Poll_tg3(struct net_device *);
++#endif
++
+ #define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val))
+ #define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg))
+ #define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg))
+@@ -1899,7 +1903,138 @@ static int tg3_vlan_rx(struct tg3 *tp, s
+ return vlan_hwaccel_receive_skb(skb, tp->vlgrp, vlan_tag);
+ }
+ #endif
++/* for netconsole */
++static int upcall_rx_hook(struct net_device *dev)
++{
++ struct tg3 *tp = dev->priv;
++ u32 work_mask;
++ u32 rx_rcb_ptr = tp->rx_rcb_ptr;
++ u16 hw_idx, sw_idx;
++ int received;
++
++ hw_idx = tp->hw_status->idx[0].rx_producer;
++ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
++ work_mask = 0;
++ received = 0;
++ while (sw_idx != hw_idx) {
++ struct tg3_rx_buffer_desc *desc = &tp->rx_rcb[sw_idx];
++ unsigned int len;
++ struct sk_buff *skb;
++ dma_addr_t dma_addr;
++ u32 opaque_key, desc_idx, *post_ptr;
++
++ desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK;
++ opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK;
++ if (opaque_key == RXD_OPAQUE_RING_STD) {
++ dma_addr = pci_unmap_addr(&tp->rx_std_buffers[desc_idx],
++ mapping);
++ skb = tp->rx_std_buffers[desc_idx].skb;
++ post_ptr = &tp->rx_std_ptr;
++ } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) {
++ dma_addr = pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx],
++ mapping);
++ skb = tp->rx_jumbo_buffers[desc_idx].skb;
++ post_ptr = &tp->rx_jumbo_ptr;
++ }
++ else {
++ goto next_pkt_nopost;
++ }
++
++ work_mask |= opaque_key;
++
++ if ((desc->err_vlan & RXD_ERR_MASK) != 0 &&
++ (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) {
++ drop_it:
++ tg3_recycle_rx(tp, opaque_key,
++ desc_idx, *post_ptr);
++ drop_it_no_recycle:
++ /* Other statistics kept track of by card. */
++ tp->net_stats.rx_dropped++;
++ goto next_pkt;
++ }
++
++ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */
++
++ if (len > RX_COPY_THRESHOLD) {
++ int skb_size;
++
++ skb_size = tg3_alloc_rx_skb(tp, opaque_key,
++ desc_idx, *post_ptr);
++ if (skb_size < 0)
++ goto drop_it;
++
++ pci_unmap_single(tp->pdev, dma_addr,
++ skb_size - tp->rx_offset,
++ PCI_DMA_FROMDEVICE);
++
++ skb_put(skb, len);
++ } else {
++ struct sk_buff *copy_skb;
++
++ tg3_recycle_rx(tp, opaque_key,
++ desc_idx, *post_ptr);
++
++ copy_skb = dev_alloc_skb(len + 2);
++ if (copy_skb == NULL)
++ goto drop_it_no_recycle;
++
++ copy_skb->dev = tp->dev;
++ skb_reserve(copy_skb, 2);
++ skb_put(copy_skb, len);
++ pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
++ memcpy(copy_skb->data, skb->data, len);
++
++ /* We'll reuse the original ring buffer. */
++ skb = copy_skb;
++ }
++
++ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) &&
++ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) &&
++ (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
++ >> RXD_TCPCSUM_SHIFT) == 0xffff))
++ skb->ip_summed = CHECKSUM_UNNECESSARY;
++ else
++ skb->ip_summed = CHECKSUM_NONE;
++
++ skb->protocol = eth_type_trans(skb, tp->dev);
++/*into netconsole driver*/
++ dev->rx_hook(skb);
++ kfree_skb(skb);
++ tp->dev->last_rx = jiffies;
++ received++;
++next_pkt:
++ (*post_ptr)++;
++next_pkt_nopost:
++ rx_rcb_ptr++;
++ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE;
++ }
++
++ /* ACK the status ring. */
++ tp->rx_rcb_ptr = rx_rcb_ptr;
++ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW,
++ (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE));
++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
++ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW);
+
++ /* Refill RX ring(s). */
++ if (work_mask & RXD_OPAQUE_RING_STD) {
++ sw_idx = tp->rx_std_ptr % TG3_RX_RING_SIZE;
++ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW,
++ sw_idx);
++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
++ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW);
++ }
++ if (work_mask & RXD_OPAQUE_RING_JUMBO) {
++ sw_idx = tp->rx_jumbo_ptr % TG3_RX_JUMBO_RING_SIZE;
++ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW,
++ sw_idx);
++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
++ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
++ }
++
++ return received;
++
++}
+ /* The RX ring scheme is composed of multiple rings which post fresh
+ * buffers to the chip, and one special ring the chip uses to report
+ * status back to the host.
+@@ -2006,7 +2141,7 @@ static int tg3_rx(struct tg3 *tp, int bu
+ /* We'll reuse the original ring buffer. */
+ skb = copy_skb;
+ }
+-
++
+ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) &&
+ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) &&
+ (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK)
+@@ -2016,6 +2151,8 @@ static int tg3_rx(struct tg3 *tp, int bu
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->protocol = eth_type_trans(skb, tp->dev);
++
++
+ #if TG3_VLAN_TAG_USED
+ if (tp->vlgrp != NULL &&
+ desc->type_flags & RXD_FLAG_VLAN) {
+@@ -2058,7 +2195,6 @@ next_pkt_nopost:
+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER)
+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW);
+ }
+-
+ return received;
+ }
+
+@@ -2151,7 +2287,6 @@ static void tg3_interrupt(int irq, void
+ unsigned long flags;
+
+ spin_lock_irqsave(&tp->lock, flags);
+-
+ if (sblk->status & SD_STATUS_UPDATED) {
+ /*
+ * writing any value to intr-mbox-0 clears PCI INTA# and
+@@ -2169,8 +2304,17 @@ static void tg3_interrupt(int irq, void
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ sblk->status &= ~SD_STATUS_UPDATED;
+
+- if (likely(tg3_has_work(dev, tp)))
+- netif_rx_schedule(dev); /* schedule NAPI poll */
++ if (likely(tg3_has_work(dev, tp))){
++ if (unlikely(dev->rx_hook != NULL) && netdump_mode) {
++ int ret;
++ struct sk_buff *skb;
++ ret = upcall_rx_hook(dev);
++ if (!ret){
++ goto out;
++ }
++ }
++ netif_rx_schedule(dev); /* schedule NAPI poll */
++ }
+ else {
+ /* no work, shared interrupt perhaps? re-enable
+ * interrupts, and flush that PCI write
+@@ -2180,7 +2324,7 @@ static void tg3_interrupt(int irq, void
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ }
+ }
+-
++out:
+ spin_unlock_irqrestore(&tp->lock, flags);
+ }
+
+@@ -6804,7 +6948,10 @@ static int __devinit tg3_init_one(struct
+ dev->watchdog_timeo = TG3_TX_TIMEOUT;
+ dev->change_mtu = tg3_change_mtu;
+ dev->irq = pdev->irq;
+-
++#ifdef HAVE_POLL_CONTROLLER
++ dev->poll_controller = &Poll_tg3;
++#endif
++
+ err = tg3_get_invariants(tp);
+ if (err) {
+ printk(KERN_ERR PFX "Problem fetching invariants of chip, "
+@@ -6882,6 +7029,15 @@ err_out_disable_pdev:
+ return err;
+ }
+
++#ifdef HAVE_POLL_CONTROLLER
++static void Poll_tg3(struct net_device *dev)
++{
++ if (!netdump_mode) disable_irq(dev->irq);
++ tg3_interrupt(dev->irq, dev, NULL);
++ if (!netdump_mode) enable_irq(dev->irq);
++}
++#endif
++
+ static void __devexit tg3_remove_one(struct pci_dev *pdev)
+ {
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+_
--- /dev/null
+drivers/net/netconsole.c
+drivers/net/netconsole.h
--- /dev/null
+drivers/net/tg3.c
imp->imp_replayable = 1;
CDEBUG(D_HA, "connected to replayable target: %s\n",
imp->imp_target_uuid.uuid);
+ ptlrpc_pinger_add_import(imp);
}
imp->imp_level = LUSTRE_CONN_FULL;
imp->imp_remote_handle = request->rq_repmsg->handle;
/* Yeah, obd_no_recov also (mainly) means "forced shutdown". */
if (obd->obd_no_recov) {
- ptlrpc_abort_inflight(imp);
+ ptlrpc_set_import_active(imp, 0);
} else {
request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL);
if (!request)
request->rq_replen = lustre_msg_size(0, NULL);
- /* Process disconnects even if we're waiting for recovery. */
- request->rq_level = LUSTRE_CONN_RECOVD;
-
rc = ptlrpc_queue_wait(request);
if (rc)
GOTO(out_req, rc);
}
+ if (imp->imp_replayable)
+ ptlrpc_pinger_del_import(imp);
+
EXIT;
out_req:
if (request)
class_disconnect_exports(obd, 0);
abort_delayed_replies(obd);
abort_recovery_queue(obd);
+ ptlrpc_run_recovery_over_upcall(obd);
}
static void target_recovery_expired(unsigned long castmeharder)
!(lock->l_flags & LDLM_FL_LOCAL))
continue;
- if ((flags & LDLM_FL_MATCH_DATA) && lock->l_data != data)
+ if ((flags & LDLM_FL_MATCH_DATA) && lock->l_data != data) {
+ LDLM_DEBUG(lock, "data mismatch: have %p, want %p",
+ lock->l_data, data);
continue;
+ }
ldlm_lock_addref_internal(lock, mode);
return lock;
ldlm_add_waiting_lock(lock);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- req->rq_level = LUSTRE_CONN_RECOVD;
+ req->rq_level = LUSTRE_CONN_RECOVER;
req->rq_timeout = 2; /* 2 second timeout for initial AST reply */
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
LDLM_DEBUG(lock, "server preparing completion AST");
req->rq_replen = lustre_msg_size(0, NULL);
- req->rq_level = LUSTRE_CONN_RECOVD;
+ req->rq_level = LUSTRE_CONN_RECOVER;
req->rq_timeout = 2; /* 2 second timeout for initial AST reply */
rc = ptlrpc_queue_wait(req);
if (rc == -ETIMEDOUT || rc == -EINTR) {
RETURN(-ENOMEM);
/* We're part of recovery, so don't wait for it. */
- req->rq_level = LUSTRE_CONN_RECOVD;
+ req->rq_level = LUSTRE_CONN_RECOVER;
body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body));
ldlm_lock2desc(lock, &body->lock_desc);
goto out_free;
}
- strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(mdc);
if (!obd) {
CERROR("MDC %s: not setup or attached\n", mdc);
err = -EINVAL;
mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
/* setup osc */
- strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(osc);
if (!obd) {
CERROR("OSC %s: not setup or attached\n", osc);
err = -EINVAL;
if (it->it_lock_mode) {
handle = (struct lustre_handle *)it->it_lock_handle;
+ CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
+ " from it %p\n",
+ handle->cookie, it);
ldlm_lock_decref(handle, it->it_lock_mode);
/* intent_release may be called multiple times, from
rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish);
if (rc < 0) {
- CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
- it->it_status);
+ if (rc != -ESTALE) {
+ CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc,
+ it->it_status);
+ }
RETURN(0);
}
/* unfortunately ll_intent_lock may cause a callback and revoke our
dentry */
spin_lock(&dcache_lock);
list_del_init(&de->d_hash);
+ __d_rehash(de, 0);
spin_unlock(&dcache_lock);
- d_rehash(de);
RETURN(1);
}
struct mds_body *body;
struct lookup_intent it = { .it_op = IT_READDIR };
struct mdc_op_data data;
-
+ struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn);
+ struct ldlm_res_id res_id =
+ { .name = {inode->i_ino, (__u64)inode->i_generation} };
+ int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
GOTO(readpage_out, rc);
}
- ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
-
- rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
- &data, &lockh, NULL, 0,
- ldlm_completion_ast, ll_mdc_blocking_ast, inode);
- request = (struct ptlrpc_request *)it.it_data;
- if (request)
- ptlrpc_req_finished(request);
- if (rc < 0) {
- CERROR("lock enqueue: err: %d\n", rc);
- unlock_page(page);
- RETURN(rc);
+ rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id,
+ LDLM_PLAIN, NULL, 0, LCK_PR, inode,
+ &lockh);
+ if (!rc) {
+ ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0);
+
+ rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR,
+ &data, &lockh, NULL, 0,
+ ldlm_completion_ast, ll_mdc_blocking_ast,
+ inode);
+ request = (struct ptlrpc_request *)it.it_data;
+ if (request)
+ ptlrpc_req_finished(request);
+ if (rc < 0) {
+ CERROR("lock enqueue: err: %d\n", rc);
+ unlock_page(page);
+ RETURN(rc);
+ }
}
ldlm_lock_dump_handle(D_OTHER, &lockh);
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
inode->i_generation, inode, cmd);
- if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
+ if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
return -ENOTTY;
switch(cmd) {
/* getattr can race with writeback. we don't want to trust a getattr
* that doesn't include the writeback of our farthest cached pages
* that it raced with. */
+ /* Now that the OSC knows the cached-page status, it can and should be
+ * adjusting its getattr results to include the maximum cached offset
+ * for its stripe(s). */
do {
- bef = ll_farthest_dirty(&lli->lli_dirty, &before);
+ bef = obd_last_dirty_offset(ll_i2obdconn(inode), lli->lli_smd,
+ &before);
#if 0
rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
#else
if (rc)
RETURN(rc);
- aft = ll_farthest_dirty(&lli->lli_dirty, &after);
+ aft = obd_last_dirty_offset(ll_i2obdconn(inode), lli->lli_smd,
+ &after);
CDEBUG(D_INODE, " %d,%lu -> %d,%lu\n", bef, before, aft, after);
} while (bef == 0 &&
(aft != 0 || after < before) &&
CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino,
inode->i_generation, inode, cmd);
- if ((cmd & 0xffffff00) == ((int)'T') << 8) /* tty ioctls */
+ if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */
return -ENOTTY;
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL);
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include "llite_internal.h"
/* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. */
#ifdef PG_inactive_clean
/* we raced with truncate? */
if ( off >= inode->i_size ) {
- ll_remove_dirty(inode, page->index, page->index);
+ int rc;
+ rc = ll_clear_dirty_pages(ll_i2obdconn(inode),
+ ll_i2info(inode)->lli_smd,
+ page->index, page->index);
+
+ LASSERT(rc == 0);
+ CDEBUG(D_CACHE, "offset "LPU64" (index %lu) > i_size %llu\n",
+ off, page->index, inode->i_size);
unlock_page(page);
return 0;
}
pg->pg = page;
pg->off = off;
- pg->flag = OBD_BRW_CREATE;
+ pg->flag = OBD_BRW_CREATE|OBD_BRW_FROM_GRANT;
pg->count = PAGE_CACHE_SIZE;
/* catch partial writes for files that end mid-page */
CERROR("error from obd_brw_async: rc = %d\n", rc);
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_WB_FAIL, llwp->npgs);
- } else
+ } else {
lprocfs_counter_add(ll_i2sbi(inode)->ll_stats,
LPROC_LL_WB_OK, (llwp->npgs));
+ }
for (i = 0 ; i < llwp->npgs ; i++) {
struct page *page = llwp->pga[i].pg;
CDEBUG(D_CACHE, "finished page %p at index %lu\n", page,
page->index);
LASSERT(PageLocked(page));
- ll_remove_dirty(inode, page->index, page->index);
+
+ rc = ll_clear_dirty_pages(ll_i2obdconn(inode),
+ ll_i2info(inode)->lli_smd,
+ page->index, page->index);
+ LASSERT(rc == 0);
unlock_page(page);
page_cache_release(page);
}
current->flags |= PF_MEMALLOC;
rc = ll_alloc_brw(inode, &llwp);
if (rc != 0)
- GOTO(cleanup, rc);
+ GOTO(restore_flags, rc);
if (llwp_consume_page(&llwp, inode, page) == 0)
ll_get_dirty_pages(inode, &llwp);
LPROC_LL_WB_WRITEPAGE, llwp.npgs);
ll_writeback(inode, &llwp);
}
-
kfree(llwp.pga);
-cleanup:
- current->flags = old_flags;
- RETURN(rc);
-}
-
-/*
- * we aggressively track offsets of pages that have been dirtied. we need this
- * to make file size decisions around lock acquisition and cancelation. all
- * extents include the offsets at their endpoints.
- */
-struct offset_extent {
- rb_node_t oe_node;
- unsigned long oe_start, oe_end;
-};
-
-static struct offset_extent *ll_find_oe(rb_root_t *root,
- struct offset_extent *needle)
-{
- struct rb_node_s *node = root->rb_node;
- struct offset_extent *oe;
- ENTRY;
-
- CDEBUG(D_INODE, "searching [%lu -> %lu]\n", needle->oe_start,
- needle->oe_end);
-
- while (node) {
- oe = rb_entry(node, struct offset_extent, oe_node);
- if (needle->oe_end < oe->oe_start)
- node = node->rb_left;
- else if (needle->oe_start > oe->oe_end)
- node = node->rb_right;
- else {
- CDEBUG(D_INODE, "returning [%lu -> %lu]\n",
- oe->oe_start, oe->oe_end);
- RETURN(oe);
- }
- }
- RETURN(NULL);
-}
-
-/* do the rbtree mechanics to insert a node, callers are responsible
- * for making sure that this new node doesn't overlap with existing
- * nodes */
-static void ll_insert_oe(rb_root_t *root, struct offset_extent *new_oe)
-{
- rb_node_t ** p = &root->rb_node;
- rb_node_t * parent = NULL;
- struct offset_extent *oe;
- ENTRY;
-
- LASSERT(new_oe->oe_start <= new_oe->oe_end);
-
- while (*p) {
- parent = *p;
- oe = rb_entry(parent, struct offset_extent, oe_node);
- if ( new_oe->oe_end < oe->oe_start )
- p = &(*p)->rb_left;
- else if ( new_oe->oe_start > oe->oe_end )
- p = &(*p)->rb_right;
- else
- LBUG();
- }
- rb_link_node(&new_oe->oe_node, parent, p);
- rb_insert_color(&new_oe->oe_node, root);
- EXIT;
-}
-
-static inline void lldo_dirty_add(struct inode *inode,
- struct ll_dirty_offsets *lldo,
- long val)
-{
- lldo->do_num_dirty += val;
- lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRTY_PAGES,
- val);
-}
-
-void ll_record_dirty(struct inode *inode, unsigned long offset)
-{
- struct ll_dirty_offsets *lldo = &ll_i2info(inode)->lli_dirty;
- struct offset_extent needle, *oe, *new_oe;
- int rc;
- ENTRY;
-
- /* will allocate more intelligently later */
- OBD_ALLOC(new_oe, sizeof(*new_oe));
- LASSERT(new_oe); /* will have to do for now :/ */
-
- spin_lock(&lldo->do_lock);
-
- /* find neighbours that we might glom on to */
- needle.oe_start = (offset > 0) ? offset - 1 : offset;
- needle.oe_end = (offset < ~0) ? offset + 1 : offset;
- oe = ll_find_oe(&lldo->do_root, &needle);
- if ( oe == NULL ) {
- new_oe->oe_start = offset;
- new_oe->oe_end = offset;
- ll_insert_oe(&lldo->do_root, new_oe);
- lldo_dirty_add(inode, lldo, 1);
- new_oe = NULL;
- GOTO(out, rc = 1);
- }
-
- /* already recorded */
- if ( offset >= oe->oe_start && offset <= oe->oe_end )
- GOTO(out, rc = 2);
-
- /* ok, need to check for adjacent neighbours */
- needle.oe_start = offset;
- needle.oe_end = offset;
- if (ll_find_oe(&lldo->do_root, &needle))
- GOTO(out, rc = 3);
-
- /* ok, its safe to extend the oe we found */
- if ( offset == oe->oe_start - 1 )
- oe->oe_start--;
- else if ( offset == oe->oe_end + 1 )
- oe->oe_end++;
- else
- LBUG();
- lldo_dirty_add(inode, lldo, 1);
-
-out:
- CDEBUG(D_INODE, "%lu now dirty\n", lldo->do_num_dirty);
- spin_unlock(&lldo->do_lock);
- if ( new_oe )
- OBD_FREE(new_oe, sizeof(*new_oe));
- EXIT;
- return;
-}
-void ll_remove_dirty(struct inode *inode, unsigned long start,
- unsigned long end)
-{
- struct ll_dirty_offsets *lldo = &ll_i2info(inode)->lli_dirty;
- struct offset_extent needle, *oe, *new_oe;
- ENTRY;
-
- /* will allocate more intelligently later */
- OBD_ALLOC(new_oe, sizeof(*new_oe));
- LASSERT(new_oe); /* will have to do for now :/ */
-
- needle.oe_start = start;
- needle.oe_end = end;
-
- spin_lock(&lldo->do_lock);
- for ( ; (oe = ll_find_oe(&lldo->do_root, &needle)) ; ) {
-
- /* see if we're punching a hole and need to create a node */
- if (oe->oe_start < start && oe->oe_end > end) {
- new_oe->oe_start = end + 1;
- new_oe->oe_end = oe->oe_end;
- oe->oe_end = start - 1;
- ll_insert_oe(&lldo->do_root, new_oe);
- new_oe = NULL;
- lldo_dirty_add(inode, lldo, -(end - start + 1));
- break;
- }
-
- /* overlapping edges */
- if (oe->oe_start < start && oe->oe_end <= end) {
- lldo_dirty_add(inode, lldo, -(oe->oe_end - start + 1));
- oe->oe_end = start - 1;
- oe = NULL;
- continue;
- }
- if (oe->oe_end > end && oe->oe_start >= start) {
- lldo_dirty_add(inode, lldo, -(end - oe->oe_start + 1));
- oe->oe_start = end + 1;
- oe = NULL;
- continue;
- }
-
- /* an extent entirely within the one we're clearing */
- rb_erase(&oe->oe_node, &lldo->do_root);
- lldo_dirty_add(inode, lldo, -(oe->oe_end - oe->oe_start + 1));
- spin_unlock(&lldo->do_lock);
- OBD_FREE(oe, sizeof(*oe));
- spin_lock(&lldo->do_lock);
- }
- CDEBUG(D_INODE, "%lu now dirty\n", lldo->do_num_dirty);
- spin_unlock(&lldo->do_lock);
- if (new_oe)
- OBD_FREE(new_oe, sizeof(*new_oe));
- EXIT;
-}
-
-int ll_find_dirty(struct ll_dirty_offsets *lldo, unsigned long *start,
- unsigned long *end)
-{
- struct offset_extent needle, *oe;
- int rc = -ENOENT;
- ENTRY;
-
- needle.oe_start = *start;
- needle.oe_end = *end;
-
- spin_lock(&lldo->do_lock);
- oe = ll_find_oe(&lldo->do_root, &needle);
- if (oe) {
- *start = oe->oe_start;
- *end = oe->oe_end;
- rc = 0;
- }
- spin_unlock(&lldo->do_lock);
-
- RETURN(rc);
-}
-
-int ll_farthest_dirty(struct ll_dirty_offsets *lldo, unsigned long *farthest)
-{
- struct rb_node_s *last, *node;
- struct offset_extent *oe;
- int rc = -1;
- ENTRY;
-
- spin_lock(&lldo->do_lock);
- for (node = lldo->do_root.rb_node, last = NULL;
- node;
- last = node, node = node->rb_right)
- ;
-
- if (last) {
- oe = rb_entry(last, struct offset_extent, oe_node);
- *farthest = oe->oe_end;
- rc = 0;
- }
- spin_unlock(&lldo->do_lock);
+restore_flags:
+ current->flags = old_flags;
RETURN(rc);
}
-
-void ll_lldo_init(struct ll_dirty_offsets *lldo)
-{
- spin_lock_init(&lldo->do_lock);
- lldo->do_num_dirty = 0;
- lldo->do_root.rb_node = NULL;
-}
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2003 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+#ifndef LLITE_INTERNAL_H
+#define LLITE_INTERNAL_H
+
+/* Forward declarations: the prototypes below only pass pointers to these
+ * types, so this header does not depend on what was included before it. */
+struct file;
+struct inode;
+struct lustre_handle;
+struct lov_stripe_md;
+
int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
int flags, void *opaque);
+
+/* Handlers for the "dirty_pages" and "max_dirty_pages" lprocfs entries. */
+int ll_rd_dirty_pages(char *page, char **start, off_t off, int count,
+                      int *eof, void *data);
+int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
+                          int *eof, void *data);
+int ll_wr_max_dirty_pages(struct file *file, const char *buffer,
+                          unsigned long count, void *data);
+
+/* Dirty-page bookkeeping: each call forwards to the matching obd_* operation
+ * and, on success, adjusts the local dirty-page count. */
+int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                         unsigned long start, unsigned long end);
+int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                       unsigned long index);
+
+#endif /* LLITE_INTERNAL_H */
#include <linux/lustre_lite.h>
#include <linux/lprocfs_status.h>
+#include "llite_internal.h"
+
/* /proc/lustre/llite mount point registration */
#ifndef LPROCFS
{ "filestotal", rd_filestotal, 0, 0 },
{ "filesfree", rd_filesfree, 0, 0 },
{ "filegroups", rd_filegroups, 0, 0 },
+ { "dirty_pages", ll_rd_dirty_pages, 0, 0},
+ { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0},
{ 0 }
};
const char *opname;
} llite_opcode_table[LPROC_LL_FILE_OPCODES] = {
/* file operation */
- { LPROC_LL_DIRTY_PAGES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
- "dirty_pages" },
{ LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" },
{ LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" },
{ LPROC_LL_WB_WRITEPAGE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES,
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct obd_device *obd;
char name[MAX_STRING_SIZE + 1];
- struct obd_uuid uuid;
int err, id;
struct lprocfs_stats *svc_stats = NULL;
ENTRY;
goto out;
/* MDC info */
- strncpy(uuid.uuid, mdc, sizeof(uuid.uuid));
- obd = class_uuid2obd(&uuid);
+ obd = class_name2obd(mdc);
LASSERT(obd != NULL);
LASSERT(obd->obd_type != NULL);
goto out;
/* OSC */
- strncpy(uuid.uuid, osc, sizeof(uuid.uuid));
- obd = class_uuid2obd(&uuid);
+ obd = class_name2obd(osc);
LASSERT(obd != NULL);
LASSERT(obd->obd_type != NULL);
LASSERT (request != NULL);
if (intent_finish != NULL) {
+ struct lustre_handle old_lock;
+ struct ldlm_lock *lock;
+
rc = intent_finish(flag, request, parent, de, it, offset, ino);
dentry = *de; /* intent_finish may change *de */
inode = dentry->d_inode;
if (rc != 0)
GOTO(drop_lock, rc);
+
+ /* The intent processing may well have given us a lock different
+ * from the one we requested. If we already have a matching
+ * lock, then cancel the new one. (We have to do this here,
+ * instead of in mdc_enqueue, because we need to use the child's
+ * inode as the l_data to match, and that's not available until
+ * intent_finish has performed the iget().) */
+ lock = ldlm_handle2lock(&lockh);
+ if (lock) {
+ LDLM_DEBUG(lock, "matching against this");
+ LDLM_LOCK_PUT(lock);
+ memcpy(&old_lock, &lockh, sizeof(lockh));
+ if (ldlm_lock_match(NULL,
+ LDLM_FL_BLOCK_GRANTED |
+ LDLM_FL_MATCH_DATA,
+ NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
+ inode, &old_lock)) {
+ ldlm_lock_decref_and_cancel(&lockh,
+ it->it_lock_mode);
+ memcpy(&lockh, &old_lock, sizeof(old_lock));
+ memcpy(it->it_lock_handle, &lockh,
+ sizeof(lockh));
+ }
+ }
+
}
ptlrpc_req_finished(request);
RETURN(rc);
}
+/* Stub for the dentry-based mknod inode operation: it trips LBUG() and then
+ * returns -ENOSYS.
+ * NOTE(review): presumably node creation is expected to arrive through
+ * ll_mknod2() instead -- confirm against the VFS patch in use. */
+static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
+ int rdev)
+{
+ LBUG();
+ return -ENOSYS;
+}
+
static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
int rdev)
{
RETURN(err);
}
-static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
- int rdev)
+static int ll_symlink(struct inode *dir, struct dentry *dentry,
+ const char *symname)
{
- struct lookup_intent *it;
- struct inode *inode;
- int rc = 0;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- if ((mode & S_IFMT) == 0)
- mode |= S_IFREG;
- inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
- NULL, 0, mode, rdev, it);
-
- if (IS_ERR(inode))
- RETURN(PTR_ERR(inode));
-
- /* no directory data updates when intents rule */
- if (it && it->it_disposition)
- d_instantiate(dentry, inode);
- else
- rc = ext2_add_nondir(dentry, inode);
-
- return rc;
+ LBUG();
+ return -ENOSYS;
}
static int ll_symlink2(struct inode *dir, const char *name, int len,
RETURN(err);
}
-static int ll_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
+static int ll_link(struct dentry *old_dentry, struct inode * dir,
+ struct dentry *dentry)
{
- struct lookup_intent *it;
- unsigned l = strlen(symname) + 1;
- struct inode *inode;
- struct ll_inode_info *lli;
- int err = 0;
- ENTRY;
-
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
- symname, l, S_IFLNK | S_IRWXUGO, 0, it);
- if (IS_ERR(inode))
- RETURN(PTR_ERR(inode));
-
- lli = ll_i2info(inode);
-
- OBD_ALLOC(lli->lli_symlink_name, l);
- /* this _could_ be a non-fatal error, since the symlink is already
- * stored on the MDS by this point, and we can re-get it in readlink.
- */
- if (!lli->lli_symlink_name)
- RETURN(-ENOMEM);
-
- memcpy(lli->lli_symlink_name, symname, l);
- inode->i_size = l - 1;
-
- /* no directory data updates when intents rule */
- if (it && it->it_disposition)
- d_instantiate(dentry, inode);
- else
- err = ext2_add_nondir(dentry, inode);
-
- RETURN(err);
+ LBUG();
+ return -ENOSYS;
}
static int ll_link2(struct inode *src, struct inode *dir,
RETURN(err);
}
-static int ll_link(struct dentry *old_dentry, struct inode * dir,
- struct dentry *dentry)
+static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
- struct lookup_intent *it;
- struct inode *inode = old_dentry->d_inode;
- int rc;
- CDEBUG(D_VFSTRACE,
- "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s,intent=%s\n",
- inode->i_ino, inode->i_generation, inode, dir->i_ino,
- dir->i_generation, dir, dentry->d_name.name,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- if (it && it->it_disposition) {
- if (it->it_status)
- RETURN(it->it_status);
- LTIME_S(inode->i_ctime) = LTIME_S(CURRENT_TIME);
- ext2_inc_count(inode);
- atomic_inc(&inode->i_count);
- d_instantiate(dentry, inode);
- ll_invalidate_inode_pages(dir);
- RETURN(0);
- }
-
- if (S_ISDIR(inode->i_mode))
- return -EPERM;
-
- if (inode->i_nlink >= EXT2_LINK_MAX)
- return -EMLINK;
-
- rc = ll_link2(old_dentry->d_inode, dir,
- dentry->d_name.name, dentry->d_name.len);
- if (rc)
- RETURN(rc);
-
- LTIME_S(inode->i_ctime) = LTIME_S(CURRENT_TIME);
- ext2_inc_count(inode);
- atomic_inc(&inode->i_count);
-
- return ext2_add_nondir(dentry, inode);
+ LBUG();
+ return -ENOSYS;
}
static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
RETURN(err);
}
-
-static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct lookup_intent *it;
- struct inode * inode;
- int err = -EMLINK;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- if (dir->i_nlink >= EXT2_LINK_MAX)
- goto out;
-
- ext2_inc_count(dir);
- inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
- NULL, 0, S_IFDIR | mode, 0, it);
- err = PTR_ERR(inode);
- if (IS_ERR(inode))
- goto out_dir;
-
- err = ext2_make_empty(inode, dir);
- if (err)
- goto out_fail;
-
- /* no directory data updates when intents rule */
- if (!it || !it->it_disposition) {
- /* XXX FIXME This code needs re-checked for non-intents */
- ext2_inc_count(inode);
- err = ll_add_link(dentry, inode);
- if (err)
- goto out_fail;
- }
-
- d_instantiate(dentry, inode);
-out:
- EXIT;
- return err;
-
-out_fail:
- ext2_dec_count(inode);
- ext2_dec_count(inode);
- iput(inode);
- EXIT;
-out_dir:
- ext2_dec_count(dir);
- EXIT;
- goto out;
-}
-
static int ll_rmdir2(struct inode *dir, const char *name, int len)
{
int rc;
RETURN(rc);
}
-static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
- struct lookup_intent *it, __u32 mode)
-{
- struct inode *inode = dentry->d_inode;
- struct ext2_dir_entry_2 * de;
- struct page * page;
- int rc = 0;
- ENTRY;
-
- if (it && it->it_disposition) {
- rc = it->it_status;
- ll_invalidate_inode_pages(dir);
- if (rc)
- GOTO(out, rc);
- GOTO(out_dec, 0);
- }
-
- de = ext2_find_entry(dir, dentry, &page);
- if (!de)
- GOTO(out, rc = -ENOENT);
- rc = ll_mdc_unlink(dir, dentry->d_inode, mode,
- dentry->d_name.name, dentry->d_name.len);
- if (rc)
- GOTO(out, rc);
-
- rc = ext2_delete_entry(de, page);
- if (rc)
- GOTO(out, rc);
-
- /* AED: not sure if needed - directory lock revocation should do it
- * in the case where the client has cached it for non-intent ops.
- */
- ll_invalidate_inode_pages(dir);
-
- inode->i_ctime = dir->i_ctime;
- EXIT;
-out_dec:
- ext2_dec_count(inode);
-out:
- return rc;
-}
-
static int ll_unlink(struct inode *dir, struct dentry *dentry)
{
- struct lookup_intent * it;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- RETURN(ll_common_unlink(dir, dentry, it, S_IFREG));
+ LBUG();
+ return -ENOSYS;
}
static int ll_rmdir(struct inode *dir, struct dentry *dentry)
{
- struct inode * inode = dentry->d_inode;
- struct lookup_intent *it;
- int rc;
- ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(dentry->d_it));
-
- LL_GET_INTENT(dentry, it);
-
- if ((!it || !it->it_disposition) && !ext2_empty_dir(inode))
- RETURN(-ENOTEMPTY);
-
- rc = ll_common_unlink(dir, dentry, it, S_IFDIR);
- if (!rc) {
- inode->i_size = 0;
- ext2_dec_count(inode);
- ext2_dec_count(dir);
- }
+ LBUG();
+ return -ENOSYS;
+}
- RETURN(rc);
+static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
+ struct inode * new_dir, struct dentry * new_dentry)
+{
+ LBUG();
+ return -ENOSYS;
}
static int ll_rename2(struct inode *src, struct inode *tgt,
RETURN(err);
}
-
-
-static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
- struct inode * new_dir, struct dentry * new_dentry)
-{
- struct lookup_intent *it;
- struct inode * old_inode = old_dentry->d_inode;
- struct inode * tgt_inode = new_dentry->d_inode;
- struct page * dir_page = NULL;
- struct ext2_dir_entry_2 * dir_de = NULL;
- struct ext2_dir_entry_2 * old_de;
- struct page * old_page;
- int err;
- CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s,"
- "tgt_dir=%lu/%u(%p),intent=%s\n",
- old_dentry->d_name.name, old_dir->i_ino, old_dir->i_generation,
- old_dir, new_dentry->d_name.name, new_dir->i_ino,
- new_dir->i_generation, new_dir, LL_IT2STR(new_dentry->d_it));
-
- LL_GET_INTENT(new_dentry, it);
-
- if (it && it->it_disposition) {
- if (tgt_inode) {
- tgt_inode->i_ctime = CURRENT_TIME;
- tgt_inode->i_nlink--;
- }
- ll_invalidate_inode_pages(old_dir);
- ll_invalidate_inode_pages(new_dir);
- GOTO(out, err = it->it_status);
- }
-
- err = ll_rename2(old_dir, new_dir,
- old_dentry->d_name.name, old_dentry->d_name.len,
- new_dentry->d_name.name, new_dentry->d_name.len);
- if (err)
- goto out;
-
- old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
- if (!old_de)
- goto out;
-
- if (S_ISDIR(old_inode->i_mode)) {
- err = -EIO;
- dir_de = ext2_dotdot(old_inode, &dir_page);
- if (!dir_de)
- goto out_old;
- }
-
- if (tgt_inode) {
- struct page *new_page;
- struct ext2_dir_entry_2 *new_de;
-
- err = -ENOTEMPTY;
- if (dir_de && !ext2_empty_dir (tgt_inode))
- goto out_dir;
-
- err = -ENOENT;
- new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
- if (!new_de)
- goto out_dir;
- ext2_inc_count(old_inode);
- ext2_set_link(new_dir, new_de, new_page, old_inode);
- tgt_inode->i_ctime = CURRENT_TIME;
- if (dir_de)
- tgt_inode->i_nlink--;
- ext2_dec_count(tgt_inode);
- } else {
- if (dir_de) {
- err = -EMLINK;
- if (new_dir->i_nlink >= EXT2_LINK_MAX)
- goto out_dir;
- }
- ext2_inc_count(old_inode);
- err = ll_add_link(new_dentry, old_inode);
- if (err) {
- ext2_dec_count(old_inode);
- goto out_dir;
- }
- if (dir_de)
- ext2_inc_count(new_dir);
- }
-
- ext2_delete_entry (old_de, old_page);
- ext2_dec_count(old_inode);
-
- if (dir_de) {
- ext2_set_link(old_inode, dir_de, dir_page, new_dir);
- ext2_dec_count(old_dir);
- }
- return 0;
-
-out_dir:
- if (dir_de) {
- kunmap(dir_page);
- page_cache_release(dir_page);
- }
-out_old:
- kunmap(old_page);
- page_cache_release(old_page);
-out:
- return err;
-}
-
extern int ll_inode_revalidate(struct dentry *dentry);
struct inode_operations ll_dir_inode_operations = {
create: ll_create,
lookup2: ll_lookup2,
- link: ll_link,
+ link: ll_link, /* LBUG() */
link2: ll_link2,
- unlink: ll_unlink,
+ unlink: ll_unlink, /* LBUG() */
unlink2: ll_unlink2,
- symlink: ll_symlink,
+ symlink: ll_symlink, /* LBUG() */
symlink2: ll_symlink2,
- mkdir: ll_mkdir,
+ mkdir: ll_mkdir, /* LBUG() */
mkdir2: ll_mkdir2,
- rmdir: ll_rmdir,
+ rmdir: ll_rmdir, /* LBUG() */
rmdir2: ll_rmdir2,
- mknod: ll_mknod,
+ mknod: ll_mknod, /* LBUG() */
mknod2: ll_mknod2,
- rename: ll_rename,
+ rename: ll_rename, /* LBUG() */
rename2: ll_rename2,
setattr: ll_setattr,
setattr_raw: ll_setattr_raw,
#include <linux/version.h>
#include <asm/system.h>
#include <asm/uaccess.h>
-
+#include "llite_internal.h"
#include <linux/fs.h>
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
return;
}
- /* vmtruncate just threw away our dirty pages, make sure
+ /* vmtruncate will just throw away our dirty pages, make sure
* we don't think they're still dirty, being careful to round
* i_size to the first whole page that was tossed */
- ll_remove_dirty(inode,
+ err = ll_clear_dirty_pages(ll_i2obdconn(inode), lsm,
(inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT,
~0);
* yet.
*/
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/* Count of pages currently accounted as dirty in the local cache; raised by
+ * ll_local_cache_dirtied_pages() and lowered by
+ * ll_local_cache_flushed_pages(). */
+static unsigned long ll_local_cache_dirty_pages;
+/* Upper bound on ll_local_cache_dirty_pages, in pages.  The default is 20MB
+ * worth of PAGE_SIZE pages; ll_local_cache_full() never reports "full" when
+ * this is 0. */
+static unsigned long ll_max_dirty_pages = 20 * 1024 * 1024 / PAGE_SIZE;
+
+/* Taken around every read and update of the two counters above. */
+static spinlock_t ll_local_cache_page_count_lock = SPIN_LOCK_UNLOCKED;
+
+/* Read handler for the "dirty_pages" entry: formats the current local
+ * dirty-page count into @page as a decimal number followed by a newline.
+ * The counter is sampled under ll_local_cache_page_count_lock and formatted
+ * after the lock is dropped.  Returns whatever snprintf() returns. */
+int ll_rd_dirty_pages(char *page, char **start, off_t off, int count, int *eof,
+                      void *data)
+{
+        unsigned long snapshot;
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        snapshot = ll_local_cache_dirty_pages;
+        spin_unlock(&ll_local_cache_page_count_lock);
+
+        return snprintf(page, count, "%lu\n", snapshot);
+}
+
+/* Read handler for the "max_dirty_pages" entry: formats the configured
+ * dirty-page limit into @page as a decimal number followed by a newline.
+ * The limit is sampled under ll_local_cache_page_count_lock and formatted
+ * after the lock is dropped.  Returns whatever snprintf() returns. */
+int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+        unsigned long limit;
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        limit = ll_max_dirty_pages;
+        spin_unlock(&ll_local_cache_page_count_lock);
+
+        return snprintf(page, count, "%lu\n", limit);
+}
+
+/* Write handler for the "max_dirty_pages" entry.
+ *
+ * Copies at most sizeof(kernbuf) - 1 bytes from the user buffer, parses them
+ * with simple_strtol() (base auto-detected) and installs the result as the
+ * new ll_max_dirty_pages under ll_local_cache_page_count_lock.
+ *
+ * Returns @count on success, -EINVAL if the input is too long, contains no
+ * number, or is negative, and -EFAULT if the user buffer cannot be read. */
+int ll_wr_max_dirty_pages(struct file *file, const char *buffer,
+                          unsigned long count, void *data)
+{
+        unsigned long max_dirty;
+        signed long max_dirty_signed;
+        char kernbuf[20], *end;
+
+        if (count > (sizeof(kernbuf) - 1))
+                return -EINVAL;
+
+        if (copy_from_user(kernbuf, buffer, count))
+                return -EFAULT;
+
+        kernbuf[count] = '\0';
+
+        max_dirty_signed = simple_strtol(kernbuf, &end, 0);
+        if (kernbuf == end)
+                return -EINVAL;
+        /* A negative value would wrap to an enormous unsigned limit when
+         * cast below; reject it rather than silently disabling the cap. */
+        if (max_dirty_signed < 0)
+                return -EINVAL;
+        max_dirty = (unsigned long)max_dirty_signed;
+
+#if 0
+        if (max_dirty < ll_local_cache_dirty_pages)
+                flush_to_new_max_dirty();
+#endif
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        CDEBUG(D_CACHE, "changing max_dirty from %lu to %lu\n",
+               ll_max_dirty_pages, max_dirty);
+        ll_max_dirty_pages = max_dirty;
+        spin_unlock(&ll_local_cache_page_count_lock);
+        return count;
+}
+
+/* Returns 1 when a non-zero ll_max_dirty_pages limit is set and the local
+ * dirty-page count has reached it, 0 otherwise.  Both values are read under
+ * ll_local_cache_page_count_lock. */
+static int ll_local_cache_full(void)
+{
+        int at_limit;
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        at_limit = (ll_max_dirty_pages != 0 &&
+                    ll_local_cache_dirty_pages >= ll_max_dirty_pages) ? 1 : 0;
+        spin_unlock(&ll_local_cache_page_count_lock);
+
+        /* XXX instrument? */
+        /* XXX trigger async writeback when full, or 75% of full? */
+        return at_limit;
+}
+
+/* Subtracts @pgcount from the local dirty-page count under
+ * ll_local_cache_page_count_lock and logs the before/after values.
+ * Asserts that the count held at least @pgcount pages beforehand; the
+ * assertion is evaluated after the lock has been released. */
+static void ll_local_cache_flushed_pages(unsigned long pgcount)
+{
+        unsigned long dirty_count;
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        dirty_count = ll_local_cache_dirty_pages;
+        ll_local_cache_dirty_pages -= pgcount;
+        /* Message format matches ll_local_cache_dirtied_pages(). */
+        CDEBUG(D_CACHE, "dirty pages: %lu->%lu\n",
+               dirty_count, ll_local_cache_dirty_pages);
+        spin_unlock(&ll_local_cache_page_count_lock);
+        LASSERT(dirty_count >= pgcount);
+}
+
+/* Adds @pgcount to the local dirty-page count under
+ * ll_local_cache_page_count_lock and logs the before/after values. */
+static void ll_local_cache_dirtied_pages(unsigned long pgcount)
+{
+        unsigned long before;
+
+        spin_lock(&ll_local_cache_page_count_lock);
+        before = ll_local_cache_dirty_pages;
+        ll_local_cache_dirty_pages = before + pgcount;
+        CDEBUG(D_CACHE, "dirty pages: %lu->%lu\n",
+               before, ll_local_cache_dirty_pages);
+        spin_unlock(&ll_local_cache_page_count_lock);
+        /* XXX track maximum cached, report to lprocfs */
+}
+
+/* Calls obd_clear_dirty_pages() for the page-index range [@start, @end] of
+ * @lsm and, when that succeeds, removes the number of pages it reported as
+ * cleared from the local dirty-page count.  Returns the obd call's result. */
+int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                         unsigned long start, unsigned long end)
+{
+        unsigned long nr_cleared;
+        int rc;
+
+        ENTRY;
+        rc = obd_clear_dirty_pages(conn, lsm, start, end, &nr_cleared);
+        if (rc == 0)
+                ll_local_cache_flushed_pages(nr_cleared);
+
+        RETURN(rc);
+}
+
+/* Records page @index of @lsm as dirty.
+ *
+ * Returns -EDQUOT without calling into the OBD layer when the local cache is
+ * already at its limit.  Otherwise returns the result of
+ * obd_mark_page_dirty(), adding one page to the local dirty count when that
+ * result is 0. */
+int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                       unsigned long index)
+{
+        int rc;
+
+        ENTRY;
+        if (ll_local_cache_full())
+                RETURN(-EDQUOT);
+
+        rc = obd_mark_page_dirty(conn, lsm, index);
+        if (rc == 0)
+                ll_local_cache_dirtied_pages(1);
+
+        RETURN(rc);
+}
+
static int ll_writepage(struct page *page)
{
struct inode *inode = page->mapping->host;
{
struct inode *inode = page->mapping->host;
loff_t size;
+ int rc = 0;
ENTRY;
LASSERT(inode == file->f_dentry->d_inode);
CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
inode, page, from, to, page->index);
- /* to match full page case in prepare_write */
- SetPageUptodate(page);
- /* mark the page dirty, put it on mapping->dirty,
- * mark the inode PAGES_DIRTY, put it on sb->dirty */
- if (!PageDirty(page))
+ if (!PageDirty(page)) {
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRTY_MISSES);
- else
+ rc = ll_mark_dirty_page(ll_i2obdconn(inode),
+ ll_i2info(inode)->lli_smd,
+ page->index);
+ if (rc < 0 && rc != -EDQUOT)
+ RETURN(rc); /* XXX lproc counter here? */
+ } else {
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRTY_HITS);
+ }
size = (((obd_off)page->index) << PAGE_SHIFT) + to;
if (size > inode->i_size)
inode->i_size = size;
- /* XXX temporary, bug 1286 */
- {
- struct ll_dirty_offsets *lldo = &ll_i2info(inode)->lli_dirty;
- int rc;
- if ((lldo->do_num_dirty * PAGE_CACHE_SIZE) > 10 * 1024 * 1024) {
- rc = ll_batch_writepage(inode, page);
- lock_page(page); /* caller expects to unlock */
- RETURN(rc);
- }
- }
-
+ SetPageUptodate(page);
set_page_dirty(page);
- ll_record_dirty(inode, page->index);
+
+ /* This means that we've hit either the local cache limit or the limit
+ * of the OST's grant. */
+ if (rc == -EDQUOT) {
+ int rc = ll_batch_writepage(inode, page);
+ lock_page(page); /* caller expects to unlock */
+ RETURN(rc);
+ }
RETURN(0);
} /* ll_commit_write */
struct inode *root = 0;
struct obd_device *obd;
struct ll_sb_info *sbi;
+ struct obd_export *mdc_export;
char *osc = NULL;
char *mdc = NULL;
int err;
struct ptlrpc_connection *mdc_conn;
struct ll_read_inode2_cookie lic;
class_uuid_t uuid;
- struct obd_uuid param_uuid;
ENTRY;
GOTO(out_free, sb = NULL);
}
- strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(mdc);
if (!obd) {
CERROR("MDC %s: not setup or attached\n", mdc);
GOTO(out_free, sb = NULL);
mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
- strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(osc);
if (!obd) {
CERROR("OSC %s: not setup or attached\n", osc);
GOTO(out_mdc, sb = NULL);
sbi->ll_rootino = rootfid.id;
memset(&osfs, 0, sizeof(osfs));
- err = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+ mdc_export = class_conn2export(&sbi->ll_mdc_conn);
+ if (mdc_export == NULL) {
+ CERROR("null mdc_export\n");
+ GOTO(out_osc, sb = NULL);
+ }
+ err = obd_statfs(mdc_export, &osfs);
+ class_export_put(mdc_export);
sb->s_blocksize = osfs.os_bsize;
sb->s_blocksize_bits = log2(osfs.os_bsize);
sb->s_magic = LL_SUPER_MAGIC;
static int ll_statfs(struct super_block *sb, struct statfs *sfs)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn);
+ struct obd_export *osc_exp;
struct obd_statfs osfs;
int rc;
ENTRY;
+ if (mdc_exp == NULL)
+ RETURN(-EINVAL);
+
CDEBUG(D_VFSTRACE, "VFS Op:\n");
lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS);
memset(sfs, 0, sizeof(*sfs));
- rc = obd_statfs(&sbi->ll_mdc_conn, &osfs);
+ rc = obd_statfs(mdc_exp, &osfs);
statfs_unpack(sfs, &osfs);
if (rc)
CERROR("mdc_statfs fails: rc = %d\n", rc);
/* temporary until mds_statfs returns statfs info for all OSTs */
if (!rc) {
- rc = obd_statfs(&sbi->ll_osc_conn, &osfs);
+ osc_exp = class_conn2export(&sbi->ll_osc_conn);
+ if (osc_exp == NULL)
+ GOTO(out, rc = -EINVAL);
+ rc = obd_statfs(osc_exp, &osfs);
+ class_export_put(osc_exp);
if (rc) {
CERROR("obd_statfs fails: rc = %d\n", rc);
GOTO(out, rc);
}
out:
+ class_export_put(mdc_exp);
RETURN(rc);
}
sema_init(&lli->lli_open_sem, 1);
spin_lock_init(&lli->lli_read_extent_lock);
INIT_LIST_HEAD(&lli->lli_read_extents);
- ll_lldo_init(&lli->lli_dirty);
lli->lli_flags = 0;
/* We default to 2T-4k until the LSM is created/read, at which point
* it'll be updated. */
struct ptlrpc_connection *mdc_conn;
struct ll_read_inode2_cookie lic;
class_uuid_t uuid;
- struct obd_uuid param_uuid;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op:\n");
GOTO(out_free, sb = NULL);
}
- strncpy(param_uuid.uuid, mdc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(mdc);
if (!obd) {
CERROR("MDC %s: not setup or attached\n", mdc);
GOTO(out_free, sb = NULL);
}
mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection;
- strncpy(param_uuid.uuid, osc, sizeof(param_uuid.uuid));
- obd = class_uuid2obd(¶m_uuid);
+ obd = class_name2obd(osc);
if (!obd) {
CERROR("OSC %s: not setup or attached\n", osc);
GOTO(out_mdc, sb = NULL);
#include <linux/init.h>
#include <linux/random.h>
#include <linux/slab.h>
+#include <linux/pagemap.h>
#include <asm/div64.h>
#else
#include <liblustre.h>
{
struct obd_ioctl_data *data = buf;
struct lov_obd *lov = &obd->u.lov;
- struct obd_uuid uuid;
int rc = 0;
ENTRY;
if (data->ioc_inllen1 < 1) {
- CERROR("LOV setup requires an MDC UUID\n");
- RETURN(-EINVAL);
- }
-
- if (data->ioc_inllen1 > 37) {
- CERROR("mdc UUID must be 36 characters or less\n");
+ CERROR("LOV setup requires an MDC name\n");
RETURN(-EINVAL);
}
spin_lock_init(&lov->lov_lock);
- obd_str2uuid(&uuid, data->ioc_inlbuf1);
- lov->mdcobd = class_uuid2obd(&uuid);
+ lov->mdcobd = class_name2obd(data->ioc_inlbuf1);
if (!lov->mdcobd) {
CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid.uuid,
data->ioc_inlbuf1);
CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n",
lsm->lsm_object_id, loi->loi_id, ost_idx);
- if (!set)
+ if (set == 0)
lsm->lsm_stripe_offset = ost_idx;
lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set);
+ ot_init(&loi->loi_dirty_ot_inline);
+ loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
++obj_alloc;
++loi;
goto out_tmp;
}
+/* Evaluates to 1 (after logging a CERROR) when LSMP is NULL or its lsm_magic
+ * is not LOV_MAGIC, and to 0 otherwise.  LSMP is evaluated exactly once. */
+#define lsm_bad_magic(LSMP)                                             \
+({                                                                      \
+        struct lov_stripe_md *_lsm__ = (LSMP);                          \
+        int _bad__ = 1;                                                 \
+                                                                        \
+        if (_lsm__ == NULL) {                                           \
+                CERROR("LOV requires striping ea\n");                   \
+        } else if (_lsm__->lsm_magic != LOV_MAGIC) {                    \
+                CERROR("LOV striping magic bad %#x != %#x\n",           \
+                       _lsm__->lsm_magic, LOV_MAGIC);                   \
+        } else {                                                        \
+                _bad__ = 0;                                             \
+        }                                                               \
+        _bad__;                                                         \
+})
+
static int lov_destroy(struct lustre_handle *conn, struct obdo *oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
int rc = 0, i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea for destruction\n");
- GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
int i, rc = 0, set = 0;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
- GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
int rc = 0, i, set = 0;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
- GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
ENTRY;
LASSERT(och != NULL);
- if (!lsm) {
- CERROR("LOV requires striping ea for opening\n");
- GOTO(out_exp, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out_exp, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out_exp, rc = -ENODEV);
int rc = 0, i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
- GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
int rc = 0, i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
- GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
+ if (lsm_bad_magic(lsm))
GOTO(out_exp, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
- GOTO(out_exp, rc = -EINVAL);
- }
lov = &export->exp_obd->u.lov;
int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
+ if (lsm_bad_magic(lsm))
GOTO(out_exp, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
- GOTO(out_exp, rc = -EINVAL);
- }
lov = &export->exp_obd->u.lov;
int i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
- GOTO(out_exp, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out_exp, rc = -EINVAL);
- }
/* we should never be asked to replay a lock this way. */
LASSERT((*flags & LDLM_FL_REPLAY) == 0);
int i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
- GOTO(out_exp, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out_exp, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out_exp, rc = -ENODEV);
int rc = 0, i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea\n");
- GOTO(out, rc = -EINVAL);
- }
-
- if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("LOV striping magic bad %#x != %#x\n",
- lsm->lsm_magic, LOV_MAGIC);
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
int rc = 0, i;
ENTRY;
- if (!lsm) {
- CERROR("LOV requires striping ea for lock cancellation\n");
+ if (lsm_bad_magic(lsm))
GOTO(out, rc = -EINVAL);
- }
if (!export || !export->exp_obd)
GOTO(out, rc = -ENODEV);
(tot) += (add); \
} while(0)
-static int lov_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs)
{
- struct obd_export *export = class_conn2export(conn);
+ struct obd_export *tgt_export;
struct lov_obd *lov;
struct obd_statfs lov_sfs;
int set = 0;
ENTRY;
if (!export || !export->exp_obd)
- GOTO(out, rc = -ENODEV);
+ RETURN(-ENODEV);
lov = &export->exp_obd->u.lov;
continue;
}
- err = obd_statfs(&lov->tgts[i].conn, &lov_sfs);
+ tgt_export = class_conn2export(&lov->tgts[i].conn);
+ if (!tgt_export) {
+ CDEBUG(D_HA, "lov idx %d NULL export\n", i);
+ continue;
+ }
+
+ err = obd_statfs(tgt_export, &lov_sfs);
+ class_export_put(tgt_export);
if (err) {
if (lov->tgts[i].active) {
CERROR("error: statfs OSC %s on OST idx %d: "
do_div(osfs->os_ffree, expected_stripes);
} else if (!rc)
rc = -EIO;
- GOTO(out, rc);
- out:
- class_export_put(export);
- return rc;
+ RETURN(rc);
}
static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len,
RETURN(-EINVAL);
}
+/*
+ * Mark one page of a striped object dirty on the OSC that backs it.
+ *
+ * @conn:   handle of the LOV device
+ * @lsm:    striping descriptor of the object; must pass lsm_bad_magic()
+ * @offset: page index within the file
+ *
+ * The file page index is translated to a stripe number and a page index
+ * within that stripe's object, then forwarded through obd_mark_page_dirty()
+ * using a temporary one-stripe descriptor that points at the stripe's
+ * dirty-offset tree.
+ *
+ * Returns -EINVAL for a bad @lsm, -ENODEV when @conn does not resolve to a
+ * device, -ENOMEM when the temporary descriptor cannot be allocated, and
+ * otherwise whatever obd_mark_page_dirty() returns.
+ */
+static int lov_mark_page_dirty(struct lustre_handle *conn,
+                               struct lov_stripe_md *lsm, unsigned long offset)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        struct lov_stripe_md *submd;
+        int stripe, rc;
+        obd_off off;
+        ENTRY;
+
+        if (lsm_bad_magic(lsm))
+                RETURN(-EINVAL);
+
+        /* an unknown handle yields no device; don't index off of NULL */
+        if (obd == NULL)
+                RETURN(-ENODEV);
+        lov = &obd->u.lov;
+
+        OBD_ALLOC(submd, lov_stripe_md_size(1));
+        if (submd == NULL)
+                RETURN(-ENOMEM);
+
+        /* file page index -> (stripe, page index inside that stripe) */
+        stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT);
+        lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe,
+                          &off);
+        off >>= PAGE_CACHE_SHIFT;
+
+        loi = &lsm->lsm_oinfo[stripe];
+        CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset,
+               (unsigned long)off, stripe);
+        /* the only field of submd set here is the dirty tree pointer */
+        submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
+
+        rc = obd_mark_page_dirty(&lov->tgts[loi->loi_ost_idx].conn, submd, off);
+        OBD_FREE(submd, lov_stripe_md_size(1));
+        RETURN(rc);
+}
+
+/*
+ * Clear the dirty state of file pages [start, end] on every active stripe
+ * that intersects the range.
+ *
+ * @conn:    handle of the LOV device
+ * @lsm:     striping descriptor of the object; must pass lsm_bad_magic()
+ * @start:   first page index of the range
+ * @end:     last page index of the range
+ * @cleared: out; sum of the per-stripe counts reported by
+ *           obd_clear_dirty_pages().  Zeroed on entry.
+ *
+ * Returns 0 on success, -EINVAL for a bad @lsm, -ENODEV when @conn does not
+ * resolve to an export with a device, -ENOMEM when the temporary one-stripe
+ * descriptor cannot be allocated, or the first error returned by
+ * obd_clear_dirty_pages() (the loop stops there).
+ */
+static int lov_clear_dirty_pages(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm, unsigned long start,
+                                 unsigned long end, unsigned long *cleared)
+
+{
+        struct obd_export *export = class_conn2export(conn);
+        __u64 start_off = (__u64)start << PAGE_CACHE_SHIFT;
+        __u64 end_off = (__u64)end << PAGE_CACHE_SHIFT;
+        __u64 obd_start, obd_end;
+        struct lov_stripe_md *submd = NULL;
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        int i, rc;
+        unsigned long osc_cleared;
+        ENTRY;
+
+        *cleared = 0;
+
+        if (lsm_bad_magic(lsm))
+                GOTO(out_exp, rc = -EINVAL);
+
+        if (!export || !export->exp_obd)
+                GOTO(out_exp, rc = -ENODEV);
+
+        OBD_ALLOC(submd, lov_stripe_md_size(1));
+        if (submd == NULL)
+                GOTO(out_exp, rc = -ENOMEM);
+
+        lov = &export->exp_obd->u.lov;
+        rc = 0;
+        for (i = 0, loi = lsm->lsm_oinfo;
+             i < lsm->lsm_stripe_count;
+             i++, loi++) {
+                if (lov->tgts[loi->loi_ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+                        continue;
+                }
+
+                /* skip stripes that hold no part of the byte range */
+                if (!lov_stripe_intersects(lsm, i, start_off, end_off,
+                                           &obd_start, &obd_end))
+                        continue;
+                obd_start >>= PAGE_CACHE_SHIFT;
+                obd_end >>= PAGE_CACHE_SHIFT;
+
+                CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n",
+                       start, end, (unsigned long)obd_start,
+                       (unsigned long)obd_end, loi->loi_ost_idx);
+                submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
+                rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn,
+                                           submd, obd_start, obd_end,
+                                           &osc_cleared);
+                if (rc)
+                        break;
+                *cleared += osc_cleared;
+        }
+out_exp:
+        if (submd)
+                OBD_FREE(submd, lov_stripe_md_size(1));
+        /* only drop a reference that class_conn2export() actually gave us */
+        if (export)
+                class_export_put(export);
+        RETURN(rc);
+}
+
+/*
+ * Report the highest dirty page index of a striped object.
+ *
+ * @conn:   handle of the LOV device
+ * @lsm:    striping descriptor of the object; must pass lsm_bad_magic()
+ * @offset: out; largest translated value over all stripes.  Zeroed once
+ *          the arguments have been validated.
+ *
+ * Every stripe is queried through obd_last_dirty_offset() with a temporary
+ * one-stripe descriptor.  A stripe answering -ENOENT is skipped; any other
+ * error ends the scan and is returned.
+ *
+ * Returns 0 if at least one stripe reported an offset, -ENOENT if none did,
+ * -EINVAL for a bad @lsm, -ENODEV when @conn does not resolve to an export
+ * with a device, and -ENOMEM when the descriptor cannot be allocated.
+ */
+static int lov_last_dirty_offset(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm,
+                                 unsigned long *offset)
+{
+        struct obd_export *export = class_conn2export(conn);
+        struct lov_stripe_md *submd = NULL;
+        struct lov_obd *lov;
+        struct lov_oinfo *loi;
+        unsigned long tmp, count, skip;
+        int err, i, rc;
+        ENTRY;
+
+        if (lsm_bad_magic(lsm))
+                GOTO(out_exp, rc = -EINVAL);
+
+        if (!export || !export->exp_obd)
+                GOTO(out_exp, rc = -ENODEV);
+
+        OBD_ALLOC(submd, lov_stripe_md_size(1));
+        if (submd == NULL)
+                GOTO(out_exp, rc = -ENOMEM);
+
+        *offset = 0;
+        lov = &export->exp_obd->u.lov;
+        rc = -ENOENT;
+
+        /* pages per stripe chunk, and pages of the other stripes that sit
+         * between two consecutive chunks of one stripe; neither changes
+         * while we walk the stripes */
+        count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT;
+        skip = (lsm->lsm_stripe_count - 1) * count;
+
+        for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+             i++, loi++) {
+                submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline;
+
+                err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn,
+                                            submd, &tmp);
+                if (err == -ENOENT)
+                        continue;
+                if (err)
+                        GOTO(out_exp, rc = err);
+
+                rc = 0;
+                /* turn the stripe-object page index back into a file page
+                 * index; an all-ones value is passed through untranslated
+                 * (NOTE(review): presumably a "no upper bound" sentinel --
+                 * confirm against the OSC side) */
+                if (tmp != ~0UL)
+                        tmp += (tmp/count * skip) + (i * count);
+                if (tmp > *offset)
+                        *offset = tmp;
+        }
+out_exp:
+        if (submd)
+                OBD_FREE(submd, lov_stripe_md_size(1));
+        /* only drop a reference that class_conn2export() actually gave us */
+        if (export)
+                class_export_put(export);
+        RETURN(rc);
+}
+
struct obd_ops lov_obd_ops = {
o_owner: THIS_MODULE,
o_attach: lov_attach,
o_cancel: lov_cancel,
o_cancel_unused: lov_cancel_unused,
o_iocontrol: lov_iocontrol,
- o_get_info: lov_get_info
+ o_get_info: lov_get_info,
+ .o_mark_page_dirty = lov_mark_page_dirty,
+ .o_clear_dirty_pages = lov_clear_dirty_pages,
+ .o_last_dirty_offset = lov_last_dirty_offset,
};
int __init lov_init(void)
LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count);
/* XXX LOV STACKING call down to osc_unpackmd() */
- loi->loi_id = le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
+ loi->loi_id =
+ le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id);
loi->loi_ost_idx = ost_offset;
+ loi->loi_dirty_ot = &loi->loi_dirty_ot_inline;
+ ot_init(loi->loi_dirty_ot);
loi++;
}
LMM_ASSERT(loi - lsm->lsm_oinfo > 0);
void mds_open_pack(struct ptlrpc_request *req, int offset,
struct mdc_op_data *op_data,
__u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time,
- __u32 flags,
- const void *data, int datalen)
+ __u32 flags, const void *data, int datalen)
{
struct mds_rec_create *rec;
char *tmp;
rc = mdc_reint(req, level);
/* Resend if we were told to. */
if (rc == -ERESTARTSYS) {
- level = LUSTRE_CONN_RECOVD;
+ level = LUSTRE_CONN_RECOVER;
goto resend;
}
lit->opc = (__u64)it->it_op;
/* pack the intended request */
- mds_open_pack(req, 2, data, it->it_mode, 0,
- current->fsuid, current->fsgid,
- LTIME_S(CURRENT_TIME), it->it_flags,
- tgt, tgtlen);
+ mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid,
+ current->fsgid, LTIME_S(CURRENT_TIME),
+ it->it_flags, tgt, tgtlen);
/* get ready for the reply */
reply_buffers = 3;
req->rq_replen = lustre_msg_size(3, repsize);
RETURN(rc);
} else { /* rc = 0 */
struct ldlm_lock *lock = ldlm_handle2lock(lockh);
- struct lustre_handle lockh2;
LASSERT(lock);
/* If the server gave us back a different lock mode, we should
lock_mode = lock->l_req_mode;
}
- /* The server almost certainly gave us a lock other than the
- * one that we asked for. If we already have a matching lock,
- * then cancel this one--we don't need two. */
- LDLM_DEBUG(lock, "matching against this");
-
- memcpy(&lockh2, lockh, sizeof(lockh2));
- if (ldlm_lock_match(NULL,
- LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA,
- NULL, LDLM_PLAIN, NULL, 0, LCK_NL, cb_data,
- &lockh2)) {
- /* We already have a lock; cancel the new one */
- ldlm_lock_decref_and_cancel(lockh, lock_mode);
- memcpy(lockh, &lockh2, sizeof(lockh2));
- }
LDLM_LOCK_PUT(lock);
}
}
}
-static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
{
struct ptlrpc_request *req;
struct obd_statfs *msfs;
int rc, size = sizeof(*msfs);
ENTRY;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
- NULL);
+ req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0,
+ NULL, NULL);
if (!req)
RETURN(-ENOMEM);
RETURN(0);
}
-inline void fixup_handle_for_resent_req(struct ptlrpc_request *req,
+static void fixup_handle_for_resent_req(struct ptlrpc_request *req,
+ struct ldlm_lock *new_lock,
struct lustre_handle *lockh)
{
struct obd_export *exp = req->rq_export;
list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) {
struct ldlm_lock *lock;
lock = list_entry(iter, struct ldlm_lock, l_export_chain);
+ if (lock == new_lock)
+ continue;
if (lock->l_remote_handle.cookie == remote_hdl.cookie) {
lockh->cookie = lock->l_handle.h_cookie;
DEBUG_REQ(D_HA, req, "restoring lock cookie "LPX64,
rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
rep->lock_policy_res1 = IT_INTENT_EXEC;
- fixup_handle_for_resent_req(req, &lockh);
+ fixup_handle_for_resent_req(req, lock, &lockh);
/* execute policy */
switch ((long)it->opc) {
struct ldlm_reply *rep;
struct mds_body *body;
int disp, rc;
+ struct list_head *t;
+ int put_child = 1;
ENTRY;
LASSERT(offset == 2); /* only called via intent */
/* If we didn't get as far as trying to open, then some locking thing
* probably went wrong, and we'll just bail here.
*/
- if ((disp & IT_OPEN_OPEN) == 0) {
+ if ((disp & IT_OPEN_OPEN) == 0)
GOTO(out_dput, 0);
- }
/* If we failed, then we must have failed opening, so don't look for
* file descriptor or anything, just give the client the bad news.
*/
- if (req->rq_status) {
+ if (req->rq_status)
GOTO(out_dput, 0);
+
+ mfd = NULL;
+ list_for_each(t, &med->med_open_head) {
+ mfd = list_entry(t, struct mds_file_data, mfd_list);
+ if (mfd->mfd_xid == req->rq_xid)
+ break;
+ mfd = NULL;
}
if (req->rq_export->exp_outstanding_reply) {
- struct list_head *t;
- mfd = NULL;
- /* XXX can we just look in the old reply to find the handle in
- * XXX O(1) here? */
- list_for_each(t, &med->med_open_head) {
- mfd = list_entry(t, struct mds_file_data, mfd_list);
- if (mfd->mfd_xid == req->rq_xid)
- break;
- mfd = NULL;
- }
/* if we're not recovering, it had better be found */
LASSERT(mfd);
- } else {
+ } else if (mfd == NULL) {
mntget(mds->mds_vfsmnt);
+ CERROR("Re-opened file \n");
mfd = mds_dentry_open(child, mds->mds_vfsmnt,
rec->ur_flags & ~(O_DIRECT | O_TRUNC), req);
if (!mfd) {
CERROR("mds: out of memory\n");
GOTO(out_dput, req->rq_status = -ENOMEM);
}
+ put_child = 0;
}
body->handle.cookie = mfd->mfd_handle.h_cookie;
out_dput:
- l_dput(child);
+ if (put_child)
+ l_dput(child);
l_dput(parent);
EXIT;
}
if (S_ISLNK(dchild->d_inode->i_mode))
GOTO(cleanup, rc = 0);
+ if ((rec->ur_flags & O_DIRECTORY) && !S_ISDIR(dchild->d_inode->i_mode))
+ GOTO(cleanup, rc = -ENOTDIR);
+
/* Step 5: mds_open it */
rep->lock_policy_res1 |= IT_OPEN_OPEN;
obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c
obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c
-obdclass_SOURCES += fsfilt.c statfs_pack.c
+obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c
endif
include $(top_srcdir)/Rules
obd->obd_minor = minor;
obd->obd_type = type;
INIT_LIST_HEAD(&obd->obd_exports);
+ obd->obd_num_exports = 0;
INIT_LIST_HEAD(&obd->obd_imports);
spin_lock_init(&obd->obd_dev_lock);
init_waitqueue_head(&obd->obd_refcount_waitq);
EXPORT_SYMBOL(class_get_type);
EXPORT_SYMBOL(class_put_type);
EXPORT_SYMBOL(class_name2dev);
+EXPORT_SYMBOL(class_name2obd);
EXPORT_SYMBOL(class_uuid2dev);
EXPORT_SYMBOL(class_uuid2obd);
EXPORT_SYMBOL(class_export_get);
return -1;
}
+/*
+ * Look up a device by name.  Returns the obd_dev[] slot selected by
+ * class_name2dev(), or NULL when that lookup reports a negative index.
+ */
+struct obd_device *class_name2obd(char *name)
+{
+        int idx = class_name2dev(name);
+
+        return (idx < 0) ? NULL : &obd_dev[idx];
+}
+
int class_uuid2dev(struct obd_uuid *uuid)
{
int i;
struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
{
- int i;
-
- for (i = 0; i < MAX_OBD_DEVICES; i++) {
- struct obd_device *obd = &obd_dev[i];
- if (obd_uuid_equals(uuid, &obd->obd_uuid))
- return obd;
- }
-
- return NULL;
+ int dev = class_uuid2dev(uuid);
+ if (dev < 0)
+ return NULL;
+ return &obd_dev[dev];
}
void obd_cleanup_caches(void)
{
ENTRY;
+ LASSERT(exp);
CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp,
atomic_read(&exp->exp_refcount) - 1);
LASSERT(atomic_read(&exp->exp_refcount) > 0);
LASSERT(!obddev->obd_stopping); /* shouldn't happen, but might race */
atomic_inc(&obddev->obd_refcount);
list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
+ export->exp_obd->obd_num_exports++;
spin_unlock(&obddev->obd_dev_lock);
return export;
}
spin_lock(&exp->exp_obd->obd_dev_lock);
list_del_init(&exp->exp_obd_chain);
+ exp->exp_obd->obd_num_exports--;
spin_unlock(&exp->exp_obd->obd_dev_lock);
class_export_put(exp);
/* Abort any inflight DLM requests and NULL out their (about to be
* freed) import. */
+ /* Invalidate all requests on import, would be better to call
+ ptlrpc_set_import_active(imp, 0); */
+ import->imp_generation++;
ptlrpc_abort_inflight_superhack(import);
class_import_put(import);
if (next)
cur_root = (proc ? proc :
proc_mkdir(cur, cur_root));
- else if (!proc)
- proc = create_proc_entry(cur, 0444, cur_root);
+ else if (!proc) {
+ mode_t mode = 0444;
+ if (list->write_fptr)
+ mode = 0644;
+ proc = create_proc_entry(cur, mode, cur_root);
+ }
}
OBD_FREE(pathcopy, pathsize);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc
+ *
+ * our offset trees (otrees) track single-bit state of offsets in an
+ * extent tree.
+ */
+
+#define EXPORT_SYMTAB
+#include <linux/version.h>
+#include <linux/config.h>
+#include <linux/module.h>
+
+#define DEBUG_SUBSYSTEM S_OSC
+#include <linux/kp30.h>
+#include <linux/obd.h>
+#include <linux/lustre_debug.h>
+#include <linux/lustre_otree.h>
+
+/* One tree node: a run of marked offsets, both end points inclusive. */
+struct offset_extent {
+ /* linkage into the red-black tree rooted at otree.ot_root */
+ rb_node_t oe_node;
+ /* first and last marked offset of this run (oe_start <= oe_end) */
+ unsigned long oe_start, oe_end;
+};
+
+/*
+ * Return some extent in @root that overlaps the inclusive range described
+ * by @needle, or NULL when no extent does.  Which one is returned when
+ * several overlap depends on the shape of the tree.  The function takes
+ * no lock itself.
+ */
+static struct offset_extent * ot_find_oe(rb_root_t *root,
+                                         struct offset_extent *needle)
+{
+        struct rb_node_s *cur;
+        ENTRY;
+
+        CDEBUG(D_INODE, "searching [%lu -> %lu]\n", needle->oe_start,
+               needle->oe_end);
+
+        for (cur = root->rb_node; cur != NULL; ) {
+                struct offset_extent *candidate;
+
+                candidate = rb_entry(cur, struct offset_extent, oe_node);
+                if (needle->oe_end < candidate->oe_start) {
+                        /* needle lies wholly below this extent */
+                        cur = cur->rb_left;
+                        continue;
+                }
+                if (needle->oe_start > candidate->oe_end) {
+                        /* needle lies wholly above this extent */
+                        cur = cur->rb_right;
+                        continue;
+                }
+
+                CDEBUG(D_INODE, "returning [%lu -> %lu]\n",
+                       candidate->oe_start, candidate->oe_end);
+                RETURN(candidate);
+        }
+        RETURN(NULL);
+}
+
+/*
+ * Link @new_oe into the tree at @root and rebalance.  The caller guarantees
+ * that @new_oe overlaps no extent already present; an overlap found on the
+ * way down is treated as fatal (LBUG).
+ *
+ * NOTE(review): the name looks like a typo for "ot_insert_oe"; it is kept
+ * because the callers in this file use this spelling.
+ */
+static void ot_indert_oe(rb_root_t *root, struct offset_extent *new_oe)
+{
+        rb_node_t **link = &root->rb_node;
+        rb_node_t *parent = NULL;
+        ENTRY;
+
+        LASSERT(new_oe->oe_start <= new_oe->oe_end);
+
+        while (*link != NULL) {
+                struct offset_extent *cur;
+
+                parent = *link;
+                cur = rb_entry(parent, struct offset_extent, oe_node);
+                if (new_oe->oe_end < cur->oe_start) {
+                        link = &parent->rb_left;
+                } else if (new_oe->oe_start > cur->oe_end) {
+                        link = &parent->rb_right;
+                } else {
+                        LBUG();
+                }
+        }
+        rb_link_node(&new_oe->oe_node, parent, link);
+        rb_insert_color(&new_oe->oe_node, root);
+        EXIT;
+}
+
+/*
+ * Mark a single offset in the tree.
+ *
+ * If no extent covers or touches @offset a new one-element extent is
+ * inserted; if a neighbouring extent is found it is grown by one instead.
+ * Marking an offset that is already covered changes nothing.  ot_num_marked
+ * is incremented only when the offset was newly marked.
+ *
+ * Returns 0 in all of those cases and -ENOMEM when the spare node cannot
+ * be allocated.
+ */
+int ot_mark_offset(struct otree *ot, unsigned long offset)
+{
+ struct offset_extent needle, *oe, *new_oe;
+ int rc = 0;
+ ENTRY;
+
+ /* the node is allocated up front, before ot_lock is taken, even though
+ * it may turn out not to be needed; it is freed at the end if unused */
+ OBD_ALLOC(new_oe, sizeof(*new_oe));
+ if (new_oe == NULL)
+ RETURN(-ENOMEM);
+
+ spin_lock(&ot->ot_lock);
+
+ /* find neighbours that we might glom on to */
+ /* the search window is widened by one on each side, without wrapping
+ * past 0 or the largest unsigned long */
+ needle.oe_start = (offset > 0) ? offset - 1 : offset;
+ needle.oe_end = (offset < ~0) ? offset + 1 : offset;
+ oe = ot_find_oe(&ot->ot_root, &needle);
+ if ( oe == NULL ) {
+ new_oe->oe_start = offset;
+ new_oe->oe_end = offset;
+ ot_indert_oe(&ot->ot_root, new_oe);
+ ot->ot_num_marked++;
+ /* the tree owns the node now; keep the exit path from freeing it */
+ new_oe = NULL;
+ GOTO(out, rc);
+ }
+
+ /* already recorded */
+ if ( offset >= oe->oe_start && offset <= oe->oe_end )
+ GOTO(out, rc);
+
+ /* ok, need to check for adjacent neighbours */
+ /* the widened search may have returned the extent on one side while
+ * the extent on the other side already contains offset */
+ needle.oe_start = offset;
+ needle.oe_end = offset;
+ if (ot_find_oe(&ot->ot_root, &needle))
+ GOTO(out, rc);
+
+ /* ok, it's safe to extend the oe we found */
+ if ( offset == oe->oe_start - 1 )
+ oe->oe_start--;
+ else if ( offset == oe->oe_end + 1 )
+ oe->oe_end++;
+ else
+ LBUG();
+ ot->ot_num_marked++;
+
+out:
+ CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked);
+ spin_unlock(&ot->ot_lock);
+ if (new_oe)
+ OBD_FREE(new_oe, sizeof(*new_oe));
+ RETURN(rc);
+}
+
+/*
+ * Clear every marked offset in the inclusive range [start, end].
+ *
+ * Extents overlapping the range are looked up one at a time and are split,
+ * trimmed or removed as needed; ot_num_marked is reduced by the number of
+ * offsets cleared.
+ *
+ * Returns 0, or -ENOMEM when the spare node (needed only if an extent has
+ * to be split in two) cannot be allocated.
+ */
+int ot_clear_extent(struct otree *ot, unsigned long start, unsigned long end)
+{
+ struct offset_extent needle, *oe, *new_oe;
+ int rc = 0;
+ ENTRY;
+
+ /* will allocate more intelligently later */
+ OBD_ALLOC(new_oe, sizeof(*new_oe));
+ if (new_oe == NULL)
+ RETURN(-ENOMEM);
+
+ needle.oe_start = start;
+ needle.oe_end = end;
+
+ spin_lock(&ot->ot_lock);
+ /* each pass removes or shrinks one overlapping extent, so the lookup
+ * eventually comes back empty */
+ for ( ; (oe = ot_find_oe(&ot->ot_root, &needle)) ; ) {
+ rc = 0;
+
+ /* see if we're punching a hole and need to create a node */
+ /* the found extent strictly contains the range: keep its head in oe,
+ * put its tail in new_oe; nothing else can overlap, so stop */
+ if (oe->oe_start < start && oe->oe_end > end) {
+ new_oe->oe_start = end + 1;
+ new_oe->oe_end = oe->oe_end;
+ oe->oe_end = start - 1;
+ ot_indert_oe(&ot->ot_root, new_oe);
+ new_oe = NULL;
+ ot->ot_num_marked -= end - start + 1;
+ break;
+ }
+
+ /* overlapping edges */
+ /* extent starts before the range: cut off its tail */
+ if (oe->oe_start < start && oe->oe_end <= end) {
+ ot->ot_num_marked -= oe->oe_end - start + 1;
+ oe->oe_end = start - 1;
+ oe = NULL;
+ continue;
+ }
+ /* extent ends after the range: cut off its head */
+ if (oe->oe_end > end && oe->oe_start >= start) {
+ ot->ot_num_marked -= end - oe->oe_start + 1;
+ oe->oe_start = end + 1;
+ oe = NULL;
+ continue;
+ }
+
+ /* an extent entirely within the one we're clearing */
+ rb_erase(&oe->oe_node, &ot->ot_root);
+ ot->ot_num_marked -= oe->oe_end - oe->oe_start + 1;
+ /* the node is already unlinked; ot_lock is dropped around the free
+ * and retaken before the next lookup */
+ spin_unlock(&ot->ot_lock);
+ OBD_FREE(oe, sizeof(*oe));
+ spin_lock(&ot->ot_lock);
+ }
+ CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked);
+ spin_unlock(&ot->ot_lock);
+ if (new_oe)
+ OBD_FREE(new_oe, sizeof(*new_oe));
+ RETURN(rc);
+}
+
+/*
+ * Find a marked extent overlapping the inclusive range [*start, *end].
+ *
+ * On a hit the bounds of the extent found are written back through @start
+ * and @end and 0 is returned.  Otherwise both are left untouched and the
+ * result is -ENOENT.  The lookup runs under ot_lock.
+ */
+int ot_find_marked_extent(struct otree *ot, unsigned long *start,
+                          unsigned long *end)
+{
+        struct offset_extent range, *hit;
+        int rc;
+        ENTRY;
+
+        range.oe_start = *start;
+        range.oe_end = *end;
+
+        spin_lock(&ot->ot_lock);
+        hit = ot_find_oe(&ot->ot_root, &range);
+        if (hit == NULL) {
+                rc = -ENOENT;
+        } else {
+                *start = hit->oe_start;
+                *end = hit->oe_end;
+                rc = 0;
+        }
+        spin_unlock(&ot->ot_lock);
+
+        RETURN(rc);
+}
+
+/*
+ * Store the highest marked offset in *@last and return 0, or return
+ * -ENOENT (leaving *@last alone) when the tree is empty.  The walk runs
+ * under ot_lock.
+ */
+int ot_last_marked(struct otree *ot, unsigned long *last)
+{
+        struct rb_node_s *rightmost = NULL;
+        struct rb_node_s *walk;
+        int rc = -ENOENT;
+        ENTRY;
+
+        spin_lock(&ot->ot_lock);
+
+        /* the right-most node holds the extent with the largest offsets */
+        walk = ot->ot_root.rb_node;
+        while (walk != NULL) {
+                rightmost = walk;
+                walk = walk->rb_right;
+        }
+
+        if (rightmost != NULL) {
+                struct offset_extent *top;
+
+                top = rb_entry(rightmost, struct offset_extent, oe_node);
+                *last = top->oe_end;
+                rc = 0;
+        }
+
+        spin_unlock(&ot->ot_lock);
+        RETURN(rc);
+}
+
+/* Number of offsets currently marked; read without taking ot_lock. */
+unsigned long ot_num_marked(struct otree *ot)
+{
+        unsigned long marked = ot->ot_num_marked;
+
+        return marked;
+}
+
+/* Put @ot into its empty state: no nodes, zero marked, lock initialised. */
+void ot_init(struct otree *ot)
+{
+        CDEBUG(D_INODE, "initializing %p\n", ot);
+
+        ot->ot_root.rb_node = NULL;
+        ot->ot_num_marked = 0;
+        spin_lock_init(&ot->ot_lock);
+}
+
+EXPORT_SYMBOL(ot_mark_offset);
+EXPORT_SYMBOL(ot_clear_extent);
+EXPORT_SYMBOL(ot_find_marked_extent);
+EXPORT_SYMBOL(ot_last_marked);
+EXPORT_SYMBOL(ot_num_marked);
+EXPORT_SYMBOL(ot_init);
int obd_self_statfs(struct obd_device *obd, struct statfs *sfs)
{
- struct lustre_handle conn;
struct obd_export *export, *my_export = NULL;
struct obd_statfs osfs = { 0 };
int rc;
export = class_export_get(export);
spin_unlock(&obd->obd_dev_lock);
}
- conn.cookie = export->exp_handle.h_cookie;
- rc = obd_statfs(&conn, &osfs);
+ rc = obd_statfs(export, &osfs);
if (!rc)
statfs_unpack(sfs, &osfs);
/* This allows us to verify that desc_private is passed unmolested */
#define DESC_PRIV 0x10293847
-int echo_preprw(int cmd, struct obd_export *export, int objcount,
- struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb,
- struct niobuf_local *res, void **desc_private,
- struct obd_trans_info *oti)
+int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
+ int objcount, struct obd_ioobj *obj, int niocount,
+ struct niobuf_remote *nb, struct niobuf_local *res,
+ void **desc_private, struct obd_trans_info *oti)
{
struct obd_device *obd;
struct niobuf_local *r = res;
struct obd_ioctl_data* data = buf;
struct echo_client_obd *ec = &obddev->u.echo_client;
struct obd_device *tgt;
- struct obd_uuid uuid;
struct lov_stripe_md *lsm = NULL;
struct obd_uuid echo_uuid = { "ECHO_UUID" };
int rc;
ENTRY;
if (data->ioc_inllen1 < 1) {
- CERROR("requires a TARGET OBD UUID\n");
- RETURN(-EINVAL);
- }
- if (data->ioc_inllen1 > 37) {
- CERROR("OBD UUID must be less than 38 characters\n");
+ CERROR("requires a TARGET OBD name\n");
RETURN(-EINVAL);
}
- obd_str2uuid(&uuid, data->ioc_inlbuf1);
- tgt = class_uuid2obd(&uuid);
+ tgt = class_name2obd(data->ioc_inlbuf1);
if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
- CERROR("device not attached or not set up (%d)\n",
- data->ioc_dev);
- RETURN(rc = -EINVAL);
+ CERROR("device not attached or not set up (%d/%s)\n",
+ data->ioc_dev, data->ioc_inlbuf1);
+ RETURN(-EINVAL);
}
spin_lock_init (&ec->ec_lock);
RETURN(rc);
/* we don't allocate new transnos for replayed requests */
-#if 0
- /* perhaps if transno already set? or should level be in oti? */
- if (req->rq_level == LUSTRE_CONN_RECOVD)
- GOTO(out, rc = 0);
-#endif
-
- off = fed->fed_lr_off;
-
- spin_lock(&filter->fo_translock);
- last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd);
- filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd + 1);
- spin_unlock(&filter->fo_translock);
- if (oti)
+ if (oti && oti->oti_transno == 0) {
+ spin_lock(&filter->fo_translock);
+ last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1;
+ filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd);
+ spin_unlock(&filter->fo_translock);
oti->oti_transno = last_rcvd;
- fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
- fcd->fcd_mount_count = filter->fo_fsd->fsd_mount_count;
-
- /* get this from oti */
-#if 0
- if (oti)
- fcd->fcd_last_xid = cpu_to_le64(oti->oti_xid);
- else
-#else
- fcd->fcd_last_xid = 0;
-#endif
- fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
- written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, sizeof(*fcd),
- &off);
- CDEBUG(D_INODE, "wrote trans #"LPD64" for client %s at #%d: written = "
- LPSZ"\n", last_rcvd, fcd->fcd_uuid, fed->fed_lr_idx, written);
+ fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd);
+ fcd->fcd_mount_count = filter->fo_fsd->fsd_mount_count;
+
+ /* could get xid from oti, if it's ever needed */
+ fcd->fcd_last_xid = 0;
+
+ off = fed->fed_lr_off;
+ fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb);
+ written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd,
+ sizeof(*fcd), &off);
+ CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: "
+ "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid,
+ fed->fed_lr_idx, written);
+
+ if (written == sizeof(*fcd))
+ RETURN(0);
+ CERROR("error writing to last_rcvd file: rc = %d\n",
+ (int)written);
+ if (written >= 0)
+ RETURN(-EIO);
- if (written == sizeof(*fcd))
- RETURN(0);
- CERROR("error writing to last_rcvd file: rc = %d\n", (int)written);
- if (written >= 0)
- RETURN(-EIO);
+ RETURN(written);
+ }
- RETURN(written);
+ RETURN(0);
}
static inline void f_dput(struct dentry *dentry)
LASSERT(bitmap != NULL);
- /* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
+ /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
RETURN(0);
struct obd_run_ctxt saved;
int written;
loff_t off;
+ ENTRY;
if (!fed->fed_fcd)
RETURN(0);
- if (failover != 0) {
- OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd));
- RETURN(0);
- }
+ if (failover != 0)
+ GOTO(free, 0);
+
+ /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
+ if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID"))
+ GOTO(free, 0);
LASSERT(filter->fo_last_rcvd_slots != NULL);
fed->fed_fcd->fcd_uuid, fed->fed_lr_idx,fed->fed_lr_off);
}
+free:
OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd));
- return 0;
+ RETURN(0);
}
static int filter_free_server_data(struct filter_obd *filter)
struct filter_dentry_data *fdd = dchild->d_fsdata;
struct lustre_handle parent_lockh;
int rc, rc2, cleanup_phase = 0;
- struct dentry *dparent;
+ struct dentry *dparent = NULL;
struct obd_run_ctxt saved;
ENTRY;
return lustre_commit_write(lnb);
}
-static int filter_preprw(int cmd, struct obd_export *exp,
+static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
struct niobuf_local *res, void **desc_private,
ioo.ioo_type = S_IFREG;
ioo.ioo_bufcnt = oa_bufs;
- ret = filter_preprw(cmd, export, 1, &ioo, oa_bufs, rnb, lnb,
+ ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb,
&desc_private, oti);
if (ret != 0)
GOTO(out, ret);
RETURN(rc);
}
-static int filter_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs)
{
- struct obd_device *obd;
+ struct obd_device *obd = exp->exp_obd;
ENTRY;
- obd = class_conn2obd(conn);
-
RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs));
}
#include <linux/kp30.h>
#include <linux/lustre_mds.h> /* for mds_objid */
+#include <linux/lustre_otree.h>
#include <linux/obd_ost.h>
+#include <linux/obd_lov.h>
#ifndef __CYGWIN__
#include <linux/ctype.h>
}
}
- lsm_size = sizeof(**lsmp);
+ lsm_size = lov_stripe_md_size(1);
if (!lsmp)
RETURN(lsm_size);
OBD_ALLOC(*lsmp, lsm_size);
if (!*lsmp)
RETURN(-ENOMEM);
+
+ (*lsmp)->lsm_oinfo[0].loi_dirty_ot =
+ &(*lsmp)->lsm_oinfo[0].loi_dirty_ot_inline;
+ ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot);
}
if (lmm) {
/* XXX zero *lsmp? */
(*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id);
- (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
LASSERT((*lsmp)->lsm_object_id);
}
+ (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
+
RETURN(lsm_size);
}
RETURN (rc);
}
- body = lustre_swab_repbuf (req, 0, sizeof (*body),
- lustre_swab_ost_body);
+ body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
if (body == NULL) {
CERROR ("can't unpack ost_body\n");
RETURN (-EPROTO);
oa->o_blksize = OSC_BRW_MAX_SIZE;
oa->o_valid |= OBD_MD_FLBLKSZ;
+ /* XXX LOV STACKING: the lsm that is passed to us from LOV does not
+ * have valid lsm_oinfo data structs, so don't go touching that.
+ * This needs to be fixed in a big way.
+ */
lsm->lsm_object_id = oa->o_id;
lsm->lsm_stripe_count = 0;
lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
return rc;
}
+/*
+ * Attach this client's write-cache accounting to an outgoing request body:
+ * o_blocks is loaded with cl_dirty and o_rdev with cl_dirty_granted, and
+ * OBD_MD_FLBLOCKS and OBD_MD_FLRDEV are set in o_valid.  Neither bit may
+ * already be set (asserted).
+ */
+static void osc_announce_cached(struct client_obd *cli, struct ost_body *body)
+{
+        obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLRDEV;
+
+        LASSERT(!(body->oa.o_valid & bits));
+
+        body->oa.o_valid |= bits;
+        down(&cli->cl_dirty_sem);
+        body->oa.o_blocks = cli->cl_dirty;
+        body->oa.o_rdev = cli->cl_dirty_granted;
+        /* log before releasing cl_dirty_sem so the message shows the same
+         * counters that were just copied into the request */
+        CDEBUG(D_INODE, "announcing "LPU64" dirty "LPU64" granted\n",
+               cli->cl_dirty, cli->cl_dirty_granted);
+        up(&cli->cl_dirty_sem);
+}
+
+/*
+ * Absorb the grant carried in a reply body.
+ *
+ * When the reply has OBD_MD_FLRDEV set, o_rdev becomes the new
+ * cl_dirty_granted (updated under cl_dirty_sem).  When it does not,
+ * cl_ost_can_grant is cleared, with a debug message on the transition
+ * from set to clear.
+ */
+static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
+{
+        if (body->oa.o_valid & OBD_MD_FLRDEV) {
+                CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev);
+                down(&cli->cl_dirty_sem);
+                cli->cl_dirty_granted = body->oa.o_rdev;
+                /* XXX check for over-run and wake up the io thread that
+                 * doesn't exist yet */
+                up(&cli->cl_dirty_sem);
+                return;
+        }
+
+        /* reply carries no grant field */
+        if (cli->cl_ost_can_grant) {
+                CDEBUG(D_INODE, "%s can't grant\n",
+                       cli->cl_import->imp_target_uuid.uuid);
+        }
+        cli->cl_ost_can_grant = 0;
+}
+
/* We assume that the reason this OSC got a short read is because it read
* beyond the end of a stripe file; i.e. lustre is reading a sparse file
* via the LOV, and it _knows_ it's reading inside the file, it's just that
}
#if CHECKSUM_BULK
-static __u64 cksum_pages(int nob, obd_count page_count, struct brw_page *pga)
+static obd_count cksum_pages(int nob, obd_count page_count,
+ struct brw_page *pga)
{
- __u64 cksum = 0;
+ obd_count cksum = 0;
char *ptr;
int i;
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
+ struct client_obd *cli = &imp->imp_obd->u.cli;
struct ost_body *body;
struct obd_ioobj *ioobj;
struct niobuf_remote *niobuf;
requested_nob += pg->count;
- if (i > 0 &&
- can_merge_pages (pg_prev, pg)) {
+ if (i > 0 && can_merge_pages (pg_prev, pg)) {
niobuf--;
niobuf->len += pg->count;
} else {
#if CHECKSUM_BULK
body->oa.o_valid |= OBD_MD_FLCKSUM;
if (opc == OST_BRW_WRITE)
- body->oa.o_rdev = cksum_pages (requested_nob, page_count, pga);
+ body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga);
#endif
+ osc_announce_cached(cli, body);
spin_lock_irqsave (&req->rq_lock, flags);
req->rq_no_resend = 1;
spin_unlock_irqrestore (&req->rq_lock, flags);
obd_count page_count, struct brw_page *pga,
int rc)
{
+ struct client_obd *cli = &req->rq_import->imp_obd->u.cli;
+ struct ost_body *body;
if (rc < 0)
return (rc);
+ body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body);
+ if (body == NULL) {
+ CERROR ("Can't unpack body\n");
+ RETURN(-EPROTO);
+ }
+ osc_update_grant(cli, body);
+
if (req->rq_reqmsg->opc == OST_WRITE) {
if (rc > 0) {
CERROR ("Unexpected +ve rc %d\n", rc);
}
if (rc < requested_nob)
- handle_short_read (rc, page_count, pga);
+ handle_short_read(rc, page_count, pga);
#if CHECKSUM_BULK
- imp = req->rq_import;
- body = lustre_swab_repmsg (req, 0, sizeof (*body),
- lustre_swab_ost_body);
- if (body == NULL) {
- CERROR ("Can't unpack body\n");
- } else if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
static int cksum_counter;
- __u64 server_cksum = body->oa.o_rdev;
- __u64 cksum = cksum_pages (rc, page_count, pga);
+ obd_count server_cksum = body->oa.o_nlink;
+ obd_count cksum = cksum_pages(rc, page_count, pga);
cksum_counter++;
if (server_cksum != cksum) {
imp->imp_connection->c_peer.peer_nid);
cksum_counter = 0;
} else if ((cksum_counter & (-cksum_counter)) == cksum_counter)
- CERROR("Checksum %u from "LPX64" OK: "LPX64"\n",
+ CERROR("Checksum %u from "LPX64" OK: %x\n",
cksum_counter,
imp->imp_connection->c_peer.peer_nid, cksum);
} else {
#endif
#endif
+/*
+ * Note that the page at @offset of the object described by @lsm is now dirty
+ * and charge PAGE_CACHE_SIZE bytes to the client's dirty-byte accounting.
+ * Only the dirty tree of stripe 0 (lsm_oinfo[0]) is used.
+ *
+ * Everything happens under cl_dirty_sem.  When cl_ost_can_grant is set and
+ * the new dirty total would reach or exceed cl_dirty_granted, nothing is
+ * marked and -EDQUOT is returned.  A non-zero result from ot_mark_offset()
+ * is returned unchanged, also without touching the dirty count.  Returns 0
+ * once the offset has been marked and accounted for.
+ */
+static int osc_mark_page_dirty(struct lustre_handle *conn,
+                               struct lov_stripe_md *lsm, unsigned long offset)
+{
+        struct otree *tree = lsm->lsm_oinfo[0].loi_dirty_ot;
+        struct client_obd *client = &class_conn2obd(conn)->u.cli;
+        int rc;
+        ENTRY;
+
+        down(&client->cl_dirty_sem);
+
+        /* Out of grant: refuse before anything gets marked. */
+        if (client->cl_ost_can_grant &&
+            client->cl_dirty_granted <= client->cl_dirty + PAGE_CACHE_SIZE) {
+                CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n",
+                       client->cl_dirty_granted,
+                       client->cl_dirty + PAGE_CACHE_SIZE);
+                GOTO(out, rc = -EDQUOT);
+        }
+
+        rc = ot_mark_offset(tree, offset);
+        if (rc != 0)
+                GOTO(out, rc);
+
+        /* The offset is marked; only now does the page count as dirty. */
+        client->cl_dirty += PAGE_CACHE_SIZE;
+        CDEBUG(D_INODE, "dirtied off %lu, now "LPU64" bytes dirty\n",
+               offset, client->cl_dirty);
+out:
+        up(&client->cl_dirty_sem);
+        RETURN(rc);
+}
+
+/*
+ * Clear the dirty marks in [@start, @end] on stripe 0 of @lsm and give the
+ * corresponding bytes back to the client's dirty accounting.
+ *
+ * @cleared receives the number of marks that went away; it is set to 0 up
+ * front so the caller sees a defined value even when ot_clear_extent()
+ * fails.  The work is done under cl_dirty_sem.  Returns 0 on success or the
+ * non-zero result of ot_clear_extent(), in which case the dirty count is
+ * left untouched.
+ */
+static int osc_clear_dirty_pages(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm,
+                                 unsigned long start, unsigned long end,
+                                 unsigned long *cleared)
+{
+        struct client_obd *cli = &class_conn2obd(conn)->u.cli;
+        struct otree *dirty_ot = lsm->lsm_oinfo[0].loi_dirty_ot;
+        unsigned long old_marked, new_marked;
+        int rc;
+        ENTRY;
+
+        /* Never leave the caller's counter indeterminate on the error path. */
+        *cleared = 0;
+
+        down(&cli->cl_dirty_sem);
+
+        old_marked = ot_num_marked(dirty_ot);
+
+        rc = ot_clear_extent(dirty_ot, start, end);
+        if (rc)
+                GOTO(out, rc);
+
+        new_marked = ot_num_marked(dirty_ot);
+
+        LASSERT(new_marked <= old_marked);
+        /* Widen before scaling, exactly as the subtraction below does, so the
+         * check cannot wrap where unsigned long is 32 bits. */
+        LASSERT(((__u64)old_marked << PAGE_CACHE_SHIFT) <= cli->cl_dirty);
+        *cleared = old_marked - new_marked;
+        cli->cl_dirty -= (__u64)*cleared << PAGE_CACHE_SHIFT;
+        CDEBUG(D_INODE, "cleared [%lu,%lu], now "LPU64" bytes dirty\n",
+               start, end, cli->cl_dirty);
+
+out:
+        up(&cli->cl_dirty_sem);
+        RETURN(rc);
+}
+
+/*
+ * Ask the dirty tree of stripe 0 of @lsm for its last marked offset.
+ * @offset is handed straight to ot_last_marked() to be filled in, and that
+ * call's result is returned unchanged.  @conn is not used.
+ */
+static int osc_last_dirty_offset(struct lustre_handle *conn,
+                                 struct lov_stripe_md *lsm,
+                                 unsigned long *offset)
+{
+        struct otree *tree = lsm->lsm_oinfo[0].loi_dirty_ot;
+        int result;
+        ENTRY;
+
+        result = ot_last_marked(tree, offset);
+        RETURN(result);
+}
+
static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm,
struct lustre_handle *parent_lock,
__u32 type, void *extentp, int extent_len, __u32 mode,
opaque);
}
-static int osc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
+static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs)
{
struct obd_statfs *msfs;
struct ptlrpc_request *request;
int rc, size = sizeof(*osfs);
ENTRY;
- request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_STATFS, 0, NULL,
- NULL);
+ request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0,
+ NULL, NULL);
if (!request)
RETURN(-ENOMEM);
o_cancel: osc_cancel,
o_cancel_unused: osc_cancel_unused,
o_iocontrol: osc_iocontrol,
- o_get_info: osc_get_info
+ o_get_info: osc_get_info,
+ .o_mark_page_dirty = osc_mark_page_dirty,
+ .o_clear_dirty_pages = osc_clear_dirty_pages,
+ .o_last_dirty_offset = osc_last_dirty_offset,
};
struct obd_ops sanosc_obd_ops = {
o_cancel: osc_cancel,
o_cancel_unused: osc_cancel_unused,
o_iocontrol: osc_iocontrol,
+ .o_mark_page_dirty = osc_mark_page_dirty,
+ .o_clear_dirty_pages = osc_clear_dirty_pages,
+ .o_last_dirty_offset = osc_last_dirty_offset,
};
int __init osc_init(void)
#include <linux/init.h>
#include <linux/lprocfs_status.h>
+/*
+ * Reset @oti to all zeroes and, when @req already has both a request and a
+ * reply message attached, seed oti_transno from the reply's transno.
+ *
+ * A NULL @oti makes this a no-op.  A NULL @req is tolerated: @oti is still
+ * cleared, it just gets no transno.
+ */
+inline void oti_init(struct obd_trans_info *oti,
+                     struct ptlrpc_request *req)
+{
+        /* Pair the trace markers: every exit below is matched by this. */
+        ENTRY;
+
+        if (oti == NULL) {
+                EXIT;
+                return;
+        }
+        memset(oti, 0, sizeof *oti);
+
+        if (req != NULL && req->rq_repmsg != NULL && req->rq_reqmsg != NULL)
+                oti->oti_transno = req->rq_repmsg->transno;
+
+        EXIT;
+}
+
inline void oti_to_request(struct obd_trans_info *oti,
struct ptlrpc_request *req)
{
static int ost_statfs(struct ptlrpc_request *req)
{
- struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg;
struct obd_statfs *osfs;
int rc, size = sizeof(*osfs);
ENTRY;
osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs));
memset(osfs, 0, size);
- req->rq_status = obd_statfs(conn, osfs);
+ req->rq_status = obd_statfs(req->rq_export, osfs);
if (req->rq_status != 0)
CERROR("ost: statfs failed: rc %d\n", req->rq_status);
if (desc == NULL)
GOTO(out_local, rc = -ENOMEM);
- rc = obd_preprw(OBD_BRW_READ, req->rq_export, 1, ioo, npages,
+ rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages,
pp_rnb, local_nb, &desc_priv, NULL);
if (rc != 0)
GOTO(out_bulk, rc);
if (desc == NULL)
GOTO(out_local, rc = -ENOMEM);
- rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, objcount, ioo,
+ rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo,
npages, pp_rnb, local_nb, &desc_priv, oti);
if (rc != 0)
GOTO (out_bulk, rc);
static int ost_handle(struct ptlrpc_request *req)
{
- struct obd_trans_info trans_info = { 0, }, *oti = &trans_info;
+ struct obd_trans_info trans_info = { 0, };
+ struct obd_trans_info *oti = &trans_info;
int should_process, fail = OBD_FAIL_OST_ALL_REPLY_NET, rc = 0;
ENTRY;
if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0)
GOTO(out, rc = -EINVAL);
+ oti_init(oti, req);
+
switch (req->rq_reqmsg->opc) {
case OST_CONNECT:
CDEBUG(D_INODE, "connect\n");
return (-EINVAL);
err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count);
+ kportal_put_ni (data->ioc_nal);
break;
}
GOTO(out_req, rc = -ENOMEM);
request->rq_replen = lustre_msg_size(0, NULL);
- request->rq_level = LUSTRE_CONN_RECOVD;
+ request->rq_level = LUSTRE_CONN_RECOVER;
rc = ptlrpc_queue_wait(request);
int type, int portal)
{
struct obd_import *imp = req->rq_import;
- unsigned long flags;
struct ptlrpc_bulk_desc *desc;
LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
if (desc == NULL)
RETURN(NULL);
- /* Is this sampled at the right place? Do we want to get the import
- * generation just before we send? Should it match the generation of
- * the request? */
- spin_lock_irqsave(&imp->imp_lock, flags);
- desc->bd_import_generation = imp->imp_generation;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
-
+ desc->bd_import_generation = req->rq_import_generation;
desc->bd_import = class_import_get(imp);
desc->bd_req = req;
desc->bd_type = type;
RETURN(-ENOTCONN);
}
- rc = ptlrpc_request_handle_eviction(req);
- if (rc)
- CERROR("can't reconnect to %s@%s: %d\n",
- imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid, rc);
- else
- ptlrpc_wake_delayed(imp);
+ ptlrpc_request_handle_eviction(req);
if (req->rq_err)
RETURN(-EIO);
/* Replay-enabled imports return commit-status information. */
if (req->rq_repmsg->last_committed) {
- if (req->rq_repmsg->last_committed <
- imp->imp_peer_committed_transno) {
- CERROR("%s went back in time (transno "LPD64
- " was committed, server claims "LPD64
- ")! is shared storage not coherent?\n",
- imp->imp_target_uuid.uuid,
- imp->imp_peer_committed_transno,
- req->rq_repmsg->last_committed);
- }
imp->imp_peer_committed_transno =
req->rq_repmsg->last_committed;
}
RETURN(rc);
}
-static int check_set(struct ptlrpc_request_set *set)
+int ptlrpc_check_set(struct ptlrpc_request_set *set)
{
unsigned long flags;
struct list_head *tmp;
list_del(&req->rq_list);
list_add_tail(&req->rq_list,
&imp->imp_sending_list);
+
+ if (req->rq_import_generation <
+ imp->imp_generation) {
+ req->rq_status = -EIO;
+ req->rq_phase = RQ_PHASE_INTERPRET;
+ spin_unlock_irqrestore(&imp->imp_lock,
+ flags);
+ GOTO (interpret, req->rq_status);
+ }
spin_unlock_irqrestore(&imp->imp_lock, flags);
req->rq_waiting = 0;
LASSERT (req->rq_phase == RQ_PHASE_INTERPRET);
LASSERT (!req->rq_receiving_reply);
+ ptlrpc_unregister_reply(req);
if (req->rq_bulk != NULL)
ptlrpc_unregister_bulk (req);
RETURN (set->set_remaining == 0);
}
-static int expire_one_request(struct ptlrpc_request *req)
+int ptlrpc_expire_one_request(struct ptlrpc_request *req)
{
unsigned long flags;
struct obd_import *imp = req->rq_import;
continue;
/* deal with this guy */
- expire_one_request (req);
+ ptlrpc_expire_one_request (req);
}
/* When waiting for a whole set, we always to break out of the
continue;
}
+ req->rq_import_generation = imp->imp_generation;
+
if (req->rq_level > imp->imp_level) {
if (req->rq_no_recov || imp->imp_obd->obd_no_recov ||
imp->imp_dlm_fake) {
/* XXX this is the same as ptlrpc_queue_wait */
LASSERT(list_empty(&req->rq_list));
list_add_tail(&req->rq_list, &imp->imp_sending_list);
- req->rq_import_generation = imp->imp_generation;
spin_unlock_irqrestore(&imp->imp_lock, flags);
CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc"
* req times out */
CDEBUG(D_HA, "set %p going to sleep for %d seconds\n",
set, timeout);
- lwi = LWI_TIMEOUT_INTR(timeout ? timeout * HZ : 1,
+ lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ,
expired_set, interrupted_set, set);
- rc = l_wait_event(set->set_waitq, check_set(set), &lwi);
+ rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT);
if (request == NULL)
RETURN(1);
- if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
+ if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) ||
+ request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) {
CERROR("dereferencing freed request (bug 575)\n");
LBUG();
RETURN(1);
LASSERT (req != last_req);
last_req = req;
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_HA, req, "freeing request with old gen");
+ GOTO(free_req, 0);
+ }
+
if (req->rq_replay) {
DEBUG_REQ(D_HA, req, "keeping (FL_REPLAY)");
continue;
DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")",
imp->imp_peer_committed_transno);
+free_req:
list_del_init(&req->rq_list);
__ptlrpc_req_finished(req, 1);
}
struct ptlrpc_request *req = data;
ENTRY;
- RETURN(expire_one_request(req));
+ RETURN(ptlrpc_expire_one_request(req));
}
static void interrupted_request(void *data)
/* Mark phase here for a little debug help */
req->rq_phase = RQ_PHASE_RPC;
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ req->rq_import_generation = imp->imp_generation;
restart:
/*
* If the import has been invalidated (such as by an OST failure), the
* through, though, so that they have a chance to revalidate the
* import.
*/
- spin_lock_irqsave(&imp->imp_lock, flags);
if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) {
DEBUG_REQ(D_ERROR, req, "IMP_INVALID:");
spin_unlock_irqrestore(&imp->imp_lock, flags);
GOTO (out, rc = -EIO);
}
+ if (req->rq_import_generation < imp->imp_generation) {
+ DEBUG_REQ(D_ERROR, req, "req old gen:");
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ GOTO (out, rc = -EIO);
+ }
+
if (req->rq_level > imp->imp_level) {
list_del(&req->rq_list);
if (req->rq_no_recov || obd->obd_no_recov ||
spin_lock_irqsave(&imp->imp_lock, flags);
list_del_init(&req->rq_list);
- if (req->rq_err)
+ if (req->rq_err ||
+ req->rq_import_generation < imp->imp_generation)
rc = -EIO;
+
if (rc) {
spin_unlock_irqrestore(&imp->imp_lock, flags);
GOTO (out, rc);
/* XXX this is the same as ptlrpc_set_wait */
LASSERT(list_empty(&req->rq_list));
list_add_tail(&req->rq_list, &imp->imp_sending_list);
- req->rq_import_generation = imp->imp_generation;
spin_unlock_irqrestore(&imp->imp_lock, flags);
rc = ptl_send_rpc(req);
ptlrpc_unregister_bulk (req);
DEBUG_REQ(D_HA, req, "resending: ");
+ spin_lock_irqsave(&imp->imp_lock, flags);
goto restart;
}
if (req->rq_bulk != NULL)
ptlrpc_unregister_bulk (req);
DEBUG_REQ(D_HA, req, "resending: ");
+ spin_lock_irqsave(&imp->imp_lock, flags);
goto restart;
}
old_level = req->rq_level;
if (req->rq_replied)
old_status = req->rq_repmsg->status;
- req->rq_level = LUSTRE_CONN_RECOVD;
+ req->rq_level = LUSTRE_CONN_RECOVER;
rc = ptl_send_rpc(req);
if (rc) {
CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
* this flag and then putting requests on sending_list or delayed_list.
*/
spin_lock_irqsave(&imp->imp_lock, flags);
- if (!imp->imp_replayable)
- /* on b_devel, I moved this line to
- ptlrpc_set_import_active because I thought it made
- more sense there and possibly not all callers of
- this function expect this. I'll leave it here until
- I can figure out if it's correct or not. - rread 5/12/03 */
- imp->imp_invalid = 1;
/* XXX locking? Maybe we should remove each request with the list
* locked? Also, how do we know if the requests on the list are
DEBUG_REQ(D_HA, req, "inflight");
spin_lock (&req->rq_lock);
- req->rq_err = 1;
- if (req->rq_set != NULL)
- wake_up(&req->rq_set->set_waitq);
- else
- wake_up(&req->rq_wait_for_rep);
+ if (req->rq_import_generation < imp->imp_generation) {
+ req->rq_err = 1;
+ if (req->rq_set != NULL)
+ wake_up(&req->rq_set->set_waitq);
+ else
+ wake_up(&req->rq_wait_for_rep);
+ }
spin_unlock (&req->rq_lock);
}
DEBUG_REQ(D_HA, req, "aborting waiting req");
spin_lock (&req->rq_lock);
- req->rq_err = 1;
- if (req->rq_set != NULL)
- wake_up(&req->rq_set->set_waitq);
- else
- wake_up(&req->rq_wait_for_rep);
- spin_unlock (&req->rq_lock);
+                /* Only requests from an older import generation are failed,
+                 * but rq_lock was taken above for every request, so it has to
+                 * be dropped whether or not this one is. */
+                if (req->rq_import_generation < imp->imp_generation) {
+                        req->rq_err = 1;
+                        if (req->rq_set != NULL)
+                                wake_up(&req->rq_set->set_waitq);
+                        else
+                                wake_up(&req->rq_wait_for_rep);
+                }
+                spin_unlock (&req->rq_lock);
}
/* Last chance to free reqs left on the replay list, but we
#include <linux/lustre_net.h>
#include <linux/lustre_lib.h>
#include <linux/obd.h>
+#include "ptlrpc_internal.h"
static int ptl_send_buf(struct ptlrpc_request *request,
struct ptlrpc_connection *conn, int portal)
spin_unlock_irqrestore (&request->rq_lock, flags);
request->rq_sent = LTIME_S(CURRENT_TIME);
+ ptlrpc_pinger_sending_on_import(request->rq_import);
rc = ptl_send_buf(request, request->rq_connection,
request->rq_request_portal);
if (rc == 0)
* Portal-RPC reconnection and replay operations, for use in recovery.
*
* Copyright (c) 2003 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
+ * Authors: Phil Schwan <phil@clusterfs.com>
+ * Mike Shaver <shaver@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
*/
#include <linux/version.h>
+#include <asm/semaphore.h>
#define DEBUG_SUBSYSTEM S_RPC
#include <linux/obd_support.h>
#include "ptlrpc_internal.h"
static struct ptlrpc_thread *pinger_thread = NULL;
-static spinlock_t pinger_lock = SPIN_LOCK_UNLOCKED;
+static DECLARE_MUTEX(pinger_sem);
static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
+int ptlrpc_start_pinger(void);
+int ptlrpc_stop_pinger(void);
+
+/*
+ * Push the next ping of @imp out to obd_timeout * HZ jiffies from now.
+ * The new deadline is computed and stored while holding pinger_sem.
+ */
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
+{
+        unsigned long deadline;
+
+        down(&pinger_sem);
+        deadline = jiffies + (obd_timeout * HZ);
+        imp->imp_next_ping = deadline;
+        up(&pinger_sem);
+}
+
int ptlrpc_pinger_add_import(struct obd_import *imp)
{
+ int rc;
ENTRY;
+
+#ifndef ENABLE_PINGER
+ RETURN(0);
+#else
if (!list_empty(&imp->imp_pinger_chain))
RETURN(-EALREADY);
- spin_lock(&pinger_lock);
- list_add(&imp->imp_pinger_chain, &pinger_imports);
- spin_unlock(&pinger_lock);
+ down(&pinger_sem);
+ if (list_empty(&pinger_imports)) {
+ up(&pinger_sem);
+ rc = ptlrpc_start_pinger();
+ if (rc < 0)
+ RETURN(rc);
+ down(&pinger_sem);
+ }
+
+ CDEBUG(D_HA, "adding pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+ imp->imp_next_ping = jiffies + (obd_timeout * HZ);
+ list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */
+ class_import_get(imp);
+ up(&pinger_sem);
RETURN(0);
+#endif
}
int ptlrpc_pinger_del_import(struct obd_import *imp)
{
+ int rc;
ENTRY;
+
+#ifndef ENABLE_PINGER
+ RETURN(0);
+#else
if (list_empty(&imp->imp_pinger_chain))
- RETURN(-EALREADY);
+ RETURN(-ENOENT);
- spin_lock(&pinger_lock);
+ down(&pinger_sem);
list_del_init(&imp->imp_pinger_chain);
- spin_unlock(&pinger_lock);
+ CDEBUG(D_HA, "removing pingable import %s->%s\n",
+ imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid);
+ class_import_put(imp);
+ if (list_empty(&pinger_imports)) {
+ up(&pinger_sem);
+ rc = ptlrpc_stop_pinger();
+ if (rc)
+ RETURN(rc);
+ down(&pinger_sem);
+ }
+ up(&pinger_sem);
RETURN(0);
-}
-
-static void ptlrpc_pinger_do_stuff(void)
-{
-
-
-
+#endif
}
static int ptlrpc_pinger_main(void *arg)
struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
struct ptlrpc_thread *thread = data->thread;
unsigned long flags;
- int rc = 0;
ENTRY;
lock_kernel();
thread->t_flags = SVC_RUNNING;
wake_up(&thread->t_ctl_waitq);
- /* And now, loop forever on requests */
+ /* And now, loop forever, pinging as needed. */
while (1) {
- struct l_wait_info lwi = LWI_TIMEOUT(5 * HZ, NULL, NULL);
- l_wait_event(thread->t_ctl_waitq,
- thread->t_flags & SVC_STOPPING, &lwi);
+ unsigned long this_ping = jiffies;
+ long time_to_next_ping;
+ struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL);
+ struct ptlrpc_request_set *set;
+ struct ptlrpc_request *req;
+ struct list_head *iter;
+ wait_queue_t set_wait;
+ int rc;
+
+ set = ptlrpc_prep_set();
+ down(&pinger_sem);
+ list_for_each(iter, &pinger_imports) {
+ struct obd_import *imp =
+ list_entry(iter, struct obd_import, imp_pinger_chain);
+ int generation, level;
+ unsigned long flags;
+
+ if (imp->imp_next_ping <= this_ping) {
+ /* Add a ping. */
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ generation = imp->imp_generation;
+ level = imp->imp_level;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+
+ if (level != LUSTRE_CONN_FULL) {
+ CDEBUG(D_HA, "not pinging %s (in recovery)\n",
+ imp->imp_target_uuid.uuid);
+ continue;
+ }
+
+ req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL);
+ if (!req) {
+ CERROR("OOM trying to ping\n");
+ break;
+ }
+ req->rq_replen = lustre_msg_size(0, NULL);
+ req->rq_level = LUSTRE_CONN_FULL;
+ req->rq_phase = RQ_PHASE_RPC;
+ req->rq_import_generation = generation;
+ ptlrpc_set_add_req(set, req);
+ } else {
+ CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n",
+ imp->imp_target_uuid.uuid, imp->imp_next_ping,
+ this_ping);
+ }
+ }
+ up(&pinger_sem);
+
+ /* Might be empty, that's OK. */
+ if (set->set_remaining == 0)
+ CDEBUG(D_HA, "nothing to ping\n");
+ list_for_each(iter, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(iter, struct ptlrpc_request, rq_set_chain);
+ DEBUG_REQ(D_HA, req, "pinging %s->%s",
+ req->rq_import->imp_obd->obd_uuid.uuid,
+ req->rq_import->imp_target_uuid.uuid);
+ (void)ptl_send_rpc(req);
+ }
+
+ /* Have to wait on both the thread's queue and the set's. */
+ init_waitqueue_entry(&set_wait, current);
+ add_wait_queue(&set->set_waitq, &set_wait);
+ rc = l_wait_event(thread->t_ctl_waitq,
+ thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set),
+ &lwi);
+ remove_wait_queue(&set->set_waitq, &set_wait);
+ CDEBUG(D_HA, "ping complete (%lu)\n", jiffies);
if (thread->t_flags & SVC_STOPPING) {
thread->t_flags &= ~SVC_STOPPING;
+ list_for_each(iter, &set->set_requests) {
+ req = list_entry(iter, struct ptlrpc_request,
+ rq_set_chain);
+ if (!req->rq_replied)
+ ptlrpc_unregister_reply(req);
+ }
+ ptlrpc_set_destroy(set);
EXIT;
break;
}
- ptlrpc_pinger_do_stuff();
+
+ /* Expire all the requests that didn't come back. */
+ down(&pinger_sem);
+ list_for_each(iter, &set->set_requests) {
+ req = list_entry(iter, struct ptlrpc_request, rq_set_chain);
+
+ if (req->rq_replied)
+ continue;
+
+ req->rq_phase = RQ_PHASE_COMPLETE;
+ set->set_remaining--;
+ /* If it was disconnected, don't sweat it. */
+ if (list_empty(&req->rq_import->imp_pinger_chain))
+ continue;
+
+ ptlrpc_expire_one_request(req);
+ }
+ up(&pinger_sem);
+ ptlrpc_set_destroy(set);
+
+ /* Wait until the next ping time, or until we're stopped. */
+ time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies;
+ CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping,
+ this_ping + (obd_timeout * HZ));
+ if (time_to_next_ping > 0) {
+ lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL);
+ l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING,
+ &lwi);
+ if (thread->t_flags & SVC_STOPPING) {
+ thread->t_flags &= ~SVC_STOPPING;
+ EXIT;
+ break;
+ }
+ }
}
thread->t_flags = SVC_STOPPED;
wake_up(&thread->t_ctl_waitq);
- CDEBUG(D_NET, "pinger thread exiting, process %d: rc = %d\n",
- current->pid, rc);
- return rc;
+ CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
+ return 0;
}
-int ptlrpc_pinger_start(void)
+int ptlrpc_start_pinger(void)
{
struct l_wait_info lwi = { 0 };
struct ptlrpc_svc_data d;
int rc;
ENTRY;
- spin_lock(&pinger_lock);
+ down(&pinger_sem);
if (pinger_thread != NULL)
GOTO(out, rc = -EALREADY);
GOTO(out, rc = -ENOMEM);
init_waitqueue_head(&pinger_thread->t_ctl_waitq);
- d.name = "Lustre pinger";
+ d.name = "ll_ping";
d.thread = pinger_thread;
/* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
pinger_thread->t_flags & SVC_RUNNING, &lwi);
out:
- spin_unlock(&pinger_lock);
+ up(&pinger_sem);
RETURN(rc);
}
int rc = 0;
ENTRY;
- spin_lock(&pinger_lock);
+ down(&pinger_sem);
if (pinger_thread == NULL)
GOTO(out, rc = -EALREADY);
OBD_FREE(pinger_thread, sizeof(*pinger_thread));
out:
- spin_unlock(&pinger_lock);
+ up(&pinger_sem);
RETURN(rc);
}
struct ldlm_namespace;
struct obd_import;
struct ldlm_res_id;
+struct ptlrpc_request_set;
/* ldlm hooks that we need, managed via inter_module_{get,put} */
extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int);
int ptlrpc_get_ldlm_hooks(void);
void ptlrpc_daemonize(void);
-int ptlrpc_request_handle_eviction(struct ptlrpc_request *);
+void ptlrpc_request_handle_eviction(struct ptlrpc_request *);
void lustre_assert_wire_constants (void);
void ptlrpc_lprocfs_register_service(struct obd_device *obddev,
PTLRPC_LAST_CNTR
};
+int ptlrpc_expire_one_request(struct ptlrpc_request *req);
+int ptlrpc_check_set(struct ptlrpc_request_set *set);
+
+void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
#endif /* PTLRPC_INTERNAL_H */
memcpy(server_uuid.uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
sizeof(server_uuid)));
+ init_MUTEX(&cli->cl_dirty_sem);
+ cli->cl_dirty = 0;
+ cli->cl_dirty_granted = 0;
+ cli->cl_ost_can_grant = 1;
+
conn = ptlrpc_uuid_to_connection(&server_uuid);
if (conn == NULL)
RETURN(-ENOENT);
imp->imp_obd = obddev;
imp->imp_connect_op = connect_op;
imp->imp_generation = 0;
+ INIT_LIST_HEAD(&imp->imp_pinger_chain);
memcpy(imp->imp_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1);
class_import_put(imp);
EXPORT_SYMBOL(client_obd_setup);
EXPORT_SYMBOL(client_obd_cleanup);
+/* pinger.c */
+EXPORT_SYMBOL(ptlrpc_pinger_add_import);
+EXPORT_SYMBOL(ptlrpc_pinger_del_import);
+EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import);
+
#ifdef __KERNEL__
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Request Processor");
#include "ptlrpc_internal.h"
-int ptlrpc_reconnect_import(struct obd_import *imp,
- struct ptlrpc_request **reqptr)
+enum reconnect_result { /* positive results handed back by
+                         * ptlrpc_reconnect_import(); its caller treats a
+                         * negative value as a failed reconnect */
+ RECON_RESULT_RECOVERING = 1, /* reply op flags carried MSG_CONNECT_RECOVERING */
+ RECON_RESULT_RECONNECTED = 2, /* returned on the "reconnected ... after partition" paths */
+ RECON_RESULT_EVICTED = 3, /* neither of the above; the caller invalidates the import */
+};
+
+int ptlrpc_reconnect_import(struct obd_import *imp)
{
struct obd_device *obd = imp->imp_obd;
- int flags, rc, size[] = {sizeof(imp->imp_target_uuid),
+ int rc, size[] = {sizeof(imp->imp_target_uuid),
sizeof(obd->obd_uuid),
sizeof(imp->imp_dlm_handle)};
char *tmp[] = {imp->imp_target_uuid.uuid,
struct ptlrpc_connection *conn = imp->imp_connection;
struct ptlrpc_request *req;
struct lustre_handle old_hdl;
-
- spin_lock_irqsave(&imp->imp_lock, flags);
- imp->imp_generation++;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ __u64 committed_before_reconnect = imp->imp_peer_committed_transno;
CERROR("reconnect handle "LPX64"\n",
imp->imp_dlm_handle.cookie);
req->rq_replen = lustre_msg_size(0, NULL);
rc = ptlrpc_queue_wait(req);
if (rc) {
+ /* what if rc > 0 ??*/
CERROR("cannot connect to %s@%s: rc = %d\n",
imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid, rc);
GOTO(out_disc, rc);
imp->imp_remote_handle.cookie,
req->rq_repmsg->handle.cookie);
imp->imp_remote_handle = req->rq_repmsg->handle;
- GOTO(out_disc, rc = 0);
+ GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
}
CERROR("reconnected to %s@%s after partition\n",
imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid);
- GOTO(out_disc, rc = 0);
+ GOTO(out_disc, rc = RECON_RESULT_RECONNECTED);
+ } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) {
+ rc = RECON_RESULT_RECOVERING;
+ } else {
+ rc = RECON_RESULT_EVICTED;
}
-
+
old_hdl = imp->imp_remote_handle;
imp->imp_remote_handle = req->rq_repmsg->handle;
CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n",
imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid,
imp->imp_remote_handle.cookie, old_hdl.cookie);
- GOTO(out_disc, rc = 0);
+ if (req->rq_repmsg->last_committed < committed_before_reconnect) {
+ CERROR("%s went back in time (transno "LPD64
+ " was committed, server claims "LPD64
+ ")! is shared storage not coherent?\n",
+ imp->imp_target_uuid.uuid,
+ imp->imp_peer_committed_transno,
+ req->rq_repmsg->last_committed);
+ }
+
+ GOTO(out_disc, rc);
out_disc:
- *reqptr = req;
+ ptlrpc_req_finished(req);
return rc;
}
void ptlrpc_run_failed_import_upcall(struct obd_import* imp)
{
- char *argv[6];
+ char *argv[7];
char *envp[3];
int rc;
argv[0] = obd_lustre_upcall;
argv[1] = "FAILED_IMPORT";
argv[2] = imp->imp_target_uuid.uuid;
- argv[3] = imp->imp_obd->obd_uuid.uuid;
+ argv[3] = imp->imp_obd->obd_name;
argv[4] = imp->imp_connection->c_remote_uuid.uuid;
- argv[5] = NULL;
+ argv[5] = imp->imp_obd->obd_uuid.uuid;
+ argv[6] = NULL;
envp[0] = "HOME=/";
envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
* list, so we don't need to hold the lock during this iteration and
* resend process.
*/
+ /* Well... what if lctl recover is called twice at the same time?
+ */
spin_lock_irqsave(&imp->imp_lock, flags);
- LASSERT(imp->imp_level < LUSTRE_CONN_FULL);
+ LASSERT(imp->imp_level == LUSTRE_CONN_RECOVER);
spin_unlock_irqrestore(&imp->imp_lock, flags);
list_for_each_safe(tmp, pos, &imp->imp_sending_list) {
ptlrpc_abort_inflight(imp);
}
-int ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
+
+/*
+ * React to a failed import.  An import that is not replayable
+ * (imp_replayable clear) is first deactivated through
+ * ptlrpc_set_import_active(imp, 0); in every case the failed-import upcall
+ * is then run for @imp.
+ */
+void ptlrpc_handle_failed_import(struct obd_import *imp)
{
- int rc = 0, in_recovery = 0;
+ ENTRY;
+ if (!imp->imp_replayable) {
+ CDEBUG(D_HA,
+ "import %s@%s for %s not replayable, deactivating\n",
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid,
+ imp->imp_obd->obd_name);
+ ptlrpc_set_import_active(imp, 0);
+ }
+
+ ptlrpc_run_failed_import_upcall(imp);
+ EXIT;
+}
+
+/*
+ * Kick off recovery of the import behind @failed_req.
+ *
+ * When ptlrpc_recover_import() fails, ptlrpc_resend_req() is called on
+ * @failed_req and, unless the error is -EALREADY, the import goes through
+ * ptlrpc_handle_failed_import().  When it succeeds, @failed_req is flagged
+ * with rq_err under its rq_lock.
+ *
+ * This function owns no request of its own: the local request pointer was
+ * only ever filled in by the reconnect call this patch removes, so both the
+ * pointer and the ptlrpc_req_finished() call on it are dropped rather than
+ * left to run on an uninitialized value.
+ */
+void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req)
+{
+ int rc;
struct obd_import *imp= failed_req->rq_import;
unsigned long flags;
- struct ptlrpc_request *req;
-
- spin_lock_irqsave(&imp->imp_lock, flags);
-
- if (imp->imp_level == LUSTRE_CONN_NOTCONN)
- in_recovery = 1;
-
- if (failed_req->rq_import_generation == imp->imp_generation)
- imp->imp_level = LUSTRE_CONN_NOTCONN;
- else
- in_recovery = 1;
-
- spin_unlock_irqrestore(&imp->imp_lock, flags);
-
- if (in_recovery) {
- ptlrpc_resend_req(failed_req);
- RETURN(rc);
- }
+ ENTRY;
CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n",
imp->imp_obd->obd_name,
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
- rc = ptlrpc_reconnect_import(imp, &req);
+ rc = ptlrpc_recover_import(imp, NULL);
if (rc) {
ptlrpc_resend_req(failed_req);
- ptlrpc_fail_import(imp, imp->imp_generation);
+ /* -EALREADY: the import was not in a state to start another recovery */
+ if (rc != -EALREADY)
+ ptlrpc_handle_failed_import(imp);
} else {
+ LASSERT(failed_req->rq_import_generation < imp->imp_generation);
spin_lock_irqsave (&failed_req->rq_lock, flags);
failed_req->rq_err = 1;
spin_unlock_irqrestore (&failed_req->rq_lock, flags);
- spin_lock_irqsave(&imp->imp_lock, flags);
- imp->imp_level = LUSTRE_CONN_FULL;
- imp->imp_invalid = 0;
- spin_unlock_irqrestore(&imp->imp_lock, flags);
- ptlrpc_invalidate_import_state(imp/*, req->rq_import_generation*/);
}
- ptlrpc_req_finished(req);
- RETURN(rc);
+ EXIT;
}
int ptlrpc_set_import_active(struct obd_import *imp, int active)
/* When deactivating, mark import invalid, and
abort in-flight requests. */
if (!active) {
+ CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_invalid = 1;
+ imp->imp_generation++;
spin_unlock_irqrestore(&imp->imp_lock, flags);
-
- ptlrpc_abort_inflight(imp);
+ ptlrpc_invalidate_import_state(imp);
+// ptlrpc_abort_inflight(imp);
}
- imp->imp_invalid = !active;
-
if (notify_obd == NULL)
GOTO(out, rc = 0);
out:
/* When activating, mark import valid */
if (active) {
+ CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_invalid = 0;
spin_unlock_irqrestore(&imp->imp_lock, flags);
LASSERT (!imp->imp_dlm_fake);
- /* If we were already in recovery, or if the import's connection to its
- * service is newer than the failing operation's original attempt, then
- * we don't want to recover again. */
spin_lock_irqsave(&imp->imp_lock, flags);
-
- if (imp->imp_level == LUSTRE_CONN_RECOVD)
- in_recovery = 1;
-
- if (generation == imp->imp_generation) {
- imp->imp_level = LUSTRE_CONN_RECOVD;
- imp->imp_generation++;
- } else {
+ if (imp->imp_level != LUSTRE_CONN_FULL)
in_recovery = 1;
- }
-
+ else
+ imp->imp_level = LUSTRE_CONN_NOTCONN;
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (in_recovery) {
return;
}
- if (!imp->imp_replayable) {
- CDEBUG(D_HA,
- "import %s@%s for %s not replayable, deactivating\n",
- imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid,
- imp->imp_obd->obd_name);
- ptlrpc_set_import_active(imp, 0);
- }
-
- ptlrpc_run_failed_import_upcall(imp);
+ ptlrpc_handle_failed_import(imp);
EXIT;
}
RETURN(-ENOMEM);
req->rq_replen = lustre_msg_size(0, NULL);
- req->rq_level = LUSTRE_CONN_RECOVD;
+ req->rq_level = LUSTRE_CONN_RECOVER;
req->rq_reqmsg->flags |= MSG_LAST_REPLAY;
rc = ptlrpc_queue_wait(req);
int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid)
{
- int msg_flags = 0, rc;
+ int rc;
unsigned long flags;
- struct ptlrpc_request *req;
+ int in_recover = 0;
+ int recon_result;
ENTRY;
spin_lock_irqsave(&imp->imp_lock, flags);
- if (imp->imp_level == LUSTRE_CONN_FULL) {
- imp->imp_level = LUSTRE_CONN_RECOVD;
- imp->imp_generation++;
- }
+ if (imp->imp_level == LUSTRE_CONN_FULL ||
+ imp->imp_level == LUSTRE_CONN_NOTCONN)
+ imp->imp_level = LUSTRE_CONN_RECOVER;
+ else
+ in_recover = 1;
spin_unlock_irqrestore(&imp->imp_lock, flags);
+ if (in_recover == 1)
+ RETURN(-EALREADY);
+
if (new_uuid) {
struct ptlrpc_connection *conn;
struct obd_uuid uuid;
}
- rc = ptlrpc_reconnect_import(imp, &req);
+ recon_result = ptlrpc_reconnect_import(imp);
- if (rc) {
+ if (recon_result < 0) {
CERROR("failed to reconnect to %s@%s: %d\n",
imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid, rc);
- RETURN(rc);
+ imp->imp_connection->c_remote_uuid.uuid, recon_result);
+ spin_lock_irqsave(&imp->imp_lock, flags);
+ imp->imp_level = LUSTRE_CONN_NOTCONN;
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
+ RETURN(recon_result);
}
- if (req->rq_repmsg)
- msg_flags = lustre_msg_get_op_flags(req->rq_repmsg);
-
- if (msg_flags & MSG_CONNECT_RECOVERING) {
+ if (recon_result == RECON_RESULT_RECOVERING) {
CDEBUG(D_HA, "replay requested by %s\n",
imp->imp_target_uuid.uuid);
rc = ptlrpc_replay(imp);
rc = signal_completed_replay(imp);
if (rc)
GOTO(out, rc);
- } else if (msg_flags & MSG_CONNECT_RECONNECT) {
+ } else if (recon_result == RECON_RESULT_RECONNECTED) {
CDEBUG(D_HA, "reconnected to %s@%s\n",
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
- } else {
+ } else if (recon_result == RECON_RESULT_EVICTED) {
CDEBUG(D_HA, "evicted from %s@%s; invalidating\n",
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
- ptlrpc_invalidate_import_state(imp);
+ ptlrpc_set_import_active(imp, 0);
+// ptlrpc_invalidate_import_state(imp);
+ } else {
+ LBUG();
}
+ ptlrpc_set_import_active(imp, 1);
+
rc = ptlrpc_resend(imp);
spin_lock_irqsave(&imp->imp_lock, flags);
imp->imp_level = LUSTRE_CONN_FULL;
- imp->imp_invalid = 0;
spin_unlock_irqrestore(&imp->imp_lock, flags);
ptlrpc_wake_delayed(imp);
EXIT;
out:
- ptlrpc_req_finished(req);
return rc;
}
%define version b_devel
%define kversion @RELEASE@
%define linuxdir @LINUX@
-Release: 0305281701chaos
+Release: 0306170928kernel
Summary: Lustre Lite File System
Name: lustre-lite
Configures openldap server for LDAP Lustre config database
-%package -n liblustre
-Summary: Lustre Lib
-Group: Development/Kernel
+#%package -n liblustre
+#Summary: Lustre Lib
+#Group: Development/Kernel
-%description -n liblustre
-Lustre lib binary package.
+#%description -n liblustre
+#Lustre lib binary package.
%prep
%setup -qn lustre-%{version}
-%setup -c -n lustre-%{version}-lib
+#%setup -c -n lustre-%{version}-lib
%build
rm -rf $RPM_BUILD_ROOT
./configure --with-linux='%{linuxdir}'
make
-%ifarch i386
-cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-./configure --with-lib
-make
-%endif
+#%ifarch i386
+#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
+#./configure --with-lib
+#make
+#%endif
%install
cd $RPM_BUILD_DIR/lustre-%{version}
make install prefix=$RPM_BUILD_ROOT
-%ifarch i386
-cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
-make install prefix=$RPM_BUILD_ROOT
-%endif
+#%ifarch i386
+#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version}
+#make install prefix=$RPM_BUILD_ROOT
+#%endif
%ifarch alpha
# this hurts me
%attr(-, root, root) /usr/sbin/lmc
%attr(-, root, root) /usr/sbin/lctl
%attr(-, root, root) /usr/sbin/lconf
+%attr(-, root, root) /usr/sbin/lactive
%attr(-, root, root) /usr/sbin/llanalyze
%attr(-, root, root) /usr/sbin/lfind
%attr(-, root, root) /usr/sbin/lstripe
%attr(-, root, root) /usr/sbin/mcreate
%attr(-, root, root) /usr/sbin/mkdirmany
+%attr(-, root, root) /usr/sbin/llstat.pl
+%attr(-, root, root) /usr/sbin/llobdstat.pl
+%attr(-, root, root) /usr/sbin/load_ldap.sh
%attr(-, root, root) /usr/lib/lustre/python/*
%attr(-, root, root) /usr/lib/lustre/examples/llmount.sh
%attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh
%attr(-, root, root) /usr/lib/lustre/examples/local.sh
%attr(-, root, root) /usr/lib/lustre/examples/uml.sh
%attr(-, root, root) /usr/lib/lustre/examples/lov.sh
+%attr(-, root, root) /usr/lib/lustre/examples/echo.sh
+%attr(-, root, root) /usr/lib/lustre/examples/llechocleanup.sh
+
%attr(-, root, root) /etc/init.d/lustre
%attr(-, root, root) /usr/sbin/acceptor
%attr(-, root, root) /usr/sbin/ptlctl
%attr(-, root, root) /lib/libptlctl.a
%attr(-, root, root) /lib/libtcpnal.a
%attr(-, root, root) /usr/include/lustre/*.h
+%attr(-, root, root) /usr/sbin/lload
+%attr(-, root, root) /usr/sbin/obdbarrier
+%attr(-, root, root) /usr/sbin/obdio
+%attr(-, root, root) /usr/sbin/routerstat
+%attr(-, root, root) /usr/sbin/wirecheck
+
%ifarch alpha
%attr(-, root, root) /usr/sbin/mcpload
%endif
%files -n lustre-doc
-#%attr(-, root, root) %doc COPYING FDL
+%attr(-, root, root) %doc COPYING FDL
%attr(-, root, root) %doc doc/lustre.pdf doc/lustre-HOWTO.txt
-%attr(-, root, root) %doc tests/client-echo.cfg tests/client-mount.cfg
-%attr(-, root, root) %doc tests/client-mount2.cfg
-%attr(-, root, root) %doc tests/elan-client.cfg tests/elan-server.cfg
-%attr(-, root, root) %doc tests/ldlm.cfg tests/lustre.cfg
-%attr(-, root, root) %doc tests/mds.cfg tests/net-client.cfg
-%attr(-, root, root) %doc tests/net-local.cfg tests/net-server.cfg
-%attr(-, root, root) %doc tests/obdecho.cfg tests/obdfilter.cfg
+#%attr(-, root, root) %doc tests/client-echo.cfg tests/client-mount.cfg
+#%attr(-, root, root) %doc tests/client-mount2.cfg
+#%attr(-, root, root) %doc tests/elan-client.cfg tests/elan-server.cfg
+#%attr(-, root, root) %doc tests/ldlm.cfg tests/lustre.cfg
+#%attr(-, root, root) %doc tests/mds.cfg tests/net-client.cfg
+#%attr(-, root, root) %doc tests/net-local.cfg tests/net-server.cfg
+#%attr(-, root, root) %doc tests/obdecho.cfg tests/obdfilter.cfg
%files -n lustre-modules
%attr(-, root, root) %doc COPYING
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mdc.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_ext3.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_reiserfs.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdclass.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdecho.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdfilter.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/osc.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ost.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ptlrpc.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ptlbd.o
+%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/cobd.o
#portals modules
%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/kptlrouter.o
%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/*nal.o
%files -n lustre-source
%attr(-, root, root) /usr/src/lustre-%{version}
-%ifarch i386
-%files -n liblustre
-%attr(-, root, root) /lib/lustre
-%attr(-, root, root) /lib/lustre/liblov.a
-%attr(-, root, root) /lib/lustre/liblustreclass.a
-%attr(-, root, root) /lib/lustre/libptlrpc.a
-%attr(-, root, root) /lib/lustre/libobdecho.a
-%attr(-, root, root) /lib/lustre/libldlm.a
-%attr(-, root, root) /lib/lustre/libosc.a
-%attr(-, root, root) /usr/sbin/lctl
-%attr(-, root, root) /usr/sbin/lfind
-%attr(-, root, root) /usr/sbin/lstripe
-%attr(-, root, root) /usr/sbin/obdio
-%attr(-, root, root) /usr/sbin/obdbarrier
-%attr(-, root, root) /usr/sbin/obdstat
-%attr(-, root, root) /usr/sbin/lload
-%attr(-, root, root) /usr/sbin/lconf
-%attr(-, root, root) /usr/sbin/lmc
-%attr(-, root, root) /usr/sbin/llanalyze
-%endif
+#%ifarch i386
+#%files -n liblustre
+#%attr(-, root, root) /lib/lustre
+#%attr(-, root, root) /lib/lustre/liblov.a
+#%attr(-, root, root) /lib/lustre/liblustreclass.a
+#%attr(-, root, root) /lib/lustre/libptlrpc.a
+#%attr(-, root, root) /lib/lustre/libobdecho.a
+#%attr(-, root, root) /lib/lustre/libldlm.a
+#%attr(-, root, root) /lib/lustre/libosc.a
+#%attr(-, root, root) /usr/sbin/lctl
+#%attr(-, root, root) /usr/sbin/lfind
+#%attr(-, root, root) /usr/sbin/lstripe
+#%attr(-, root, root) /usr/sbin/obdio
+#%attr(-, root, root) /usr/sbin/obdbarrier
+#%attr(-, root, root) /usr/sbin/obdstat
+#%attr(-, root, root) /usr/sbin/lload
+#%attr(-, root, root) /usr/sbin/lconf
+#%attr(-, root, root) /usr/sbin/lmc
+#%attr(-, root, root) /usr/sbin/llanalyze
+#%endif
%files -n lustre-ldap
%attr(-, root, root) /etc/openldap/schema/lustre.schema
%attr(-, root, root) /usr/lib/lustre/lustre2ldif.xsl
%attr(-, root, root) /usr/lib/lustre/top.ldif
-%dir /var/lib/ldap/lustre
+#%dir /var/lib/ldap/lustre
%attr(700, ldap, ldap) /var/lib/ldap/lustre
%post
openfile
unlinkmany
fchdir_test
+getdents
+o_directory
noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay
noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy
noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink
-noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test
-# noinst_PROGRAMS += ldaptest
-noinst_PROGRAMS += checkstat wantedi statone runas openfile
+noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat
+noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory
+# noinst_PROGRAMS += ldaptest
sbin_PROGRAMS = mcreate mkdirmany
# ldaptest_SOURCES = ldaptest.c
opendirunlink_SOURCES=opendirunlink.c
opendevunlink_SOURCES=opendirunlink.c
fchdir_test_SOURCES=fchdir_test.c
+getdents_SOURCES=getdents.c
+o_directory_SOURCES = o_directory.c
#mkdirdeep_SOURCES= mkdirdeep.c
#mkdirdeep_LDADD=-L../portals/util -lptlctl
#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include
--- /dev/null
+/* for struct dirent64 and readdir64() */
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
+
+/*
+ * Print the name of every entry of the directory named by argv[1], one per
+ * line, reading it through the 64-bit readdir interface.
+ *
+ * Returns 0 on success, 1 on a usage error, or the errno value of the
+ * opendir()/readdir64() call that failed.
+ */
+int main(int argc, char **argv)
+{
+        DIR *dir;
+        struct dirent64 *entry;
+        int rc = 0;
+
+        if (argc < 2) {
+                fprintf(stderr, "Usage: %s dirname\n", argv[0]);
+                return 1;
+        }
+
+        dir = opendir(argv[1]);
+        if (!dir) {
+                /* save errno first: perror() itself may overwrite it */
+                rc = errno;
+                perror("opendir");
+                return rc;
+        }
+
+        /* readdir64() returns NULL both at end-of-directory and on error;
+         * only errno tells the two apart, so clear it before every call. */
+        errno = 0;
+        while ((entry = readdir64(dir))) {
+                puts(entry->d_name);
+                errno = 0;
+        }
+        if (errno) {
+                rc = errno;
+                perror("readdir64");
+        }
+
+        closedir(dir);
+
+        return rc;
+}
+
#!/bin/sh
# suggested boilerplate for test script
-LCONF=${LCONF:-../utils/lconf}
+export PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
NAME=${NAME:-local}
config=$NAME.xml
verbose="-v"
fi
-[ -x $LCONF ] || chmod a+rx $LCONF
-
-${LCONF} $portals_opt $lustre_opt $node_opt --reformat --gdb \
+${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \
$verbose $conf_opt || exit 2
#!/bin/sh
-LCONF=${LCONF:-../utils/lconf}
+export PATH=`dirname $0`/../utils:$PATH
+
+LCONF=${LCONF:-lconf}
NAME=${NAME:-local}
TMP=${TMP:-/tmp}
-
#!/bin/bash
+export PATH=`dirname $0`/../utils:$PATH
+
config=${1:-local.xml}
-LMC="${LMC:-../utils/lmc} -m $config"
+LMC="${LMC:-lmc} -m $config"
TMP=${TMP:-/tmp}
MDSDEV=${MDSDEV:-$TMP/mds1}
#!/bin/bash
+export PATH=`dirname $0`/../utils:$PATH
+
config=${1:-lov.xml}
-LMC=${LMC:-../utils/lmc}
+LMC=${LMC:-lmc}
TMP=${TMP:-/tmp}
MDSDEV=${MDSDEV:-$TMP/mds1}
--- /dev/null
+#!/bin/bash
+#
+# Time "ls -lr" of one directory through two mount points: MOUNT1, through
+# which the files were just created (labelled "Cached"), and MOUNT2
+# (labelled "Uncached").  The two measurements are then repeated while fsx
+# runs in the background against a file in the same directory.
+#
+# Tunables (environment): MOUNT1, MOUNT2, DIRNAME, DIRSIZE, NAME.
+
+# any failing command aborts the script
+set -e
+
+# pick up the test helpers (createmany, fsx, llmount.sh) from the current dir
+PATH=$PATH:.
+
+# NOTE(review): CHECKSTAT is set but never used below
+CHECKSTAT=${CHECKSTAT:-"checkstat -v"}
+MOUNT1=${MOUNT1:-/mnt/lustre1}
+MOUNT2=${MOUNT2:-/mnt/lustre2}
+DIRNAME=${DIRNAME:-"ls-timing"}
+DIRSIZE=${DIRSIZE:-200}
+# exported for llmount.sh, which derives its config name from NAME
+export NAME=${NAME:-mount2}
+
+error () {
+ echo FAIL
+ exit 1
+}
+
+# NOTE(review): pass() is defined but never called below
+pass() {
+ echo PASS
+}
+echo "Mounting..."
+# only run llmount.sh when MOUNT1 does not already show up in the mount table
+mount | grep $MOUNT1 || sh llmount.sh
+
+echo -n "Preparing test directory with $DIRSIZE files..."
+rm -rf "$MOUNT1/$DIRNAME"
+rm -rf "$MOUNT2/$DIRNAME"
+mkdir -p "$MOUNT1/$DIRNAME"
+# the directory made through MOUNT1 must be visible through MOUNT2
+# (presumably both are mounts of the same filesystem -- confirm)
+[ -d "$MOUNT2/$DIRNAME" ] || error
+createmany -o $MOUNT1/$DIRNAME/file 0 $DIRSIZE &> /dev/null
+echo "done"
+
+echo -n "Cached ls: "
+time ls -lr $MOUNT1/$DIRNAME 1> /dev/null
+
+echo -n "Uncached ls: "
+time ls -lr $MOUNT2/$DIRNAME 1> /dev/null
+
+
+# generate background I/O in the test directory for the "busy" measurements
+fsx $MOUNT1/$DIRNAME/fsx.file &>/dev/null &
+fsxpid=$!
+
+echo -n "Cached busy ls:"
+time ls -lr $MOUNT1/$DIRNAME 1> /dev/null
+
+echo -n "Uncached busy ls: "
+time ls -lr $MOUNT2/$DIRNAME 1> /dev/null
+
+# NOTE(review): under "set -e" this aborts the script with a non-zero status
+# if fsx has already exited on its own
+kill $fsxpid
+
+exit
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Compile with:
- * cc -I../../portals/include -o mkdirdeep mkdirdeep.c
- * -L../../portals/linux/utils -lptlctl
+ * cc -I../../portals/include -o mkdirdeep mkdirdeep.c
+ * -L../../portals/linux/utils -lptlctl
*/
#include <stdio.h>
#include <linux/limits.h>
#include <portals/lltrace.h>
-static int opt_depth = 1;
-static int opt_mknod = 0;
static int opt_verbose = 0;
-static int opt_trace = 1;
-static char* basepathname = 0;
-static char mycwd[PATH_MAX];
-static char* pname = 0;
-static char* outputfilename = 0;
+static int opt_trace = 0;
-void usage()
+/*
+ * Print the command-line synopsis for program name @pname on stderr and
+ * terminate the process with exit status 1; never returns.
+ *
+ * NOTE(review): the text advertises --notrace, but the long option table in
+ * main() lists "trace" and no "notrace" entry -- confirm which is intended.
+ */
+void usage(const char *pname)
{
- fprintf(stderr, "Usage: %s --depth <d> --output <outputtracefilename>"
- "[--mknod] [--verbose] [--notrace] <basepath>\n", pname);
+ fprintf(stderr, "Usage: %s --depth <d> [--output <outputtracefilename>]"
+ " [--mknod] [--verbose] [--notrace] <basepath>\n", pname);
exit(1);
}
-int do_mkdir(char* path)
+/*
+ * Create directory @path with mode 0755.
+ *
+ * On failure the error is reported on stderr and the process exits with
+ * status 1, so whenever this function returns its result is 0.  The path is
+ * echoed on stdout when opt_verbose is set.
+ */
+int do_mkdir(char *path)
{
int rc = mkdir(path, 0755);
- if (rc!=0)
+
+ if (rc) {
fprintf(stderr, "mkdir(%s) failed: %s\n",
path, strerror(errno));
+ exit(1);
+ }
if (opt_verbose)
printf("mkdir %s\n", path);
+
return rc;
}
-int do_mknod(char* path)
+/*
+ * Create a FIFO at @path with permission bits 0755.
+ *
+ * On failure the error is reported on stderr and the process exits with
+ * status 1, so whenever this function returns its result is 0.  The path is
+ * echoed on stdout when opt_verbose is set.
+ */
+int do_mknod(char *path)
{
- int rc = mknod(path, 0755, S_IFIFO);
- if (rc!=0)
- fprintf(stderr, "mkdir(%s) failed: %s\n",
+ /* mknod(2) expects the file type OR-ed into the mode argument; the
+ * third argument is a device number and is ignored for a FIFO.  With
+ * S_IFIFO passed as the device a plain regular file was created. */
+ int rc = mknod(path, S_IFIFO | 0755, 0);
+
+ if (rc) {
+ fprintf(stderr, "mknod(%s) failed: %s\n",
path, strerror(errno));
+ exit(1);
+ }
if (opt_verbose)
printf("mknod %s\n", path);
+
return rc;
}
+/*
+ * Change the working directory to @path.
+ *
+ * On failure the error is reported on stderr and the process exits with
+ * status 1, so whenever this function returns its result is 0.  The path is
+ * echoed on stdout when opt_verbose is set.
+ */
int do_chdir(char* path)
{
int rc = chdir(path);
- if (rc!=0)
+
+ if (rc) {
fprintf(stderr, "chdir(%s) failed: %s\n",
path, strerror(errno));
+ exit(1);
+ }
if (opt_verbose)
printf("chdir %s\n", path);
return rc;
}
-
-int do_stat(char* path)
+int do_stat(char *path)
{
- char mark_buf[PATH_MAX];
+ char mark_buf[PATH_MAX + 50];
struct stat mystat;
int rc = stat(path, &mystat);
- if (rc!=0)
+
+ if (rc) {
fprintf(stderr, "stat(%s) failed: %s\n",
path, strerror(errno));
+ exit(1);
+ }
if (opt_verbose)
printf("stat %s = inode %lu\n", path, mystat.st_ino);
if (opt_trace) {
- snprintf(mark_buf, PATH_MAX, "stat %s = inode %lu",
+ snprintf(mark_buf, PATH_MAX, "stat %s = inode %lu",
path, mystat.st_ino);
ltrace_mark(0, mark_buf);
}
int main(int argc, char** argv)
{
- int c, opt_index, i, mypid;
-
- static struct option long_options[] = {
- {"depth", 1, 0, 0 },
- {"help", 0, 0, 0 },
- {"mknod", 0, 0, 0 },
- {"verbose", 0, 0, 0 },
- {"notrace", 0, 0, 0 },
- {"output", 1, 0, 0 },
+ int c, i, mypid;
+ int opt_depth = 1;
+ int opt_mknod = 0;
+
+ static struct option long_opt[] = {
+ {"depth", 1, 0, 'd' },
+ {"help", 0, 0, 'h' },
+ {"mknod", 0, 0, 'm' },
+ {"output", 1, 0, 'o' },
+ {"trace", 1, 0, 't' },
+ {"verbose", 0, 0, 'v' },
{0,0,0,0}
};
- char full_pathname[PATH_MAX];
- char rel_pathname[PATH_MAX];
- char mark_buf[PATH_MAX];
+ char *outputfilename = NULL;
+ char *base_pathname;
+ char pathname[PATH_MAX];
+ char mark_buf[PATH_MAX + 50];
+ char mycwd[PATH_MAX];
+ char *pname = argv[0];
- pname = strdup(argv[0]);
-
- while (1) {
- c = getopt_long(argc, argv, "d:mhv", long_options, &opt_index);
- if (c == -1)
- break;
- if (c==0) {
- if (!strcmp(long_options[opt_index].name, "notrace")) {
- opt_trace = 0;
- continue;
- }
- c = long_options[opt_index].name[0];
- }
+ while ((c = getopt_long(argc, argv, "d:mhvo:", long_opt, NULL)) != -1) {
switch (c) {
- case 'd':
+ case 'd':
opt_depth = atoi(optarg);
- if ((opt_depth == 0) || (opt_depth > 100))
- usage();
+ if ((opt_depth == 0) || (opt_depth > 1100))
+ usage(pname);
break;
case 'm':
opt_mknod = 1;
break;
+ case 't':
+ opt_trace = 1;
+ break;
case 'v':
opt_verbose = 1;
break;
outputfilename = optarg;
break;
case 'h':
- case '?':
- case ':':
+ case '?':
+ case ':':
default:
- usage();
+ usage(pname);
break;
}
}
-
- if (optind != (argc-1))
- usage();
- if (outputfilename == NULL)
- usage();
+ if (optind != (argc - 1))
+ usage(pname);
- basepathname = argv[optind];
+ base_pathname = argv[optind];
mypid = getpid();
-
- printf("%s(pid=%d) depth=%d mknod=%d, basepathname=%s, "
- "trace=%d, outputfilename=%s\n",
- pname, mypid, opt_depth, opt_mknod, basepathname, opt_trace,
- outputfilename);
if (!getcwd(&mycwd[0], sizeof(mycwd))) {
fprintf(stderr, "%s: unable to getcwd()\n", pname);
exit(1);
}
+ printf("%s(pid=%d) depth=%d mknod=%d, basepathname=%s, trace=%d\n",
+ pname, mypid, opt_depth, opt_mknod, base_pathname, opt_trace);
+
+ if (outputfilename)
+ printf("outputfilename=%s\n", outputfilename);
+
if (opt_trace) {
ltrace_start();
ltrace_clear();
- snprintf(mark_buf, PATH_MAX,
- "Initialize - mkdir %s; chdir %s",
- basepathname, basepathname);
+ snprintf(mark_buf, PATH_MAX, "Initialize - mkdir %s; chdir %s",
+ base_pathname, base_pathname);
ltrace_mark(2, mark_buf);
}
- if (do_mkdir(basepathname)!=0)
+ if (do_mkdir(base_pathname)!=0)
exit(1);
- if (do_chdir(basepathname)!=0)
+ if (do_chdir(base_pathname)!=0)
exit(1);
/* Create directory tree with depth level of subdirectories */
if (opt_trace) {
- snprintf(mark_buf, PATH_MAX,
+ snprintf(mark_buf, PATH_MAX,
"Create Directory Tree (depth %d)", opt_depth);
ltrace_mark(2, mark_buf);
}
- for (i=0; i<opt_depth; i++) {
-
- snprintf(rel_pathname, sizeof(rel_pathname),"%d", i+1);
-
- if (i == (opt_depth-1)) {
- /* Last Iteration */
-
- if (opt_trace) {
- snprintf(mark_buf, PATH_MAX,
- "Tree Leaf (%d) %s/stat", i,
- (opt_mknod ? "mknod" : "mkdir"));
- ltrace_mark(3, mark_buf);
- }
-
- if (opt_mknod)
- do_mknod(rel_pathname);
- else
- do_mkdir(rel_pathname);
- /* Now stat it */
- do_stat(rel_pathname);
- }
- else {
+ for (i = 0; i < opt_depth; i++) {
+ snprintf(pathname, sizeof(pathname), "%d", i + 1);
+
+ if (i == (opt_depth - 1)) {
+ /* Last Iteration */
+
+ if (opt_trace) {
+ snprintf(mark_buf, PATH_MAX,
+ "Tree Leaf (%d) %s/stat", i,
+ (opt_mknod ? "mknod" : "mkdir"));
+ ltrace_mark(3, mark_buf);
+ }
+
+ if (opt_mknod)
+ do_mknod(pathname);
+ else
+ do_mkdir(pathname);
+ /* Now stat it */
+ do_stat(pathname);
+ } else {
/* Not Leaf */
if (opt_trace) {
- snprintf(mark_buf, PATH_MAX,
- "Tree Level (%d) mkdir/stat/chdir",
- i);
+ snprintf(mark_buf, sizeof(mark_buf),
+ "Tree Level (%d) mkdir/stat/chdir", i);
ltrace_mark(3, mark_buf);
}
-
- do_mkdir(rel_pathname);
- do_stat(rel_pathname);
- do_chdir(rel_pathname);
+
+ do_mkdir(pathname);
+ do_stat(pathname);
+ do_chdir(pathname);
}
}
-
+
/* Stat through directory tree with fullpaths */
if (opt_trace) {
ltrace_mark(2, mark_buf);
}
- do_chdir(basepathname);
+ do_chdir(base_pathname);
- strncpy(full_pathname, basepathname, sizeof(full_pathname));
+ strncpy(pathname, base_pathname, sizeof(pathname));
- for (i=0; i<opt_depth; i++) {
- snprintf(rel_pathname, sizeof(rel_pathname),"%d", i+1);
- strcat(full_pathname, "/");
- strcat(full_pathname, rel_pathname);
+ c = strlen(base_pathname);
+ for (i = 0; i < opt_depth; i++) {
+ c += snprintf(pathname + c, sizeof(pathname) - c, "/%d", i+1);
if (opt_trace) {
- snprintf(mark_buf, PATH_MAX, "stat %s",
- full_pathname);
+ snprintf(mark_buf, PATH_MAX, "stat %s", pathname);
ltrace_mark(2, mark_buf);
}
- do_stat(full_pathname);
+ do_stat(pathname);
}
- /* Cleanup */
-
- if (opt_trace) {
- snprintf(mark_buf, PATH_MAX, "Cleanup");
- ltrace_mark(2, mark_buf);
- }
-
- if (opt_trace) {
+ if (opt_trace && outputfilename) {
ltrace_write_file(outputfilename);
ltrace_add_processnames(outputfilename);
ltrace_stop();
}
- do_chdir(basepathname);
-
- snprintf(full_pathname, sizeof(full_pathname),
- "rm -rf %s\n", basepathname);
- if (opt_verbose)
- printf("Cleanup: %s", full_pathname);
-
- system(full_pathname);
+ do_chdir(base_pathname);
printf("%s (pid=%d) done.\n", pname, mypid);
+
return 0;
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+/* for O_DIRECTORY */
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/*
+ * Check that open(2) with O_DIRECTORY refuses a non-directory.
+ *
+ * argv[1] names the file to use; it is created if it does not exist yet and
+ * is left in place afterwards.  Returns 0 when the O_DIRECTORY open fails
+ * with ENOTDIR, 1 (after printing a diagnostic on stdout) in every other
+ * case, including a usage error.
+ */
+int main(int argc, char **argv)
+{
+        const char *path;
+        int fd;
+
+        if (argc != 2) {
+                printf("Usage: %s <filename>\n", argv[0]);
+                return 1;
+        }
+        path = argv[1];
+
+        /* Step 1: make sure the file exists, creating it if necessary. */
+        fd = open(path, O_RDONLY | O_CREAT, 0600);
+        if (fd == -1) {
+                printf("Error opening %s for create: %s\n", path,
+                       strerror(errno));
+                return 1;
+        }
+        if (close(fd) < 0) {
+                printf("Error closing %s: %s\n", path, strerror(errno));
+                return 1;
+        }
+
+        /* Step 2: opening it as a directory must fail, and with ENOTDIR. */
+        fd = open(path, O_DIRECTORY);
+        if (fd >= 0) {
+                printf("opening %s as directory should have returned an "
+                       "error!\n", path);
+                return 1;
+        }
+        if (errno != ENOTDIR) {
+                printf("opening %s as directory, expected -ENOTDIR and got "
+                       "%s\n", path, strerror(errno));
+                return 1;
+        }
+
+        return 0;
+}
}
setup() {
- start_mds ${REFORMAT}
- start_ost --timeout=$(($TIMEOUT*2)) ${REFORMAT}
+ start_mds --timeout=$TIMEOUT ${REFORMAT}
+ start_ost --timeout=$TIMEOUT ${REFORMAT}
# XXX we should write our own upcall, when we move this somewhere better.
mount_client --timeout=${TIMEOUT} \
--lustre_upcall=$UPCALL
drop_request "munlink /mnt/lustre/link1"
drop_reply "munlink /mnt/lustre/link2"
+#bug 1423
+drop_reply "touch /mnt/lustre/renamed"
+
$CLEANUP
default:
//fprintf(stderr, "Bad parameters.\n");
//Usage_and_abort ();
+ break;
}
}
#!/bin/sh
+DIR=${DIR:-/mnt/lustre/`hostname`}
#[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug
-TGT=/mnt/lustre/client.txt
-SRC=/usr/lib/dbench/client.txt
+mkdir -p $DIR
+TGT=$DIR/client.txt
+SRC=${SRC:-/usr/lib/dbench/client.txt}
[ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
SRC=/usr/lib/dbench/client_plain.txt
[ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
-cd /mnt/lustre
+cd $DIR
echo "running 'dbench $@' on $PWD at `date`"
dbench -c client.txt $@
CREATETEST=${CREATETEST:-createtest}
LFIND=${LFIND:-lfind}
LSTRIPE=${LSTRIPE:-lstripe}
+LCTL=${LCTL:-lctl}
MCREATE=${MCREATE:-mcreate}
TOEXCL=${TOEXCL:-toexcl}
TRUNCATE=${TRUNCATE:-truncate}
test_29() {
mkdir $DIR/d29
touch $DIR/d29/foo
+ log 'first d29'
ls -l $DIR/d29
MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_*}
LOCKCOUNTORIG=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count`
+ log 'second d29'
ls -l $DIR/d29
+ log 'done'
LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count`
LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count`
if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ]; then
run_test 36a "cvs init ========================================="
test_36b() {
+ # on the LLNL clusters, runas will still pick up root's $TMP settings,
+ # which will not be writable for the runas user, and then you get a CVS
+ # error message with a corrupt path string (CVS bug) and panic.
+ # We're not using much space, so just stick it in /tmp, which is
+ # safe.
+ OLDTMPDIR=$TMPDIR
+ OLDTMP=$TMP
+ TMPDIR=/tmp
+ TMP=/tmp
+
cd /etc/init.d
$RUNAS cvs -d $DIR/cvsroot import -m "nomesg" reposname vtag rtag
+
+ # put the previous temp-dir settings back for the tests that follow
+ # NOTE(review): a variable that was unset before ends up set-but-empty
+ # here rather than unset
+ TMPDIR=$OLDTMPDIR
+ TMP=$OLDTMP
}
run_test 36b "cvs import ======================================="
}
run_test 36f "cvs commit ======================================="
+# Mounting another filesystem over a directory must hide the files that
+# the directory held before the mount.
+test_37() {
+ mkdir -p $DIR/dextra
+ echo f > $DIR/dextra/fbugfile
+ mount -t ext2 -o loop /$EXT2_DEV $DIR/dextra
+ # the pre-mount file must not show up in a listing of the mount point
+ ls $DIR/dextra |grep "\<fbugfile\>" && error
+ umount /$EXT2_DEV
+ # the file is reachable again after the umount; remove it
+ rm -f $DIR/dextra/fbugfile
+}
+run_test 37 "ls a mounted file system to check the old contents ====="
+
+# open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501)
+# o_directory creates the file if needed and exits non-zero unless the
+# O_DIRECTORY open fails with ENOTDIR.
+test_38() {
+ o_directory $DIR/test38
+}
+run_test 38 "open a regular file with O_DIRECTORY =============="
+
+
log "cleanup: ======================================================"
rm -r $DIR/[Rdfs][1-9]*
if [ "$I_MOUNTED" = "yes" ]; then
#include <time.h>
#include <string.h>
#include <utime.h>
+#include <errno.h>
void usage(char *prog)
{
{
long before_mknod, after_mknod;
long before_utime, after_utime;
+ const char *prog = argv[0];
+ const char *filename = argv[1];
struct stat st;
int rc;
usage(argv[0]);
before_mknod = time(0);
- rc = mknod(argv[1], 0700, S_IFREG);
+ rc = mknod(filename, 0700, S_IFREG);
after_mknod = time(0);
- if (rc) {
+ if (rc && errno != EEXIST) {
fprintf(stderr, "%s: mknod(%s) failed: rc %d: %s\n",
- argv[0], argv[1], rc, strerror(rc));
+ prog, filename, errno, strerror(errno));
return 2;
- }
+ } else if (!rc) {
+ rc = stat(filename, &st);
+ if (rc) {
+ fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n",
+ prog, filename, errno, strerror(errno));
+ return 3;
+ }
- rc = stat(argv[1], &st);
- if (rc) {
- fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n",
- argv[0], argv[1], rc, strerror(rc));
- return 3;
- }
+ if (st.st_mtime < before_mknod || st.st_mtime > after_mknod) {
+ fprintf(stderr,
+ "%s: bad mknod times %lu <= %lu <= %lu false\n",
+ prog, before_mknod, st.st_mtime, after_mknod);
+ return 4;
+ }
- if (st.st_mtime < before_mknod || st.st_mtime > after_mknod) {
- fprintf(stderr, "%s: bad mknod times %lu <= %lu <= %lu false\n",
- argv[0], before_mknod, st.st_mtime, after_mknod);
- return 4;
- }
-
- printf("%s: good mknod times %lu <= %lu <= %lu\n",
- argv[0], before_mknod, st.st_mtime, after_mknod);
+ printf("%s: good mknod times %lu <= %lu <= %lu\n",
+ prog, before_mknod, st.st_mtime, after_mknod);
- sleep(5);
+ sleep(5);
+ }
before_utime = time(0);
- rc = utime(argv[0], NULL);
+ rc = utime(filename, NULL);
after_utime = time(0);
if (rc) {
- fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n",
- argv[0], argv[1], rc, strerror(rc));
+ fprintf(stderr, "%s: utime(%s) failed: rc %d: %s\n",
+ prog, filename, errno, strerror(errno));
return 5;
}
- rc = stat(argv[1], &st);
+ rc = stat(filename, &st);
if (rc) {
fprintf(stderr, "%s: second stat(%s) failed: rc %d: %s\n",
- argv[0], argv[1], rc, strerror(rc));
+ prog, filename, errno, strerror(errno));
return 6;
}
if (st.st_mtime < before_utime || st.st_mtime > after_utime) {
fprintf(stderr, "%s: bad utime times %lu <= %lu <= %lu false\n",
- argv[0], before_utime, st.st_mtime, after_utime);
+ prog, before_utime, st.st_mtime, after_utime);
return 7;
}
printf("%s: good utime times %lu <= %lu <= %lu\n",
- argv[0], before_mknod, st.st_mtime, after_mknod);
+ prog, before_utime, st.st_mtime, after_utime);
return 0;
}
lctl_LDADD := $(LIBREADLINE) -lptlctl
lload_LDADD := -lptlctl
sbin_PROGRAMS = lctl lfind lstripe obdio obdbarrier lload wirecheck
-sbin_SCRIPTS = lconf lmc llanalyze
+sbin_SCRIPTS = lconf lmc llanalyze llstat.pl llobdstat.pl lactive load_ldap.sh
wirecheck_SOURCES = wirecheck.c
lctl_SOURCES = parser.c obd.c lctl.c parser.h obdctl.h
lload_SOURCES = lload.c
import sys, getopt, types
import string, os
import ldap
+PYMOD_DIR = "/usr/lib/lustre/python"
+
+def development_mode():
+ base = os.path.dirname(sys.argv[0])
+ if os.access(base+"/Makefile.am", os.R_OK):
+ return 1
+ return 0
+
+if not development_mode():
+ sys.path.append(PYMOD_DIR)
+
import Lustre
lactive_options = [
self.run(cmds)
# Recover a device
- def recover(self, dev_uuid, new_conn):
+ def recover(self, dev_name, new_conn):
cmds = """
- device %%%s
+ device $%s
probe
- recover %s""" %(dev_uuid, new_conn)
+ recover %s""" %(dev_name, new_conn)
self.run(cmds)
# add a route to a range
return local
-def is_prepared(uuid):
- """Return true if a device exists for the uuid"""
- if config.lctl_dump:
- return 0
- if config.noexec and config.cleanup:
- return 1
- try:
- # expect this format:
- # 1 UP ldlm ldlm ldlm_UUID 2
- out = lctl.device_list()
- for s in out:
- if uuid == string.split(s)[4]:
- return 1
- except CommandError, e:
- e.dump()
- return 0
-
-def is_prepared_name(name):
+# XXX: instead of device_list, ask for $name and see what we get
+def is_prepared(name):
"""Return true if a device exists for the name"""
if config.lctl_dump:
return 0
def is_network_prepared():
"""If the LDLM device exists, then assume that all networking
has been configured"""
- return is_prepared('ldlm_UUID')
+ return is_prepared('ldlm')
def fs_is_mounted(path):
"""Return true if path is a mounted lustre filesystem"""
self.add_lustre_module('ldlm', 'ldlm')
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
self.info()
- lctl.newdev(attach="ldlm %s %s" % (self.name, self.uuid))
+ lctl.newdev(attach="ldlm %s %s" % ('ldlm', 'ldlm_UUID'))
def safe_to_clean(self):
out = lctl.device_list()
return len(out) <= 1
def cleanup(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
Module.cleanup(self)
class LOV(Module):
- def __init__(self,db):
+ def __init__(self, db, uuid):
Module.__init__(self, 'LOV', db)
self.add_lustre_module('mdc', 'mdc')
self.add_lustre_module('lov', 'lov')
self.devlist = self.db.get_refs('obd')
self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist))
self.osclist = []
- self.mdc_uuid = ''
+ self.client_uuid = generate_client_uuid(self.name)
+ self.mdc_name = ''
+ self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid)
for obd_uuid in self.devlist:
obd = self.db.lookup(obd_uuid)
- osc = get_osc(obd, self.name)
+ osc = get_osc(obd, self.client_uuid, self.name)
if osc:
self.osclist.append(osc)
else:
panic('osc not found:', obd_uuid)
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
for osc in self.osclist:
try:
except CommandError, e:
print "Error preparing OSC %s (inactive)\n" % osc.uuid
raise e
- self.mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
+ self.mdc.prepare()
+ self.mdc_name = self.mdc.name
self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz,
self.stripe_off, self.pattern, self.devlist, self.mds_name)
lctl.newdev(attach="lov %s %s" % (self.name, self.uuid),
- setup ="%s" % (self.mdc_uuid))
+ setup ="%s" % (self.mdc_name))
def cleanup(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
Module.cleanup(self)
for osc in self.osclist:
osc.cleanup()
- cleanup_mdc(self.db, self.name, self.mds_uuid)
+ mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid)
+ mdc.cleanup()
def load_module(self):
for osc in self.osclist:
self.lov_uuid = self.db.get_first_ref('lov')
l = self.db.lookup(self.lov_uuid)
- self.lov = LOV(l)
+ self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
def prepare(self):
lov = self.lov
Module.load_module(self)
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
if not self.active:
debug(self.uuid, "not active")
run_acceptors()
blkdev = block_dev(self.devpath, self.size, self.fstype, self.format,
self.journal_size)
- if not is_prepared('MDT_UUID'):
+ if not is_prepared('MDT'):
lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'),
setup ="")
if self.nspath:
log("open clients for filesystem:", uuid)
fs = self.db.lookup(uuid)
obd_uuid = fs.get_first_ref('obd')
- client = VOSC(self.db.lookup(obd_uuid), self.name)
+ client_uuid = generate_client_uuid(self.name)
+ client = VOSC(client_uuid, self.db.lookup(obd_uuid), self.name)
client.prepare()
if not self.active:
debug(self.uuid, "not active")
return
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
self.info()
try:
lctl.cleanup(self.name, self.uuid, config.force,
obd_uuid = fs.get_first_ref('obd')
client = VOSC(self.db.lookup(obd_uuid), self.name)
client.cleanup()
- if not self.msd_remaining() and is_prepared('MDT_UUID'):
+ if not self.msd_remaining() and is_prepared('MDT'):
try:
lctl.cleanup("MDT", "MDT_UUID", config.force,
config.failover)
# formatting anything.
# FIXME: check if device is already formatted.
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
if not self.active:
debug(self.uuid, "not active")
lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid),
setup ="%s %s %s %s" %(blkdev, self.fstype,
self.failover_ost, self.nspath))
- if not is_prepared('OSS_UUID'):
+ if not is_prepared('OSS'):
lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'),
setup ="")
if not self.active:
debug(self.uuid, "not active")
return
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
self.info()
try:
lctl.cleanup(self.name, self.uuid, config.force,
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
- if not self.osd_remaining() and is_prepared('OSS_UUID'):
+ if not self.osd_remaining() and is_prepared('OSS'):
try:
lctl.cleanup("OSS", "OSS_UUID", config.force,
config.failover)
# Generic client module, used by OSC and MDC
class Client(Module):
- def __init__(self, tgtdb, module, owner):
+ def __init__(self, tgtdb, uuid, module, owner):
self.target_name = tgtdb.getName()
self.target_uuid = tgtdb.getUUID()
self.db = tgtdb
self.module_name = string.upper(module)
self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(),
self.target_name, owner)
- self.uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
- self.name,
- int(random.random() * 1048576),
- int(random.random() * 1048576))
- self.uuid = self.uuid[0:36]
+ self.uuid = uuid
self.lookup_server(self.tgt_dev_uuid)
self.add_lustre_module(module, module)
def prepare(self, ignore_connect_failure = 0):
self.info(self.target_uuid)
- if is_prepared_name(self.name):
+ if is_prepared(self.name):
self.cleanup()
try:
srv = choose_local_server(self.get_servers())
setup ="%s %s" %(self.target_uuid, srv.uuid))
def cleanup(self):
- if is_prepared_name(self.name):
+ if is_prepared(self.name):
Module.cleanup(self)
try:
srv = choose_local_server(self.get_servers())
class MDC(Client):
- def __init__(self, db, owner):
- Client.__init__(self, db, 'mdc', owner)
+ def __init__(self, db, uuid, owner):
+ Client.__init__(self, db, uuid, 'mdc', owner)
class OSC(Client):
- def __init__(self, db, owner):
- Client.__init__(self, db, 'osc', owner)
+ def __init__(self, db, uuid, owner):
+ Client.__init__(self, db, uuid, 'osc', owner)
class COBD(Module):
# formatting anything.
# FIXME: check if device is already formatted.
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
self.info(self.real_uuid, self.cache_uuid)
lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid),
# virtual interface for OSC and LOV
class VOSC(Module):
- def __init__(self,db, owner):
+ def __init__(self, db, uuid, owner):
Module.__init__(self, 'VOSC', db)
if db.get_class() == 'lov':
- self.osc = LOV(db)
+ self.osc = LOV(db, uuid)
else:
- self.osc = get_osc(db, owner)
+ self.osc = get_osc(db, uuid, owner)
def get_uuid(self):
return self.osc.uuid
+ def get_name(self):
+ return self.osc.name
def prepare(self):
self.osc.prepare()
def cleanup(self):
self.osc.cleanup_module()
def need_mdc(self):
return self.db.get_class() != 'lov'
- def get_mdc_uuid(self):
+ def get_mdc_name(self):
if self.db.get_class() == 'lov':
- return self.osc.mdc_uuid
+ return self.osc.mdc_name
return ''
self.add_lustre_module('obdecho', 'obdecho')
self.obd_uuid = self.db.get_first_ref('obd')
obd = self.db.lookup(self.obd_uuid)
- self.osc = VOSC(obd, self.name)
+ self.osc = VOSC(obd, self.uuid, self.name)
def prepare(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
return
self.osc.prepare() # XXX This is so cheating. -p
self.info(self.obd_uuid)
lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid),
- setup = self.osc.get_uuid())
+ setup = self.osc.get_name())
def cleanup(self):
- if is_prepared(self.uuid):
+ if is_prepared(self.name):
Module.cleanup(self)
self.osc.cleanup()
Module.cleanup_module(self)
self.osc.cleanup_module()
+def generate_client_uuid(name):
+ client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576),
+ name,
+ int(random.random() * 1048576),
+ int(random.random() * 1048576))
+ return client_uuid[:36]
class Mountpoint(Module):
def __init__(self,db):
self.mds_uuid = fs.get_first_ref('mds')
self.obd_uuid = fs.get_first_ref('obd')
obd = self.db.lookup(self.obd_uuid)
- self.vosc = VOSC(obd, self.name)
+ client_uuid = generate_client_uuid(self.name)
+ self.vosc = VOSC(obd, client_uuid, self.name)
if self.vosc.need_mdc():
self.add_lustre_module('mdc', 'mdc')
+ self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid)
self.add_lustre_module('llite', 'llite')
return
self.vosc.prepare()
if self.vosc.need_mdc():
- mdc_uuid = prepare_mdc(self.db, self.name, self.mds_uuid)
+ self.mdc.prepare()
+ mdc_name = self.mdc.name
else:
- mdc_uuid = self.vosc.get_mdc_uuid()
- if not mdc_uuid:
+ mdc_name = self.vosc.get_mdc_name()
+ if not mdc_name:
self.vosc.cleanup()
- panic("Unable to determine MDC UUID. Probably need to cleanup before re-mounting.")
+ panic("Unable to determine MDC name. Probably need to cleanup before re-mounting.")
self.info(self.path, self.mds_uuid, self.obd_uuid)
if config.lctl_dump:
- cmd = "osc=%s,mdc=%s" % (self.vosc.get_uuid(), mdc_uuid)
+ cmd = "osc=%s,mdc=%s" % (self.vosc.get_name(), mdc_name)
lctl.mount_option(cmd)
return
- cmd = "mount -t lustre_lite -o osc=%s,mdc=%s none %s" % \
- (self.vosc.get_uuid(), mdc_uuid, self.path)
+ cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \
+ (self.vosc.get_name(), mdc_name, config.config, self.path)
run("mkdir", self.path)
ret, val = run(cmd)
if ret:
self.vosc.cleanup()
if self.vosc.need_mdc():
- cleanup_mdc(self.db, self.name, self.mds_uuid)
+ self.mdc.cleanup()
panic("mount failed:", self.path, ":", string.join(val))
def cleanup(self):
self.vosc.cleanup()
if self.vosc.need_mdc():
- cleanup_mdc(self.db, self.name, self.mds_uuid)
+ self.mdc.cleanup()
def load_module(self):
self.vosc.load_module()
#
# OSC is no longer in the xml, so we have to fake it.
# this is getting ugly and begging for another refactoring
-def get_osc(ost_db, owner):
- osc = OSC(ost_db, owner)
+def get_osc(ost_db, uuid, owner):
+ osc = OSC(ost_db, uuid, owner)
return osc
-def get_mdc(db, owner, mds_uuid):
+def get_mdc(db, uuid, owner, mds_uuid):
mds_db = db.lookup(mds_uuid);
if not mds_db:
panic("no mds:", mds_uuid)
- mdc = MDC(mds_db, owner)
+ mdc = MDC(mds_db, uuid, owner)
return mdc
-def prepare_mdc(db, owner, mds_uuid):
- mdc = get_mdc(db, owner, mds_uuid)
- mdc.prepare()
- return mdc.uuid
-
-def cleanup_mdc(db, owner, mds_uuid):
- mdc = get_mdc(db, owner, mds_uuid)
- mdc.cleanup()
-
-
############################################################
# routing ("rooting")
if type == 'ldlm':
n = LDLM(db)
elif type == 'lov':
- n = LOV(db)
+ n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID")
elif type == 'network':
n = Network(db)
elif type == 'routetbl':
panic("%s does not appear to be a config file." % (args[0]))
sys.exit(1) # make sure to die here, even in debug mode.
db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement)
+ if not config.config:
+ config.config = os.path.basename(args[0])# use full path?
+ if config.config[-4:] == '.xml':
+ config.config = config.config[:-4]
elif config.ldapurl:
if not config.config:
panic("--ldapurl requires --config name")
{"newdev", jt_obd_newdev, 0, "create a new device\n"
"usage: newdev"},
{"device", jt_obd_device, 0,
- "set current device to <%uuid|$name|devno>\n"
- "usage: device <%uuid|$name|devno>"},
+ "set current device to <%name|$name|devno>\n"
+ "usage: device <%name|$name|devno>"},
{"device_list", jt_obd_list, 0, "show all devices\n"
"usage: device_list"},
{"lustre_build_version", jt_get_version, 0,
-#define _XOPEN_SOURCE 500
-
#include <stdio.h>
#include <getopt.h>
#include <stdlib.h>
/* XXX Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c */
#define MAX_LOV_UUID_COUNT 1000
-#define OBD_NOT_FOUND ((__u32)-1)
+#define OBD_NOT_FOUND (-1)
char * cmd;
struct option longOpts[] = {
int max_ost_count = MAX_LOV_UUID_COUNT;
struct obd_uuid * obduuid;
-__u32 obdcount;
-__u32 obdindex;
char * buf;
int buflen;
+struct obd_uuid * uuids;
struct obd_ioctl_data data;
struct lov_desc desc;
-struct obd_uuid * uuids;
int uuidslen;
int cfglen;
struct lov_mds_md *lmm;
void init();
void usage(FILE *stream);
void errMsg(char *fmt, ...);
-void processPath(char *path);
-int processFile(
- const char *path,
- const struct stat *sp,
- int flag,
- struct FTW *ftwp
- );
-__u32 getobdindex(const char *path);
+void processPath(const char *path);
int
main (int argc, char **argv) {
}
void
-processPath(char *path)
-{
- obdindex = OBD_NOT_FOUND;
- nftw((const char *)path, processFile, 128, FTW_PHYS|FTW_MOUNT);
-}
-
-int
-processFile(const char *path, const struct stat *sp, int flag, struct FTW *ftwp)
+processPath(const char *path)
{
int fd;
- int count;
int rc;
int i;
+ int obdindex;
+ int obdcount;
+ struct obd_uuid *uuidp;
- if (flag != FTW_F)
- return 0;
-
- if (getobdindex(path) == OBD_NOT_FOUND && obdcount == 0) {
- /* terminate nftw walking this tree */
- return(1);
+ if (query || verbose && !obduuid) {
+ printf("%s\n", path);
}
if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) {
errMsg("open \"%.20s\" failed.", path);
perror("open");
+ return;
+ }
+
+ memset(&data, 0, sizeof(data));
+ data.ioc_inllen1 = sizeof(desc);
+ data.ioc_inlbuf1 = (char *)&desc;
+ data.ioc_inllen2 = uuidslen;
+ data.ioc_inlbuf2 = (char *)uuids;
+
+ memset(&desc, 0, sizeof(desc));
+ desc.ld_tgt_count = max_ost_count;
+
+ if (obd_ioctl_pack(&data, &buf, buflen)) {
+ errMsg("internal buffering error.");
+ exit(1);
+ }
+
+ rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
+ if (rc) {
+ if (errno == ENOTTY) {
+ if (!obduuid) {
+ printf("Not a regular file or not Lustre file.\n\n");
+ }
+ /* not fatal: release the descriptor before skipping this path */
+ close(fd);
+ return;
+ }
+ errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
+ perror("ioctl");
exit(1);
+ }
+
+ if (obd_ioctl_unpack(&data, buf, buflen)) {
+ errMsg("Invalid reply from ioctl.");
+ exit(1);
+ }
+
+ obdcount = desc.ld_tgt_count;
+ if (obdcount == 0) {
+ /* nothing to report; do not leak the descriptor */
+ close(fd);
+ return;
+ }
+
+ obdindex = OBD_NOT_FOUND;
+
+ if (obduuid) {
+ for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
+ if (strncmp((const char *)obduuid, (const char *)uuidp,
+ sizeof(*uuidp)) == 0) {
+ obdindex = i;
+ }
+ }
+
+ if (obdindex == OBD_NOT_FOUND) {
+ /* requested OBD is not part of this LOV; skip the file */
+ close(fd);
+ return;
+ }
+ } else if (query || verbose) {
+ printf("OBDS:\n");
+ for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
+ printf("%4d: %s\n", i, (char *)uuidp);
}
memset((void *)buf, 0, buflen);
lmm->lmm_magic = LOV_MAGIC;
lmm->lmm_ost_count = max_ost_count;
- if ((rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm)) < 0) {
- errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed.");
- perror("ioctl");
- return 0;
+ rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm);
+ if (rc) {
+ if (errno == ENODATA) {
+ if(!obduuid) {
+ printf("Has no stripe information.\n\n");
+ }
+ }
+ else {
+ errMsg("LL_IOC_LOV_GETSTRIPE ioctl failed. %d", errno);
+ perror("ioctl");
+ }
+ /* errno has been reported above; close before bailing out */
+ close(fd);
+ return;
}
close(fd);
- if (query || verbose ||
- (obdindex != OBD_NOT_FOUND &&
- lmm->lmm_objects[obdindex].l_object_id))
+ if (obduuid && lmm->lmm_objects[obdindex].l_object_id)
printf("%s\n", path);
if (verbose) {
printf("lmm_stripe_pattern: %d\n", lmm->lmm_magic & 0xf);
}
- count = lmm->lmm_ost_count;
-
if (query || verbose) {
long long oid;
int ost = lmm->lmm_stripe_offset;
int header = 1;
- for (i = 0; i < count; i++, ost++) {
+ for (i = 0; i < lmm->lmm_ost_count; i++, ost++) {
ost %= lmm->lmm_ost_count;
if ((oid = lmm->lmm_objects[ost].l_object_id)) {
if (header) {
ost, oid, obdindex == ost ? " *" : "");
}
}
-
- if (query)
- return(0);
+ printf("\n");
}
-
- return(0);
-}
-
-__u32
-getobdindex(const char *path)
-{
- struct obd_uuid *uuidp;
- int fd;
- int rc;
- int i;
-
- if ((fd = open(path, O_RDONLY)) < 0) {
- errMsg("open \"%.20s\" failed.", path);
- perror("open");
- exit(1);
- }
-
- memset(&data, 0, sizeof data);
- data.ioc_inllen1 = sizeof(desc);
- data.ioc_inlbuf1 = (char *)&desc;
- data.ioc_inllen2 = uuidslen;
- data.ioc_inlbuf2 = (char *)uuids;
-
- memset(&desc, 0, sizeof(desc));
- desc.ld_tgt_count = max_ost_count;
-
- if (obd_ioctl_pack(&data, &buf, buflen)) {
- errMsg("internal buffering error.");
- exit(1);
- }
-
- rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf);
- if (rc) {
- errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno);
- perror("ioctl");
- exit(1);
- }
-
- if (obd_ioctl_unpack(&data, buf, buflen)) {
- errMsg("Invalid reply from ioctl.");
- exit(1);
- }
-
- close(fd);
-
- obdcount = desc.ld_tgt_count;
-
- if (query || verbose) {
- printf("OBDS:\n");
- for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++)
- printf("%4d: %s\n", i, (char *)uuidp);
-
- return(0);
- }
-
- for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) {
- rc = strncmp((const char *)obduuid, (const char *)uuidp,
- sizeof(*uuidp));
- if (rc == 0) {
- obdindex = i;
- break;
- }
- }
-
- if (obdindex == OBD_NOT_FOUND) {
- errMsg("obd UUID '%s' not found.", obduuid);
- return(OBD_NOT_FOUND);
- }
-
- return(0);
}
PARAM = Lustre.Options.PARAM
lmc_options = [
# lmc input/output options
- ('reference', "Print short reference for commands"),
- ('verbose,v', "Print system commands as they are run"),
- ('merge,m', "", PARAM),
- ('output,o', "", PARAM),
+ ('reference', "Print short reference for commands."),
+ ('verbose,v', "Print system commands as they are run."),
+ ('merge,m', "Append to the specified config file.", PARAM),
+ ('output,o', "Write XML configuration into given output file. Overwrite existing content.", PARAM),
('input,i', "", PARAM),
- ('batch', "", PARAM),
+ ('batch', "Used to execute lmc commands in batch mode.", PARAM),
# commands
('add', "", PARAM),
# node options
- ('node', "", PARAM),
- ('timeout', "", PARAM),
+ ('node', "Add a new node in the cluster configuration.", PARAM),
+ ('timeout', "Set timeout to initiate recovery.", PARAM),
('upcall', "Set both lustre and portals upcall scripts.", PARAM),
('lustre_upcall', "Set location of lustre upcall script.", PARAM),
('portals_upcall', "Set location of portals upcall script.", PARAM),
# network
- ('nettype', "", PARAM),
- ('nid', "", PARAM),
- ('tcpbuf', "", PARAM, 0),
- ('port', "", PARAM, DEFAULT_PORT),
- ('nid_exchange', "", PARAM, 0),
- ('irq_affinity', "", PARAM, 0),
- ('hostaddr', "", PARAM, ""),
- ('cluster_id', "", PARAM, "0"),
+ ('nettype', "Specify the network type. This can be tcp/elan/gm/scimac.", PARAM),
+ ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM),
+ ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"),
+ ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT),
+ ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0),
+ ('irq_affinity', "Optional arguement.", PARAM, 0),
+ ('hostaddr', "", PARAM,""),
+ ('cluster_id', "Specify the cluster ID", PARAM, "0"),
# routes
- ('route', "", PARAM),
- ('router', ""),
- ('gw', "", PARAM),
+ ('route', "Add a new route for the cluster.", PARAM),
+ ('router', "Optional flag to mark a node as router."),
+ ('gw', "Specify the nid of the gateway for a route.", PARAM),
('gw_cluster_id', "", PARAM, "0"),
('target_cluster_id', "", PARAM, "0"),
- ('lo', "", PARAM),
- ('hi', "", PARAM, ""),
+ ('lo', "For a range route, this is the low value nid.", PARAM),
+ ('hi', "For a range route, this is a hi value nid.", PARAM,""),
# servers: mds and ost
- ('mds', "", PARAM),
- ('ost', "", PARAM, ""),
- ('osdtype', "", PARAM, "obdfilter"),
+ ('mds', "Specify MDS name.", PARAM),
+ ('ost', "Specify the OST name.", PARAM,""),
+ ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"),
('failover', ""),
('group', "", PARAM),
- ('dev', "", PARAM, ""),
- ('size', "", PARAM, 0),
- ('journal_size', "", PARAM, 0),
- ('fstype', "", PARAM, "ext3"),
- ('ostuuid', "", PARAM, ""),
- ('nspath', "Local mount point of server namespace.", PARAM, ""),
+ ('dev', "Path of the device on local system.", PARAM,""),
+ ('size', "Specify the size of the device if needed.", PARAM,"0"),
+ ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"),
+ ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"),
+ ('ostuuid', "", PARAM,""),
+ ('nspath', "Local mount point of server namespace.", PARAM,""),
('format', ""),
# clients: mountpoint and echo
('echo_client', "", PARAM),
- ('path', "", PARAM),
- ('filesystem', "Lustre filesystem name", PARAM, ''),
+ ('path', "Specify the mountpoint for Lustre.", PARAM),
+ ('filesystem', "Lustre filesystem name", PARAM,""),
# lov
- ('lov', "", PARAM, ''),
- ('stripe_sz', "", PARAM),
- ('stripe_cnt', "", PARAM, 0),
- ('stripe_pattern', "", PARAM, 0),
+ ('lov', "Specify LOV name.", PARAM,""),
+ ('stripe_sz', "Specify the stripe size in bytes.", PARAM),
+ ('stripe_cnt', "Specify the number of OSTs each file should be striped on.", PARAM, 0),
+ ('stripe_pattern', "Specify the stripe pattern. RAID 0 is the only one currently supported.", PARAM, 0),
# cobd
('real_obd', "", PARAM),
LDAP_AUTH="-x -D $LDAP_ROOTDN -w $LDAP_PW"
LUSTRE=${LUSTRE:-`dirname $0`/..}
+if [ -f $LUSTRE/Makefile.am ]; then
+ CONFDIR=$LUSTRE/conf
+else
+ CONFDIR=/usr/lib/lustre
+fi
+
+TOP=$CONFDIR/top.ldif
+XSL=$CONFDIR/lustre2ldif.xsl
+
[ ! -z $LDAPURL ] && LDAP_AUTH="$LDAP_AUTH -H $LDAPURL"
XML=${XML:-$1}
# add the top level record, if needed
ldapsearch $LDAP_AUTH -b $LDAP_BASE > /dev/null 2>&1 ||
- ldapadd $LDAP_AUTH -f $LUSTRE/conf/top.ldif
+ ldapadd $LDAP_AUTH -f $TOP
# If this config already exists, then delete it
ldapsearch $LDAP_AUTH -b config=$NAME,$LDAP_BASE > /dev/null 2>&1 &&
ldapdelete $LDAP_AUTH -r config=$NAME,$LDAP_BASE
-4xslt -D config=$NAME $XML $LUSTRE/conf/lustre2ldif.xsl > $LDIF
+4xslt -D config=$NAME $XML $XSL > $LDIF
echo "Loading config to 'config=$NAME,$LDAP_BASE' ..."
ldapadd $LDAP_AUTH -f $LDIF
return data.ioc_dev + N2D_OFF;
}
-static int do_uuid2dev(char *func, char *uuid)
-{
- struct obd_ioctl_data data;
- int rc;
-
- IOC_INIT(data);
-
- data.ioc_inllen1 = strlen(uuid) + 1;
- data.ioc_inlbuf1 = uuid;
-
- IOC_PACK(func, data);
- rc = l_ioctl(OBD_DEV_ID, OBD_IOC_UUID2DEV, buf);
- if (rc < 0)
- return errno;
- IOC_UNPACK(func, data);
-
- return data.ioc_dev + N2D_OFF;
-}
-
/*
* resolve a device name to a device number.
- * supports a number, $name or %uuid.
+ * supports a number, $name or %name (both prefixes are looked up by name).
if (!name)
return ret;
- if (name[0] == '$') {
+ if (name[0] == '$' || name[0] == '%') {
name++;
rc = do_name2dev(func, name);
if (rc >= N2D_OFF) {
printf("No device found for name %s: %s\n",
name, strerror(rc));
}
- } else if (name[0] == '%') {
- name++;
- rc = do_uuid2dev(func, name);
- if (rc >= N2D_OFF) {
- ret = rc - N2D_OFF;
- printf("UUID %s is device %d\n", name, ret);
- } else {
- printf("No device found for UUID %s: %s\n",
- name, strerror(rc));
- }
} else {
/* Assume it's a number. This means that bogus strings become
* 0. I might care about that some day. */