From 8d55483b7c26838e5ded2d0725f1ebb1fa7e1126 Mon Sep 17 00:00:00 2001 From: pschwan Date: Thu, 3 Jul 2003 19:55:01 +0000 Subject: [PATCH] Merge b_devel into b_orphan (20030703 update tag) --- lustre/include/linux/lustre_otree.h | 31 +++ .../patches/ext3-delete_thread-2.4.18.patch | 84 ++++--- .../kernel_patches/patches/netconsole_sysrq.patch | 41 ++++ lustre/kernel_patches/patches/tg3_netconsole.patch | 247 +++++++++++++++++++ lustre/kernel_patches/pc/netconsole_sysrq.pc | 2 + lustre/kernel_patches/pc/tg3_netconsole.pc | 1 + lustre/llite/llite_internal.h | 27 +++ lustre/mds/mds_internal.h | 4 +- lustre/obdclass/otree.c | 266 +++++++++++++++++++++ lustre/tests/getdents.c | 31 +++ lustre/tests/lstiming.sh | 51 ++++ lustre/tests/o_directory.c | 51 ++++ lustre/tests/runvmstat | 5 +- 13 files changed, 806 insertions(+), 35 deletions(-) create mode 100644 lustre/include/linux/lustre_otree.h create mode 100644 lustre/kernel_patches/patches/netconsole_sysrq.patch create mode 100644 lustre/kernel_patches/patches/tg3_netconsole.patch create mode 100644 lustre/kernel_patches/pc/netconsole_sysrq.pc create mode 100644 lustre/kernel_patches/pc/tg3_netconsole.pc create mode 100644 lustre/obdclass/otree.c create mode 100644 lustre/tests/getdents.c create mode 100644 lustre/tests/lstiming.sh create mode 100644 lustre/tests/o_directory.c diff --git a/lustre/include/linux/lustre_otree.h b/lustre/include/linux/lustre_otree.h new file mode 100644 index 0000000..3d8d510 --- /dev/null +++ b/lustre/include/linux/lustre_otree.h @@ -0,0 +1,31 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _LUSTRE_OTREE_H +#define _LUSTRE_OTREE_H + +/* XXX ok, I can't make sense of our header nest right now.. 
*/ +#ifdef __KERNEL__ +#include +#include + +struct otree { + rb_root_t ot_root; + spinlock_t ot_lock; + unsigned long ot_num_marked; +}; +#else +struct otree { + unsigned long lalala; +}; +#endif + +int ot_mark_offset(struct otree *ot, unsigned long offset); +int ot_clear_extent(struct otree *ot, unsigned long start, unsigned long end); +int ot_find_marked_extent(struct otree *ot, unsigned long *start, + unsigned long *end); +int ot_last_marked(struct otree *ot, unsigned long *last); +unsigned long ot_num_marked(struct otree *ot); +void ot_init(struct otree *ot); + +#endif diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch index 6b9a348..e01feca 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch @@ -1,8 +1,11 @@ - 0 files changed + fs/ext3/super.c | 229 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 2 + include/linux/ext3_fs_sb.h | 10 + + 3 files changed, 241 insertions(+) ---- linux-2.4.18-chaos52/fs/ext3/super.c~ext3-delete_thread-2.4.18 2003-06-01 03:24:13.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/super.c 2003-06-03 17:01:49.000000000 +0800 -@@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe +--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jun 18 11:59:14 2003 +@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe } } @@ -36,22 +39,24 @@ + + INIT_LIST_HEAD(&sbi->s_delete_list); + wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("EXT3-fs: delete thread on %s started\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); + + /* main loop */ + for (;;) { -+ sleep_on(&sbi->s_delete_thread_queue); ++ wait_event_interruptible(sbi->s_delete_thread_queue, ++ 
!list_empty(&sbi->s_delete_list) || ++ !test_opt(sb, ASYNCDEL)); + ext3_debug("%s woken up: %lu inodes, %lu blocks\n", + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); + + spin_lock(&sbi->s_delete_lock); + if (list_empty(&sbi->s_delete_list)) { ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + memset(&sbi->s_delete_list, 0, + sizeof(sbi->s_delete_list)); + spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("ext3 delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); ++ ext3_debug("delete thread on %s exiting\n", ++ kdevname(sb->s_dev)); + wake_up(&sbi->s_delete_waiter_queue); + break; + } @@ -73,12 +78,13 @@ + sbi->s_delete_blocks -= blocks; + sbi->s_delete_inodes--; + } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) ++ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { + ext3_warning(sb, __FUNCTION__, + "%lu blocks, %lu inodes on list?\n", + sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ sbi->s_delete_blocks = 0; ++ sbi->s_delete_inodes = 0; ++ } + spin_unlock(&sbi->s_delete_lock); + wake_up(&sbi->s_delete_waiter_queue); + } @@ -92,11 +98,11 @@ + int rc; + + spin_lock_init(&sbi->s_delete_lock); -+ memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list)); + init_waitqueue_head(&sbi->s_delete_thread_queue); + init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; ++ ++ if (!test_opt(sb, ASYNCDEL)) ++ return; + + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); + if (rc < 0) @@ -108,6 +114,10 @@ + +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) +{ ++ if (sbi->s_delete_list.next == 0) /* thread never started */ ++ return; ++ ++ clear_opt(sbi->s_mount_opt, ASYNCDEL); + wake_up(&sbi->s_delete_thread_queue); + wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); +} @@ -135,8 +145,8 @@ + clear_inode(old_inode); + return; + } -+ -+ if (!test_opt (old_inode->i_sb, ASYNCDEL)) { ++ ++ 
if (!test_opt(old_inode->i_sb, ASYNCDEL)) { + ext3_delete_inode(old_inode); + return; + } @@ -148,7 +158,8 @@ + return; + } + -+ if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) { ++ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || ++ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); + ext3_delete_inode(old_inode); @@ -174,7 +185,7 @@ + } + if (!new_inode) { + up(&sbi->s_orphan_lock); -+ ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n", ++ ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); + ext3_delete_inode(old_inode); + return; @@ -194,8 +205,6 @@ + + clear_inode(old_inode); + -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); + spin_lock(&sbi->s_delete_lock); + J_ASSERT(list_empty(&new_inode->i_dentry)); + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); @@ -203,6 +212,9 @@ + sbi->s_delete_inodes++; + spin_unlock(&sbi->s_delete_lock); + ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ + wake_up(&sbi->s_delete_thread_queue); +} +#else @@ -213,7 +225,7 @@ void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +609,7 @@ void ext3_put_super (struct super_block +@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -221,7 +233,7 @@ ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +658,11 @@ static struct super_operations ext3_sops +@@ -451,7 +664,11 @@ static struct super_operations ext3_sops write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. 
Don't need */ @@ -232,21 +244,23 @@ +#endif put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -514,6 +723,12 @@ static int parse_options (char * options + write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ +@@ -511,6 +728,14 @@ static int parse_options (char * options this_char = strtok (NULL, ",")) { if ((value = strchr (this_char, '=')) != NULL) *value++ = 0; +#ifdef EXT3_DELETE_THREAD + if (!strcmp(this_char, "asyncdel")) + set_opt(*mount_options, ASYNCDEL); ++ else if (!strcmp(this_char, "noasyncdel")) ++ clear_opt(*mount_options, ASYNCDEL); + else +#endif + if (!strcmp (this_char, "bsddf")) clear_opt (*mount_options, MINIX_DF); else if (!strcmp (this_char, "nouid32")) { -@@ -1209,6 +1424,7 @@ struct super_block * ext3_read_super (st +@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -254,8 +268,18 @@ /* * akpm: core read_super() calls in here with the superblock locked. 
* That deadlocks, because orphan cleanup needs to lock the superblock ---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 2003-06-01 03:24:11.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-06-03 17:03:28.000000000 +0800 +@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s + if (!parse_options(data, &tmp, sbi, &tmp, 1)) + return -EINVAL; + ++ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) ++ ext3_stop_delete_thread(sbi); ++ + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + +--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Tue Jun 17 12:36:56 2003 @@ -190,6 +190,7 @@ struct ext3_group_desc */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -272,8 +296,8 @@ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H ---- linux-2.4.18-chaos52/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 2003-06-01 03:24:13.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs_sb.h 2003-06-03 16:59:24.000000000 +0800 +--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 ++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 32 diff --git a/lustre/kernel_patches/patches/netconsole_sysrq.patch b/lustre/kernel_patches/patches/netconsole_sysrq.patch new file mode 100644 index 0000000..b995461 --- /dev/null +++ b/lustre/kernel_patches/patches/netconsole_sysrq.patch @@ -0,0 +1,41 @@ + 0 files changed + +--- linux-2.4.20-rh/drivers/net/netconsole.c~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800 ++++ linux-2.4.20-rh-root/drivers/net/netconsole.c 2003-07-01 11:10:26.000000000 +0800 +@@ -988,7 +988,15 @@ static void netconsole_netdump (struct p 
+ reply.info = 0; + send_netdump_skb(dev, tmp, strlen(tmp), &reply); + break; +- ++ case COMM_SYSRQ: ++ Dprintk("got SYSRQ command.\n"); ++ printk("netdump: got SYSRQ command %d \n", req->from); ++ handle_sysrq(req->from, regs, NULL, NULL); ++ reply.code = REPLY_SYSRQ; ++ reply.nr = req->nr; ++ reply.info = req->from; ++ send_netdump_skb(dev, tmp, strlen(tmp), &reply); ++ break; + default: + reply.code = REPLY_ERROR; + reply.nr = req->nr; +--- linux-2.4.20-rh/drivers/net/netconsole.h~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800 ++++ linux-2.4.20-rh-root/drivers/net/netconsole.h 2003-07-01 11:11:29.000000000 +0800 +@@ -42,6 +42,7 @@ enum netdump_commands { + COMM_START_NETDUMP_ACK = 7, + COMM_GET_REGS = 8, + COMM_SHOW_STATE = 9, ++ COMM_SYSRQ=10, + }; + + #define NETDUMP_REQ_SIZE (8+4*4) +@@ -69,6 +70,7 @@ enum netdump_replies { + REPLY_REGS = 10, + REPLY_MAGIC = 11, + REPLY_SHOW_STATE = 12, ++ REPLY_SYSRQ=13, + }; + + typedef struct netdump_reply_s { + +_ diff --git a/lustre/kernel_patches/patches/tg3_netconsole.patch b/lustre/kernel_patches/patches/tg3_netconsole.patch new file mode 100644 index 0000000..267dedd --- /dev/null +++ b/lustre/kernel_patches/patches/tg3_netconsole.patch @@ -0,0 +1,247 @@ + 0 files changed + +--- linux-2.4.20-rh/drivers/net/tg3.c~tg3_netconsole 2003-04-11 14:04:56.000000000 +0800 ++++ linux-2.4.20-rh-root/drivers/net/tg3.c 2003-07-01 11:27:46.000000000 +0800 +@@ -170,6 +170,10 @@ static void tg3_write_indirect_reg32(str + } + } + ++#ifdef HAVE_POLL_CONTROLLER ++static void Poll_tg3(struct net_device *); ++#endif ++ + #define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val)) + #define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg)) + #define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg)) +@@ -1899,7 +1903,138 @@ static int tg3_vlan_rx(struct tg3 *tp, s + return vlan_hwaccel_receive_skb(skb, tp->vlgrp, vlan_tag); + } + #endif ++/* for netconsole */ ++static int upcall_rx_hook(struct net_device 
*dev) ++{ ++ struct tg3 *tp = dev->priv; ++ u32 work_mask; ++ u32 rx_rcb_ptr = tp->rx_rcb_ptr; ++ u16 hw_idx, sw_idx; ++ int received; ++ ++ hw_idx = tp->hw_status->idx[0].rx_producer; ++ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE; ++ work_mask = 0; ++ received = 0; ++ while (sw_idx != hw_idx) { ++ struct tg3_rx_buffer_desc *desc = &tp->rx_rcb[sw_idx]; ++ unsigned int len; ++ struct sk_buff *skb; ++ dma_addr_t dma_addr; ++ u32 opaque_key, desc_idx, *post_ptr; ++ ++ desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK; ++ opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK; ++ if (opaque_key == RXD_OPAQUE_RING_STD) { ++ dma_addr = pci_unmap_addr(&tp->rx_std_buffers[desc_idx], ++ mapping); ++ skb = tp->rx_std_buffers[desc_idx].skb; ++ post_ptr = &tp->rx_std_ptr; ++ } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) { ++ dma_addr = pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx], ++ mapping); ++ skb = tp->rx_jumbo_buffers[desc_idx].skb; ++ post_ptr = &tp->rx_jumbo_ptr; ++ } ++ else { ++ goto next_pkt_nopost; ++ } ++ ++ work_mask |= opaque_key; ++ ++ if ((desc->err_vlan & RXD_ERR_MASK) != 0 && ++ (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) { ++ drop_it: ++ tg3_recycle_rx(tp, opaque_key, ++ desc_idx, *post_ptr); ++ drop_it_no_recycle: ++ /* Other statistics kept track of by card. 
*/ ++ tp->net_stats.rx_dropped++; ++ goto next_pkt; ++ } ++ ++ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */ ++ ++ if (len > RX_COPY_THRESHOLD) { ++ int skb_size; ++ ++ skb_size = tg3_alloc_rx_skb(tp, opaque_key, ++ desc_idx, *post_ptr); ++ if (skb_size < 0) ++ goto drop_it; ++ ++ pci_unmap_single(tp->pdev, dma_addr, ++ skb_size - tp->rx_offset, ++ PCI_DMA_FROMDEVICE); ++ ++ skb_put(skb, len); ++ } else { ++ struct sk_buff *copy_skb; ++ ++ tg3_recycle_rx(tp, opaque_key, ++ desc_idx, *post_ptr); ++ ++ copy_skb = dev_alloc_skb(len + 2); ++ if (copy_skb == NULL) ++ goto drop_it_no_recycle; ++ ++ copy_skb->dev = tp->dev; ++ skb_reserve(copy_skb, 2); ++ skb_put(copy_skb, len); ++ pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); ++ memcpy(copy_skb->data, skb->data, len); ++ ++ /* We'll reuse the original ring buffer. */ ++ skb = copy_skb; ++ } ++ ++ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) && ++ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) && ++ (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK) ++ >> RXD_TCPCSUM_SHIFT) == 0xffff)) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ else ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ skb->protocol = eth_type_trans(skb, tp->dev); ++/*into netconsole driver*/ ++ dev->rx_hook(skb); ++ kfree_skb(skb); ++ tp->dev->last_rx = jiffies; ++ received++; ++next_pkt: ++ (*post_ptr)++; ++next_pkt_nopost: ++ rx_rcb_ptr++; ++ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE; ++ } ++ ++ /* ACK the status ring. */ ++ tp->rx_rcb_ptr = rx_rcb_ptr; ++ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW, ++ (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE)); ++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) ++ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW); + ++ /* Refill RX ring(s). 
*/ ++ if (work_mask & RXD_OPAQUE_RING_STD) { ++ sw_idx = tp->rx_std_ptr % TG3_RX_RING_SIZE; ++ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW, ++ sw_idx); ++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) ++ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW); ++ } ++ if (work_mask & RXD_OPAQUE_RING_JUMBO) { ++ sw_idx = tp->rx_jumbo_ptr % TG3_RX_JUMBO_RING_SIZE; ++ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW, ++ sw_idx); ++ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) ++ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW); ++ } ++ ++ return received; ++ ++} + /* The RX ring scheme is composed of multiple rings which post fresh + * buffers to the chip, and one special ring the chip uses to report + * status back to the host. +@@ -2006,7 +2141,7 @@ static int tg3_rx(struct tg3 *tp, int bu + /* We'll reuse the original ring buffer. */ + skb = copy_skb; + } +- ++ + if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) && + (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) && + (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK) +@@ -2016,6 +2151,8 @@ static int tg3_rx(struct tg3 *tp, int bu + skb->ip_summed = CHECKSUM_NONE; + + skb->protocol = eth_type_trans(skb, tp->dev); ++ ++ + #if TG3_VLAN_TAG_USED + if (tp->vlgrp != NULL && + desc->type_flags & RXD_FLAG_VLAN) { +@@ -2058,7 +2195,6 @@ next_pkt_nopost: + if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) + tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW); + } +- + return received; + } + +@@ -2151,7 +2287,6 @@ static void tg3_interrupt(int irq, void + unsigned long flags; + + spin_lock_irqsave(&tp->lock, flags); +- + if (sblk->status & SD_STATUS_UPDATED) { + /* + * writing any value to intr-mbox-0 clears PCI INTA# and +@@ -2169,8 +2304,17 @@ static void tg3_interrupt(int irq, void + tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + sblk->status &= ~SD_STATUS_UPDATED; + +- if (likely(tg3_has_work(dev, tp))) +- netif_rx_schedule(dev); /* schedule NAPI poll */ ++ if (likely(tg3_has_work(dev, tp))){ ++ if 
(unlikely(dev->rx_hook != NULL) && netdump_mode) { ++ int ret; ++ struct sk_buff *skb; ++ ret = upcall_rx_hook(dev); ++ if (!ret){ ++ goto out; ++ } ++ } ++ netif_rx_schedule(dev); /* schedule NAPI poll */ ++ } + else { + /* no work, shared interrupt perhaps? re-enable + * interrupts, and flush that PCI write +@@ -2180,7 +2324,7 @@ static void tg3_interrupt(int irq, void + tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); + } + } +- ++out: + spin_unlock_irqrestore(&tp->lock, flags); + } + +@@ -6804,7 +6948,10 @@ static int __devinit tg3_init_one(struct + dev->watchdog_timeo = TG3_TX_TIMEOUT; + dev->change_mtu = tg3_change_mtu; + dev->irq = pdev->irq; +- ++#ifdef HAVE_POLL_CONTROLLER ++ dev->poll_controller = &Poll_tg3; ++#endif ++ + err = tg3_get_invariants(tp); + if (err) { + printk(KERN_ERR PFX "Problem fetching invariants of chip, " +@@ -6882,6 +7029,15 @@ err_out_disable_pdev: + return err; + } + ++#ifdef HAVE_POLL_CONTROLLER ++static void Poll_tg3(struct net_device *dev) ++{ ++ if (!netdump_mode) disable_irq(dev->irq); ++ tg3_interrupt(dev->irq, dev, NULL); ++ if (!netdump_mode) enable_irq(dev->irq); ++} ++#endif ++ + static void __devexit tg3_remove_one(struct pci_dev *pdev) + { + struct net_device *dev = pci_get_drvdata(pdev); + +_ diff --git a/lustre/kernel_patches/pc/netconsole_sysrq.pc b/lustre/kernel_patches/pc/netconsole_sysrq.pc new file mode 100644 index 0000000..030fc19 --- /dev/null +++ b/lustre/kernel_patches/pc/netconsole_sysrq.pc @@ -0,0 +1,2 @@ +drivers/net/netconsole.c +drivers/net/netconsole.h diff --git a/lustre/kernel_patches/pc/tg3_netconsole.pc b/lustre/kernel_patches/pc/tg3_netconsole.pc new file mode 100644 index 0000000..6653b7b --- /dev/null +++ b/lustre/kernel_patches/pc/tg3_netconsole.pc @@ -0,0 +1 @@ +drivers/net/tg3.c diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index e53b605..4684383 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -1,2 +1,29 @@ +/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2003 Cluster File Systems, Inc. + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + */ + +#ifndef LLITE_INTERNAL_H +#define LLITE_INTERNAL_H + +struct lustre_handle; +struct lov_stripe_md; + int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, int flags, void *opaque); +int ll_rd_dirty_pages(char *page, char **start, off_t off, int count, + int *eof, void *data); +int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count, + int *eof, void *data); +int ll_wr_max_dirty_pages(struct file *file, const char *buffer, + unsigned long count, void *data); +int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, + unsigned long start, unsigned long end); +int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm, + unsigned long index); + +#endif /* LLITE_INTERNAL_H */ diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 24746c2..a1413e8 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -22,9 +22,7 @@ void mds_mfd_destroy(struct mds_file_data *mfd); void mds_commit_cb(struct obd_device *obd, __u64 last_rcvd, void *cb_data, int error); int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data, - fsfilt_cb_t cb_fn, void *cb_data); - + struct ptlrpc_request *req, int rc, __u32 op_data); /* mds/mds_lib.c */ int mds_update_unpack(struct ptlrpc_request *, int offset, diff --git a/lustre/obdclass/otree.c b/lustre/obdclass/otree.c new file mode 100644 index 0000000..16ef088 --- /dev/null +++ b/lustre/obdclass/otree.c @@ -0,0 +1,266 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc + * + * our offset trees (otrees) track single-bit state of offsets in an + * extent tree. + */ + +#define EXPORT_SYMTAB +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_OSC +#include +#include +#include +#include + +struct offset_extent { + rb_node_t oe_node; + unsigned long oe_start, oe_end; +}; + +static struct offset_extent * ot_find_oe(rb_root_t *root, + struct offset_extent *needle) +{ + struct rb_node_s *node = root->rb_node; + struct offset_extent *oe; + ENTRY; + + CDEBUG(D_INODE, "searching [%lu -> %lu]\n", needle->oe_start, + needle->oe_end); + + while (node) { + oe = rb_entry(node, struct offset_extent, oe_node); + if (needle->oe_end < oe->oe_start) + node = node->rb_left; + else if (needle->oe_start > oe->oe_end) + node = node->rb_right; + else { + CDEBUG(D_INODE, "returning [%lu -> %lu]\n", + oe->oe_start, oe->oe_end); + RETURN(oe); + } + } + RETURN(NULL); +} + +/* do the rbtree mechanics to insert a node, callers are responsible + * for making sure that this new node doesn't overlap with existing + * nodes */ +static void ot_indert_oe(rb_root_t *root, struct offset_extent *new_oe) +{ + rb_node_t ** p = &root->rb_node; + rb_node_t * parent = NULL; + struct offset_extent *oe; + ENTRY; + + 
LASSERT(new_oe->oe_start <= new_oe->oe_end); + + while (*p) { + parent = *p; + oe = rb_entry(parent, struct offset_extent, oe_node); + if ( new_oe->oe_end < oe->oe_start ) + p = &(*p)->rb_left; + else if ( new_oe->oe_start > oe->oe_end ) + p = &(*p)->rb_right; + else + LBUG(); + } + rb_link_node(&new_oe->oe_node, parent, p); + rb_insert_color(&new_oe->oe_node, root); + EXIT; +} + +int ot_mark_offset(struct otree *ot, unsigned long offset) +{ + struct offset_extent needle, *oe, *new_oe; + int rc = 0; + ENTRY; + + OBD_ALLOC(new_oe, sizeof(*new_oe)); + if (new_oe == NULL) + RETURN(-ENOMEM); + + spin_lock(&ot->ot_lock); + + /* find neighbours that we might glom on to */ + needle.oe_start = (offset > 0) ? offset - 1 : offset; + needle.oe_end = (offset < ~0) ? offset + 1 : offset; + oe = ot_find_oe(&ot->ot_root, &needle); + if ( oe == NULL ) { + new_oe->oe_start = offset; + new_oe->oe_end = offset; + ot_indert_oe(&ot->ot_root, new_oe); + ot->ot_num_marked++; + new_oe = NULL; + GOTO(out, rc); + } + + /* already recorded */ + if ( offset >= oe->oe_start && offset <= oe->oe_end ) + GOTO(out, rc); + + /* ok, need to check for adjacent neighbours */ + needle.oe_start = offset; + needle.oe_end = offset; + if (ot_find_oe(&ot->ot_root, &needle)) + GOTO(out, rc); + + /* ok, it's safe to extend the oe we found */ + if ( offset == oe->oe_start - 1 ) + oe->oe_start--; + else if ( offset == oe->oe_end + 1 ) + oe->oe_end++; + else + LBUG(); + ot->ot_num_marked++; + +out: + CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked); + spin_unlock(&ot->ot_lock); + if (new_oe) + OBD_FREE(new_oe, sizeof(*new_oe)); + RETURN(rc); +} + +int ot_clear_extent(struct otree *ot, unsigned long start, unsigned long end) +{ + struct offset_extent needle, *oe, *new_oe; + int rc = 0; + ENTRY; + + /* will allocate more intelligently later */ + OBD_ALLOC(new_oe, sizeof(*new_oe)); + if (new_oe == NULL) + RETURN(-ENOMEM); + + needle.oe_start = start; + needle.oe_end = end; + + spin_lock(&ot->ot_lock); + for ( ; 
(oe = ot_find_oe(&ot->ot_root, &needle)) ; ) { + rc = 0; + + /* see if we're punching a hole and need to create a node */ + if (oe->oe_start < start && oe->oe_end > end) { + new_oe->oe_start = end + 1; + new_oe->oe_end = oe->oe_end; + oe->oe_end = start - 1; + ot_indert_oe(&ot->ot_root, new_oe); + new_oe = NULL; + ot->ot_num_marked -= end - start + 1; + break; + } + + /* overlapping edges */ + if (oe->oe_start < start && oe->oe_end <= end) { + ot->ot_num_marked -= oe->oe_end - start + 1; + oe->oe_end = start - 1; + oe = NULL; + continue; + } + if (oe->oe_end > end && oe->oe_start >= start) { + ot->ot_num_marked -= end - oe->oe_start + 1; + oe->oe_start = end + 1; + oe = NULL; + continue; + } + + /* an extent entirely within the one we're clearing */ + rb_erase(&oe->oe_node, &ot->ot_root); + ot->ot_num_marked -= oe->oe_end - oe->oe_start + 1; + spin_unlock(&ot->ot_lock); + OBD_FREE(oe, sizeof(*oe)); + spin_lock(&ot->ot_lock); + } + CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked); + spin_unlock(&ot->ot_lock); + if (new_oe) + OBD_FREE(new_oe, sizeof(*new_oe)); + RETURN(rc); +} + +int ot_find_marked_extent(struct otree *ot, unsigned long *start, + unsigned long *end) +{ + struct offset_extent needle, *oe; + int rc = -ENOENT; + ENTRY; + + needle.oe_start = *start; + needle.oe_end = *end; + + spin_lock(&ot->ot_lock); + oe = ot_find_oe(&ot->ot_root, &needle); + if (oe) { + *start = oe->oe_start; + *end = oe->oe_end; + rc = 0; + } + spin_unlock(&ot->ot_lock); + + RETURN(rc); +} + +int ot_last_marked(struct otree *ot, unsigned long *last) +{ + struct rb_node_s *found, *node; + struct offset_extent *oe; + int rc = -ENOENT; + ENTRY; + + spin_lock(&ot->ot_lock); + for (node = ot->ot_root.rb_node, found = NULL; + node; + found = node, node = node->rb_right) + ; + + if (found) { + oe = rb_entry(found, struct offset_extent, oe_node); + *last = oe->oe_end; + rc = 0; + } + spin_unlock(&ot->ot_lock); + RETURN(rc); +} + +unsigned long ot_num_marked(struct otree *ot) +{ + return 
ot->ot_num_marked; +} + +void ot_init(struct otree *ot) +{ + CDEBUG(D_INODE, "initializing %p\n", ot); + spin_lock_init(&ot->ot_lock); + ot->ot_num_marked = 0; + ot->ot_root.rb_node = NULL; +} + +EXPORT_SYMBOL(ot_mark_offset); +EXPORT_SYMBOL(ot_clear_extent); +EXPORT_SYMBOL(ot_find_marked_extent); +EXPORT_SYMBOL(ot_last_marked); +EXPORT_SYMBOL(ot_num_marked); +EXPORT_SYMBOL(ot_init); diff --git a/lustre/tests/getdents.c b/lustre/tests/getdents.c new file mode 100644 index 0000000..b4155a9 --- /dev/null +++ b/lustre/tests/getdents.c @@ -0,0 +1,31 @@ +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + DIR *dir; + struct dirent64 *entry; + + if (argc < 2) { + fprintf(stderr, "Usage: %s dirname\n", argv[0]); + return 1; + } + + dir = opendir(argv[1]); + if (!dir) { + int rc = errno; + perror("opendir"); + return rc; + } + + while ((entry = readdir64(dir))) { + puts(entry->d_name); + } + + closedir(dir); + + return 0; +} + diff --git a/lustre/tests/lstiming.sh b/lustre/tests/lstiming.sh new file mode 100644 index 0000000..0b494e4 --- /dev/null +++ b/lustre/tests/lstiming.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +set -e + +PATH=$PATH:. + +CHECKSTAT=${CHECKSTAT:-"checkstat -v"} +MOUNT1=${MOUNT1:-/mnt/lustre1} +MOUNT2=${MOUNT2:-/mnt/lustre2} +DIRNAME=${DIRNAME:-"ls-timing"} +DIRSIZE=${DIRSIZE:-200} +export NAME=${NAME:-mount2} + +error () { + echo FAIL + exit 1 +} + +pass() { + echo PASS +} +echo "Mounting..." +mount | grep $MOUNT1 || sh llmount.sh + +echo -n "Preparing test directory with $DIRSIZE files..." +rm -rf "$MOUNT1/$DIRNAME" +rm -rf "$MOUNT2/$DIRNAME" +mkdir -p "$MOUNT1/$DIRNAME" +[ -d "$MOUNT2/$DIRNAME" ] || error +createmany -o $MOUNT1/$DIRNAME/file 0 $DIRSIZE &> /dev/null +echo "done" + +echo -n "Cached ls: " +time ls -lr $MOUNT1/$DIRNAME 1> /dev/null + +echo -n "Uncached ls: " +time ls -lr $MOUNT2/$DIRNAME 1> /dev/null + + +fsx $MOUNT1/$DIRNAME/fsx.file &>/dev/null & +fsxpid=$! 
+ +echo -n "Cached busy ls:" +time ls -lr $MOUNT1/$DIRNAME 1> /dev/null + +echo -n "Uncached busy ls: " +time ls -lr $MOUNT2/$DIRNAME 1> /dev/null + +kill $fsxpid + +exit diff --git a/lustre/tests/o_directory.c b/lustre/tests/o_directory.c new file mode 100644 index 0000000..d4b2c1b --- /dev/null +++ b/lustre/tests/o_directory.c @@ -0,0 +1,51 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +/* for O_DIRECTORY */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + int fd, rc; + + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + exit(1); + } + + fd = open(argv[1], O_RDONLY | O_CREAT, 0600); + if (fd == -1) { + printf("Error opening %s for create: %s\n", argv[1], + strerror(errno)); + exit(1); + } + rc = close(fd); + if (rc < 0) { + printf("Error closing %s: %s\n", argv[1], strerror(errno)); + exit(1); + } + + fd = open(argv[1], O_DIRECTORY); + if (fd >= 0) { + printf("opening %s as directory should have returned an " + "error!\n", argv[1]); + exit(1); + } + if (errno != ENOTDIR) { + printf("opening %s as directory, expected -ENOTDIR and got " + "%s\n", argv[1], strerror(errno)); + exit(1); + } + + return 0; +} diff --git a/lustre/tests/runvmstat b/lustre/tests/runvmstat index 3ce6810..b04d84c 100755 --- a/lustre/tests/runvmstat +++ b/lustre/tests/runvmstat @@ -1,5 +1,6 @@ #!/bin/sh vmstat 1 | while read LINE ; do - echo "`date +s`: $LINE" - [ "$1" ] && echo "`date +s`: $LINE" >> $1 + LINE="`date +%s`: $LINE" + echo $LINE + [ "$1" ] && echo $LINE >> $1 done -- 1.8.3.1