#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
void __exit
kqswnal_finalise (void)
{
+ kqswnal_tx_t *ktx;
+ kqswnal_rx_t *krx;
+
switch (kqswnal_data.kqn_init)
{
default:
* ep_dvma_release() get fixed (and releases any mappings in the
* region), we can delete all the code from here --------> */
- if (kqswnal_data.kqn_txds != NULL) {
- int i;
-
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
-
- /* If ktx has a buffer, it got mapped; unmap now.
- * NB only the pre-mapped stuff is still mapped
- * since all tx descs must be idle */
+ for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx =ktx->ktx_alloclist){
+ /* If ktx has a buffer, it got mapped; unmap now. NB only
+ * the pre-mapped stuff is still mapped since all tx descs
+ * must be idle */
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- }
+ if (ktx->ktx_buffer != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_tx_nmh,
+ &ktx->ktx_ebuffer);
}
- if (kqswnal_data.kqn_rxds != NULL) {
- int i;
-
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
+ /* If krx_kiov[0].kiov_page got allocated, it got mapped.
+ * NB subsequent pages get merged */
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
-
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- }
+ if (krx->krx_kiov[0].kiov_page != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_rx_nmh,
+ &krx->krx_elanbuffer);
}
/* <----------- to here */
if (kqswnal_data.kqn_ep_rx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
+ ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_rx_nmh);
if (kqswnal_data.kqn_ep_tx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
+ ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_tx_nmh);
#else
if (kqswnal_data.kqn_eprxdmahandle != NULL)
{
}
#endif
- if (kqswnal_data.kqn_txds != NULL)
- {
- int i;
+ while (kqswnal_data.kqn_txds != NULL) {
+ ktx = kqswnal_data.kqn_txds;
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
- {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+ if (ktx->ktx_buffer != NULL)
+ PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
- if (ktx->ktx_buffer != NULL)
- PORTAL_FREE(ktx->ktx_buffer,
- KQSW_TX_BUFFER_SIZE);
- }
-
- PORTAL_FREE(kqswnal_data.kqn_txds,
- sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
- KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = ktx->ktx_alloclist;
+ PORTAL_FREE(ktx, sizeof(*ktx));
}
- if (kqswnal_data.kqn_rxds != NULL)
- {
- int i;
- int j;
+ while (kqswnal_data.kqn_rxds != NULL) {
+ int i;
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
- for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_kiov[j].kiov_page != NULL)
- __free_page (krx->krx_kiov[j].kiov_page);
- }
+ krx = kqswnal_data.kqn_rxds;
+ for (i = 0; i < krx->krx_npages; i++)
+ if (krx->krx_kiov[i].kiov_page != NULL)
+ __free_page (krx->krx_kiov[i].kiov_page);
- PORTAL_FREE(kqswnal_data.kqn_rxds,
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
- KQSW_NRXMSGS_LARGE));
+ kqswnal_data.kqn_rxds = krx->krx_alloclist;
+ PORTAL_FREE(krx, sizeof (*krx));
}
/* resets flags, pointers to NULL etc */
#endif
int rc;
int i;
+ kqswnal_rx_t *krx;
+ kqswnal_tx_t *ktx;
int elan_page_idx;
int pkmem = atomic_read(&portal_kmemory);
/**********************************************************************/
/* Allocate/Initialise transmit descriptors */
- PORTAL_ALLOC(kqswnal_data.kqn_txds,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
- if (kqswnal_data.kqn_txds == NULL)
- {
- kqswnal_finalise ();
- return (-ENOMEM);
- }
-
- /* clear flags, null pointers etc */
- memset(kqswnal_data.kqn_txds, 0,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = NULL;
for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
{
int premapped_pages;
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
int basepage = i * KQSW_NTXMSGPAGES;
+ PORTAL_ALLOC (ktx, sizeof(*ktx));
+ if (ktx == NULL) {
+ kqswnal_finalise ();
+ return (-ENOMEM);
+ }
+
+ ktx->ktx_alloclist = kqswnal_data.kqn_txds;
+ kqswnal_data.kqn_txds = ktx;
+
PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
if (ktx->ktx_buffer == NULL)
{
/**********************************************************************/
/* Allocate/Initialise receive descriptors */
- PORTAL_ALLOC (kqswnal_data.kqn_rxds,
- sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
- if (kqswnal_data.kqn_rxds == NULL)
- {
- kqswnal_finalise ();
- return (-ENOMEM);
- }
-
- memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
-
+ kqswnal_data.kqn_rxds = NULL;
elan_page_idx = 0;
for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
{
E3_Addr elanbuffer;
#endif
int j;
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+ PORTAL_ALLOC(krx, sizeof(*krx));
+ if (krx == NULL) {
+ kqswnal_finalise();
+                        return (-ENOMEM);
+ }
+
+ krx->krx_alloclist = kqswnal_data.kqn_rxds;
+ kqswnal_data.kqn_rxds = krx;
if (i < KQSW_NRXMSGS_SMALL)
{
/**********************************************************************/
/* Queue receives, now that it's OK to run their completion callbacks */
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
/* NB this enqueue can allocate/sleep (attr == 0) */
#if MULTIRAIL_EKC
rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
#endif
} kqswnal_remotemd_t;
-typedef struct
+typedef struct kqswnal_rx
{
struct list_head krx_list; /* enqueue -> thread */
+ struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */
EP_RCVR *krx_eprx; /* port to post receives to */
EP_RXD *krx_rxd; /* receive descriptor (for repost) */
#if MULTIRAIL_EKC
ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
-typedef struct
+typedef struct kqswnal_tx
{
struct list_head ktx_list; /* enqueue idle/active */
struct list_head ktx_delayed_list; /* enqueue delayedtxds */
+ struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */
unsigned int ktx_isnblk:1; /* reserved descriptor? */
unsigned int ktx_state:7; /* What I'm doing */
unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
#if CONFIG_SYSCTL
struct ctl_table_header *kqn_sysctl; /* sysctl interface */
#endif
- kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
- kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
+ kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */
+ kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */
struct list_head kqn_idletxds; /* transmit descriptors free to use */
struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */
ent->write_proc = trace_write_daemon_file;
ent->read_proc = trace_read_daemon_file;
- ent = create_proc_entry("sys/portals/debug_size", 0, NULL);
+ ent = create_proc_entry("sys/portals/debug_mb", 0, NULL);
if (ent == NULL) {
- CERROR("couldn't register debug_size\n");
+ CERROR("couldn't register debug_mb\n");
return -1;
}
- ent->write_proc = trace_write_debug_size;
- ent->read_proc = trace_read_debug_size;
+ ent->write_proc = trace_write_debug_mb;
+ ent->read_proc = trace_read_debug_mb;
return 0;
}
remove_proc_entry("sys/portals/dump_kernel", NULL);
remove_proc_entry("sys/portals/daemon_file", NULL);
- remove_proc_entry("sys/portals/debug_size", NULL);
+ remove_proc_entry("sys/portals/debug_mb", NULL);
#ifdef CONFIG_SYSCTL
if (portals_table_header)
#include <linux/portals_compat25.h>
#include <linux/libcfs.h>
-#define TCD_MAX_PAGES 1280
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
/* XXX move things up to the top, comment */
atomic_t tctl_shutdown;
};
+#define TRACEFILE_SIZE (500 << 20)
static DECLARE_RWSEM(tracefile_sem);
static char *tracefile = NULL;
+static long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DECLARE_MUTEX(trace_thread_sem);
static int thread_running = 0;
/* the kernel should print a message for us. fall back
* to using the last page in the ring buffer. */
goto ring_buffer;
- return NULL;
}
page->index = 0;
page->mapping = (void *)(long)smp_processor_id();
struct trace_cpu_data *tcd;
struct ptldebug_header header;
struct page *page;
- char *debug_buf;
- int known_size, needed, max_nob;
+ char *debug_buf = format;
+ int known_size, needed = 85 /* average message length */, max_nob;
va_list ap;
unsigned long flags;
struct timeval tv;
known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
- page = trace_get_page(tcd, known_size + 40); /* slop */
retry:
- if (page == NULL)
+ page = trace_get_page(tcd, needed + known_size);
+ if (page == NULL) {
+ debug_buf = format;
+ if (needed + known_size > PAGE_SIZE)
+ mask |= D_ERROR;
+ needed = strlen(format);
goto out;
+ }
debug_buf = page_address(page) + page->index + known_size;
- va_start(ap, format);
max_nob = PAGE_SIZE - page->index - known_size;
LASSERT(max_nob > 0);
+ va_start(ap, format);
needed = vsnprintf(debug_buf, max_nob, format, ap);
va_end(ap);
- if (needed > max_nob) {
- /* overflow. oh poop. */
- page = trace_get_page(tcd, needed + known_size);
+ if (needed > max_nob) /* overflow. oh poop. */
goto retry;
- }
header.ph_len = known_size + needed;
debug_buf = page_address(page) + page->index;
printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
page->index);
+ out:
if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
print_to_console(&header, mask, debug_buf, needed, file, fn);
- out:
trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);
down_write(&tracefile_sem);
- filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
+ filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
if (IS_ERR(filp)) {
rc = PTR_ERR(filp);
printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
filp = NULL;
down_read(&tracefile_sem);
if (tracefile != NULL) {
- filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND,
- 0600);
+ filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
+ 0600);
if (IS_ERR(filp)) {
printk("couldn't open %s: %ld\n", tracefile,
PTR_ERR(filp));
hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
list_for_each_safe(pos, tmp, &pc.pc_pages) {
+ static loff_t f_pos;
page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
LASSERT(page->index <= PAGE_SIZE);
LASSERT(page_count(page) > 0);
+ if (f_pos >= tracefile_size)
+ f_pos = 0;
+ else if (f_pos > filp->f_dentry->d_inode->i_size)
+ f_pos = filp->f_dentry->d_inode->i_size;
+
rc = filp->f_op->write(filp, page_address(page),
- page->index, &filp->f_pos);
+ page->index, &f_pos);
if (rc != page->index) {
printk(KERN_WARNING "wanted to write %lu but "
"wrote %d\n", page->index, rc);
tracefile = NULL;
trace_stop_thread();
goto out_sem;
+ } else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
+ goto out_sem;
}
if (name[0] != '/') {
name = NULL;
printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s\n", name);
+ "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
trace_start_thread();
up_write(&tracefile_sem);
out:
- if (name)
- kfree(name);
+ kfree(name);
return count;
}
return rc;
}
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data)
+int trace_write_debug_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data)
{
- char *string;
- int rc, i;
+ char string[32];
+ int i;
unsigned max;
- string = kmalloc(count + 1, GFP_KERNEL);
- if (string == NULL)
- return -ENOMEM;
-
- if (copy_from_user(string, buffer, count)) {
- rc = -EFAULT;
- goto out;
+ if (count >= sizeof(string)) {
+ printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
+ count);
+ return -EOVERFLOW;
}
+ if (copy_from_user(string, buffer, count))
+ return -EFAULT;
+
max = simple_strtoul(string, NULL, 0);
- if (max == 0) {
- rc = -EINVAL;
- goto out;
- }
+ if (max == 0)
+ return -EINVAL;
max /= smp_num_cpus;
- if (max > num_physpages / 5 * 4) {
+ if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) {
printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%d pages, which is more than 80%% of physical pages "
- "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
+ "%d MB, which is more than 80%% of physical RAM "
+ "(%lu).\n", max * smp_num_cpus,
+ (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
return count;
}
for (i = 0; i < NR_CPUS; i++) {
struct trace_cpu_data *tcd;
tcd = &trace_data[i].tcd;
- tcd->tcd_max_pages = max;
+ tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
}
- out:
- kfree(string);
return count;
}
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct trace_cpu_data *tcd;
unsigned long flags;
int rc;
tcd = trace_get_tcd(flags);
- rc = snprintf(page, count, "%lu\n", tcd->tcd_max_pages * smp_num_cpus);
+        rc = snprintf(page, count, "%lu\n",
+                      tcd->tcd_max_pages * smp_num_cpus >> (20 - PAGE_SHIFT));
trace_put_tcd(tcd, flags);
return rc;
unsigned long count, void *data);
int trace_read_daemon_file(char *page, char **start, off_t off, int count,
int *eof, void *data);
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data);
+int trace_write_debug_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data);
int trace_dk(struct file *file, const char *buffer, unsigned long count,
void *data);
*start = page + prd->skip;
user_len = -prd->skip;
- for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) {
+ while ((prd->curr != NULL) && (prd->curr != &kpr_routes)) {
re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
ge = re->kpre_gateway;
chunk_len += line_len;
user_len += line_len;
- /* The route table will exceed one page */
- if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) {
- prd->curr = prd->curr->next;
- break;
+ /* Abort the route list changed */
+ if (prd->curr->next == NULL) {
+ prd->curr = NULL;
+ read_unlock(&kpr_rwlock);
+ return sprintf(page, "\nError: Routes Changed\n");
}
+
+ prd->curr = prd->curr->next;
+
+ /* The route table will exceed one page, break the while loop
+ * so the function can be re-called with a new page.
+ */
+ if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count))
+ break;
}
*eof = 0;
in = fopen(filename, "r");
if (in == NULL) {
+ if (errno == ENOENT) /* no dump file created */
+ return 0;
+
fprintf(stderr, "fopen(%s) failed: %s\n", filename,
strerror(errno));
return 1;
return parse_buffer(in, out);
}
-const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n";
+const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n";
+#define DAEMON_FILE "/proc/sys/portals/daemon_file"
int jt_dbg_debug_daemon(int argc, char **argv)
{
- int i, rc, fd;
- unsigned int cmd = 0;
- struct portal_ioctl_data data;
+ int rc = 1, fd;
if (argc <= 1) {
- fprintf(stderr, debug_daemon_usage);
- return 0;
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ return 1;
}
- fd = open("/proc/sys/portals/daemon_file", O_WRONLY);
+ fd = open(DAEMON_FILE, O_WRONLY);
if (fd < 0) {
- fprintf(stderr, "open(daemon_file) failed: %s\n",
+ fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE,
strerror(errno));
- return 1;
- }
-
- if (strcasecmp(argv[1], "start") == 0) {
- if (argc != 3) {
- fprintf(stderr, debug_daemon_usage);
- return 1;
+ } else if (strcasecmp(argv[1], "start") == 0) {
+ if (argc < 3 || argc > 4 ||
+ (argc == 4 && strlen(argv[3]) > 5)) {
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ goto out;
}
+ if (argc == 4) {
+ char size[12] = "size=";
+ long sizecheck;
+
+ sizecheck = strtoul(argv[3], NULL, 0);
+ if (sizecheck < 10 || sizecheck > 20480) {
+				fprintf(stderr, "size %s invalid, must be in "
+					"the range 10-20480 MB\n", argv[3]);
+ } else {
+ strncat(size, argv[3], sizeof(size) - 6);
+ rc = write(fd, size, strlen(size));
+ if (rc != strlen(size)) {
+ fprintf(stderr, "set %s failed: %s\n", size, strerror(errno));
+ }
+ }
+ }
rc = write(fd, argv[2], strlen(argv[2]));
if (rc != strlen(argv[2])) {
- fprintf(stderr, "write(%s) failed: %s\n", argv[2],
- strerror(errno));
- close(fd);
- return 1;
+ fprintf(stderr, "start debug_daemon on %s failed: %s\n",
+ argv[2], strerror(errno));
+ goto out;
}
+
+ rc = 0;
} else if (strcasecmp(argv[1], "stop") == 0) {
rc = write(fd, "stop", 4);
if (rc != 4) {
- fprintf(stderr, "write(stop) failed: %s\n",
+ fprintf(stderr, "stopping debug_daemon failed: %s\n",
strerror(errno));
- close(fd);
- return 1;
+ goto out;
}
+ rc = 0;
} else {
- fprintf(stderr, debug_daemon_usage);
- return 1;
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ rc = 1;
}
+out:
close(fd);
-	return 0;
+	return rc;
}
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.8
+ * bug fixes
+ - allocate qswnal tx descriptors singly to avoid fragmentation (4504)
+ - don't LBUG on obdo_alloc() failure, use OBD_SLAB_ALLOC() (4800)
+ - fix NULL dereference in /proc/sys/portals/routes (4827)
+ - allow failed mdc_close() operations to be interrupted (4561)
+ - stop precreate on OST before MDS would time out on it (4778)
+ - don't send partial-page writes before EOF from client (4410)
+ - discard client grant for sub-page writes on large-page clients (4520)
+ - don't free dentries not owned by NFS code, check generation (4806)
+ - fix lsm leak if mds_create_objects() fails (4801)
+ - limit debug_daemon file size, always print CERROR messages (4789)
+ - use transno after validating reply (3892)
+ - process timed out requests if import state changes (3754)
+ - update mtime on OST during writes, return in glimpse (4829)
+
2004-10-07 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.7
* bug fixes
- ignore -ENOENT errors in osc_destroy (3639)
- notify osc create thread that OSC is being cleaned up (4600)
- - bind to privileged port in socknal and tcpnal (3689)
- add nettype argument for llmount in #5d in conf-sanity.sh (3936)
- reconstruct ost_handle() like mds_handle() (4657)
- create a new thread to do import eviction to avoid deadlock (3969)
- OST returns ENOSPC from object create when no space left (4539)
- don't send truncate RPC if file size isn't changing (4410)
- limit OSC precreate to 1/2 of value OST considers bogus (4778)
+ - bind to privileged port in socknal and tcpnal (3689)
* miscellania
- rate limit CERROR/CWARN console message to avoid overload (4519)
- GETFILEINFO dir ioctl returns LOV EA + MDS stat in 1 call (3327)
- revalidate should check working dir is a directory (4134)
* miscellania
- don't always mark "slow" obdfilter messages as errors (4418)
-
+
2004-08-24 Cluster File Systems, Inc. <info@clusterfs.com>
* version 1.2.5
* bug fixes
AC_INIT
AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, 1.2.7)
+AM_INIT_AUTOMAKE(lustre, 1.2.7.4)
# AM_MAINTAINER_MODE
# Four main targets: lustre kernel modules, utilities, tests, and liblustre
obd_id oscc_last_id;//last available pre-created object
obd_id oscc_next_id;// what object id to give out next
int oscc_grow_count;
- struct osc_created *oscc_osccd;
struct obdo oscc_oa;
int oscc_flags;
wait_queue_head_t oscc_waitq; /* creating procs wait on this */
* we define this to be 2T - 4k, which is the ext3 maxbytes. */
#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
+#define CHECKSUM_CHUNK 4096
#define CHECKSUM_BULK 0
#define POISON_BULK 0
#if CHECKSUM_BULK
-static inline void ost_checksum(obd_count *cksum, void *addr, int len)
+static inline void ost_checksum(obd_count *cksum,int *psum, void *addr, int len)
{
unsigned char *ptr = (unsigned char *)addr;
- obd_count sum = 0;
+ int sum = 0;
/* very stupid, but means I don't have to think about byte order */
while (len-- > 0)
sum += *ptr++;
*cksum = (*cksum << 2) + sum;
+ if (psum)
+ *psum = sum;
}
#endif
set_current_state(TASK_INTERRUPTIBLE); \
if (condition) \
break; \
- if (signal_pending(current)) { \
- if (info->lwi_on_signal) \
- info->lwi_on_signal(info->lwi_cb_data); \
- ret = -EINTR; \
- break; \
- } \
if (info->lwi_timeout && !__timed_out) { \
if (schedule_timeout(info->lwi_timeout) == 0) { \
__timed_out = 1; \
} else { \
schedule(); \
} \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ if (info->lwi_on_signal) \
+ info->lwi_on_signal(info->lwi_cb_data); \
+ ret = -EINTR; \
+ break; \
+ } \
} \
\
SIGNAL_MASK_LOCK(current, irqflags); \
/* Spare the preprocessor, spoil the bugs. */
#define FLAG(field, str) (field ? str : "")
-#define PTLRPC_REQUEST_COMPLETE(req) ((req)->rq_phase > RQ_PHASE_RPC)
-
#define DEBUG_REQ_FLAGS(req) \
((req->rq_phase == RQ_PHASE_NEW) ? "New" : \
(req->rq_phase == RQ_PHASE_RPC) ? "Rpc" : \
{
struct obdo *oa;
- oa = kmem_cache_alloc(obdo_cachep, SLAB_KERNEL);
- if (oa == NULL)
- LBUG();
- CDEBUG(D_MALLOC, "kmem_cache_alloced oa at %p\n", oa);
- memset(oa, 0, sizeof (*oa));
+ OBD_SLAB_ALLOC(oa, obdo_cachep, SLAB_KERNEL, sizeof(*oa));
return oa;
}
static inline void obdo_free(struct obdo *oa)
{
- if (!oa)
- return;
- CDEBUG(D_MALLOC, "kmem_cache_freed oa at %p\n", oa);
- kmem_cache_free(obdo_cachep, oa);
+ OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
}
#if !defined(__KERNEL__) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
extern unsigned long event;
-Index: kernel-2.4.21/include/asm-x86_64/current.h
-===================================================================
---- kernel-2.4.21.orig/include/asm-x86_64/current.h 2003-06-13 15:26:52.000000000 -0700
-+++ kernel-2.4.21/include/asm-x86_64/current.h 2003-12-04 12:00:13.000000000 -0800
-@@ -5,6 +5,7 @@
- struct task_struct;
-
- #include <asm/pda.h>
-+#include <asm/page.h>
-
- static inline struct task_struct *get_current(void)
- {
extern unsigned long event;
-Index: linux-2.4.21-171/include/asm-x86_64/current.h
-===================================================================
---- linux-2.4.21-171.orig/include/asm-x86_64/current.h 2004-03-31 14:58:26.000000000 -0500
-+++ linux-2.4.21-171/include/asm-x86_64/current.h 2004-04-03 16:02:32.000000000 -0500
-@@ -5,6 +5,7 @@
- struct task_struct;
-
- #include <asm/pda.h>
-+#include <asm/page.h>
-
- static inline struct task_struct *get_current(void)
- {
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
#define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
-+#define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
-+#define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
*/
@@ -335,6 +337,7 @@
#define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
- #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
-+#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */
+ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef clear_opt
--- linux-2.4.20-hp4-pnnl13/fs/nfsd/vfs.c~nfs_export_kernel-2.4.20-hp 2002-11-29 02:53:15.000000000 +0300
+++ linux-2.4.20-hp4-pnnl13-alexey/fs/nfsd/vfs.c 2003-10-08 10:54:08.000000000 +0400
-@@ -77,6 +77,128 @@ struct raparms {
+@@ -77,6 +77,126 @@ struct raparms {
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
--- linux-2.4.20-rh-20.9/fs/nfsd/vfs.c~nfs_export_kernel-2.4.20-rh 2003-09-13 19:34:15.000000000 +0400
+++ linux-2.4.20-rh-20.9-alexey/fs/nfsd/vfs.c 2003-10-08 10:48:38.000000000 +0400
-@@ -77,6 +77,128 @@ struct raparms {
+@@ -77,6 +77,126 @@ struct raparms {
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
diff -uprN linux/fs/nfsd/vfs.c linux-2.4.20/fs/nfsd/vfs.c
--- linux/fs/nfsd/vfs.c Fri Nov 29 05:23:15 2002
+++ linux-2.4.20/fs/nfsd/vfs.c Sun Oct 5 21:47:45 2003
-@@ -77,6 +77,128 @@ struct raparms {
+@@ -77,6 +77,126 @@ struct raparms {
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
===================================================================
--- linux-2.4.21-chaos.orig/fs/nfsd/vfs.c 2003-09-19 03:49:54.000000000 +0400
+++ linux-2.4.21-chaos/fs/nfsd/vfs.c 2003-12-12 16:19:25.000000000 +0300
-@@ -78,6 +78,128 @@
+@@ -78,6 +78,126 @@
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
===================================================================
--- linux-2.4.21-chaos.orig/fs/nfsd/vfs.c 2003-09-19 03:49:54.000000000 +0400
+++ linux-2.4.21-chaos/fs/nfsd/vfs.c 2003-12-12 16:19:25.000000000 +0300
-@@ -78,6 +78,128 @@
+@@ -78,6 +78,126 @@
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
--- linux-2.4.22-ac1/fs/nfsd/vfs.c~nfs_export_kernel-2.4.22-rh 2003-08-25 15:44:43.000000000 +0400
+++ linux-2.4.22-ac1-alexey/fs/nfsd/vfs.c 2003-10-08 13:41:27.000000000 +0400
-@@ -77,6 +77,128 @@ struct raparms {
+@@ -77,6 +77,126 @@ struct raparms {
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
===================================================================
--- linux-2.4.22-vanilla.orig/fs/nfsd/vfs.c 2003-11-03 23:22:11.000000000 +0300
+++ linux-2.4.22-vanilla/fs/nfsd/vfs.c 2003-11-03 23:47:41.000000000 +0300
-@@ -77,6 +77,128 @@
+@@ -77,6 +77,126 @@
static struct raparms * raparml;
static struct raparms * raparm_cache;
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
+ if (!dentry->d_op->d_revalidate_it(dentry, 0, it) &&
+ !d_invalidate(dentry)) {
-+ dput(dentry);
+ err = -EINVAL;
-+ dentry = NULL;
+ return err;
+ }
+ }
export-2.6-suse.patch
header-guards-2.6-suse.patch
lookup_bdev_init_intent.patch
+ext3-super-ntohl.patch
ext3-ea-in-inode-2.6-suse.patch
export-ext3-2.6-suse.patch
ext3-include-fixes-2.6-suse.patch
-ext3-super-ntohl.patch
ext3-extents-2.6.7.patch
ext3-mballoc2-2.6.7.patch
ext3-nlinks-2.6.7.patch
configurable-x86-stack-2.4.21-suse-171.patch
+configurable-x86_64-2.4.21.patch
dev_read_only_2.4.20-rh.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
uml-2.4.20-do_mmap_pgoff-fix.patch
uml-export-end_iomem.patch
configurable-x86-stack-2.4.20.patch
+configurable-x86_64-2.4.21.patch
dev_read_only_2.4.20-rh.patch
exports_2.4.20-rh-hp.patch
lustre_version.patch
ptlrpc_reply(req);
target_release_saved_req(req);
}
- obd->obd_recovery_end = LTIME_S(CURRENT_TIME);
+ obd->obd_recovery_end = CURRENT_SECONDS;
return;
}
}
}
+ /* the lock could already be expired, get the elt_lock also */
spin_lock_bh(&expired_lock_thread.elt_lock);
list_del_init(&lock->l_pending_chain);
spin_unlock_bh(&expired_lock_thread.elt_lock);
}
#if 0
- if (LTIME_S(CURRENT_TIME) - lock->l_export->exp_last_request_time > 30){
+ if (CURRENT_SECONDS - lock->l_export->exp_last_request_time > 30){
ldlm_failed_ast(lock, -ETIMEDOUT, "Not-attempted blocking");
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
RETURN(-ETIMEDOUT);
lli->lli_st_size = lov_merge_size(lli->lli_smd, 0);
lli->lli_st_blocks = lov_merge_blocks(lli->lli_smd);
- //lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, lli->lli_st_mtime);
+ lli->lli_st_mtime = lov_merge_mtime(lli->lli_smd, lli->lli_st_mtime);
CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n",
lli->lli_st_size, lli->lli_st_blocks);
LASSERT(lld != NULL);
LASSERT(lld->lld_cwd_count == 0);
LASSERT(lld->lld_mnt_count == 0);
- OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
+ OBD_FREE(de->d_fsdata, sizeof(*lld));
EXIT;
}
lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb));
lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms;
-
- LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
- inode->i_size, stripe, lvb->lvb_size);
+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+ lvb->lvb_atime = LTIME_S(inode->i_atime);
+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
+
+ LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64
+ "atime "LPU64", mtime "LPU64", ctime "LPU64,
+ inode->i_size, stripe, lvb->lvb_size, lvb->lvb_mtime,
+ lvb->lvb_atime, lvb->lvb_ctime);
GOTO(iput, 0);
iput:
iput(inode);
inode->i_size = lov_merge_size(lli->lli_smd, 0);
inode->i_blocks = lov_merge_blocks(lli->lli_smd);
- //inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime);
+ inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime);
CDEBUG(D_DLMTRACE, "glimpse: size: %llu, blocks: %lu\n",
inode->i_size, inode->i_blocks);
}
sbi->ll_osc_exp = class_conn2export(&osc_conn);
+ /* need to do a statfs to initialize the per-OSC osfs cache -
+ * that is used by the OSC IO code to know the blocksize */
+ err = obd_statfs(obd, &osfs, jiffies - HZ);
+ if (err)
+ GOTO(out_mdc, err);
+
err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
if (err) {
CERROR("cannot mds_connect: rc = %d\n", err);
inode = igrab(lock->l_ast_data);
} else {
inode = lock->l_ast_data;
- CDEBUG(inode->i_state & I_FREEING ? D_INFO : D_WARNING,
- "l_ast_data %p is bogus: magic %0x8\n",
- lock->l_ast_data, lli->lli_inode_magic);
+ __LDLM_DEBUG(inode->i_state & I_FREEING ?
+ D_INFO : D_WARNING, lock,
+ "l_ast_data %p is bogus: magic %08x\n",
+ lock->l_ast_data, lli->lli_inode_magic);
inode = NULL;
}
}
if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
- LTIME_S(CURRENT_TIME));
+ CURRENT_SECONDS);
if (lsm)
attr->ia_valid &= ~ATTR_SIZE;
return (key0 << 1);
}
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, void *opaque)
+#else
+static int ll_nfs_test_inode(struct inode *inode, void *opaque)
+#endif
+{
+ struct ll_fid *iid = opaque;
+
+ if (inode->i_ino == iid->id && inode->i_generation == iid->generation)
+ return 1;
+
+ return 0;
+}
+
static struct inode * search_inode_for_lustre(struct super_block *sb,
unsigned long ino,
unsigned long generation,
unsigned long valid = 0;
int eadatalen = 0, rc;
struct inode *inode = NULL;
+ struct ll_fid iid = { .id = ino, .generation = generation };
- inode = ILOOKUP(sb, ino, NULL, NULL);
+ inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
if (inode)
return inode;
if (IS_ERR(inode)) {
return ERR_PTR(PTR_ERR(inode));
}
- if (is_bad_inode(inode)
- || (generation && inode->i_generation != generation)
- ){
+ if (is_bad_inode(inode) ||
+ (generation && inode->i_generation != generation)){
/* we didn't find the right inode.. */
- CERROR(" Inode %lu, Bad count: %lu %d or version %u %u\n",
- inode->i_ino,
- (unsigned long)inode->i_nlink,
- atomic_read(&inode->i_count),
- inode->i_generation,
- generation);
+ CERROR("Inode %lu, Bad count: %lu %d or version %u %u\n",
+ inode->i_ino, (unsigned long)inode->i_nlink,
+ atomic_read(&inode->i_count), inode->i_generation,
+ generation);
iput(inode);
return ERR_PTR(-ESTALE);
}
-
+
/* now to find a dentry.
* If possible, get a well-connected one
*/
return ERR_PTR(-ENOMEM);
}
result->d_flags |= DCACHE_DISCONNECTED;
-
+
ll_set_dd(result);
result->d_op = &ll_d_ops;
return result;
spin_lock(&sbi->ll_lock);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "pending issued pages: %lu\n",
ra->ra_cur_pages);
data->name = name;
data->namelen = namelen;
data->create_mode = mode;
- data->mod_time = LTIME_S(CURRENT_TIME);
+ data->mod_time = CURRENT_SECONDS;
}
static void ll_d_add(struct dentry *de, struct inode *inode)
static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
- struct lookup_intent *it, int flags)
+ struct lookup_intent *it, int lookup_flags)
{
struct dentry *save = dentry, *retval;
struct ll_fid pfid;
rc = mdc_intent_lock(ll_i2mdcexp(parent), &ctxt, &pfid,
dentry->d_name.name, dentry->d_name.len, NULL, 0,
- NULL, it, flags, &req, ll_mdc_blocking_ast);
+ NULL, it, lookup_flags, &req, ll_mdc_blocking_ast);
if (rc < 0)
GOTO(out, retval = ERR_PTR(rc));
else
pg.count = PAGE_SIZE;
- CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
- cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, inode->i_ino,
- pg.off, pg.off);
+ LL_CDEBUG_PAGE(D_PAGE, page, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n",
+ cmd & OBD_BRW_WRITE ? "write" : "read", pg.count,
+ inode->i_ino, pg.off, pg.off);
if (pg.count == 0) {
CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off "
LPU64"\n",
if (rc)
RETURN(rc);
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+ LL_CDEBUG_PAGE(D_PAGE, page, "uptodate\n");
RETURN(0);
+ }
/* We're completely overwriting an existing page, so _don't_ set it up
* to date until commit_write */
if (from == 0 && to == PAGE_SIZE) {
+ LL_CDEBUG_PAGE(D_PAGE, page, "full page write\n");
POISON_PAGE(page, 0x11);
RETURN(0);
}
* treat it like i_size. */
kms = lov_merge_size(lsm, 1);
if (kms <= offset) {
+ LL_CDEBUG_PAGE(D_PAGE, page, "kms "LPU64" <= offset "LPU64"\n",
+ kms, offset);
memset(kmap(page), 0, PAGE_SIZE);
kunmap(page);
GOTO(prepare_done, rc = 0);
RETURN(llap);
}
-static int queue_or_sync_write(struct obd_export *exp,
- struct lov_stripe_md *lsm,
+static int queue_or_sync_write(struct obd_export *exp, struct inode *inode,
struct ll_async_page *llap,
unsigned to, obd_flag async_flags)
{
+ unsigned long size_index = inode->i_size >> PAGE_SHIFT;
struct obd_io_group *oig;
int rc;
ENTRY;
/* _make_ready only sees llap once we've unlocked the page */
llap->llap_write_queued = 1;
- rc = obd_queue_async_io(exp, lsm, NULL, llap->llap_cookie,
- OBD_BRW_WRITE, 0, 0, 0, async_flags);
+ rc = obd_queue_async_io(exp, ll_i2info(inode)->lli_smd, NULL,
+ llap->llap_cookie, OBD_BRW_WRITE, 0, 0, 0,
+ async_flags);
if (rc == 0) {
LL_CDEBUG_PAGE(D_PAGE, llap->llap_page, "write queued\n");
//llap_write_pending(inode, llap);
if (rc)
GOTO(out, rc);
- rc = obd_queue_group_io(exp, lsm, NULL, oig, llap->llap_cookie,
- OBD_BRW_WRITE, 0, to, 0, ASYNC_READY |
- ASYNC_URGENT | ASYNC_COUNT_STABLE |
- ASYNC_GROUP_SYNC);
+ /* make full-page requests if we are not at EOF (bug 4410) */
+ if (llap->llap_page->index < size_index) {
+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
+ "sync write before EOF: size_index %lu, to %d\n",
+ size_index, to);
+ to = PAGE_SIZE;
+ } else if (llap->llap_page->index == size_index) {
+ int size_to = inode->i_size & ~PAGE_MASK;
+ LL_CDEBUG_PAGE(D_PAGE, llap->llap_page,
+ "sync write at EOF: size_index %lu, to %d/%d\n",
+ size_index, to, size_to);
+ if (to < size_to)
+ to = size_to;
+ }
+
+ rc = obd_queue_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig,
+ llap->llap_cookie, OBD_BRW_WRITE, 0, to, 0,
+ ASYNC_READY | ASYNC_URGENT |
+ ASYNC_COUNT_STABLE | ASYNC_GROUP_SYNC);
if (rc)
GOTO(free_oig, rc);
- rc = obd_trigger_group_io(exp, lsm, NULL, oig);
+ rc = obd_trigger_group_io(exp, ll_i2info(inode)->lli_smd, NULL, oig);
if (rc)
GOTO(free_oig, rc);
struct inode *inode = page->mapping->host;
struct ll_inode_info *lli = ll_i2info(inode);
struct lov_stripe_md *lsm = lli->lli_smd;
- struct obd_export *exp = NULL;
+ struct obd_export *exp;
struct ll_async_page *llap;
loff_t size;
int rc = 0;
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRTY_MISSES);
- exp = ll_i2obdexp(inode);
- if (exp == NULL)
- RETURN(-EINVAL);
-
- rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap,
- to, 0);
+ rc = queue_or_sync_write(exp, inode, llap, to, 0);
if (rc)
GOTO(out, rc);
} else {
out:
size = (((obd_off)page->index) << PAGE_SHIFT) + to;
if (rc == 0) {
- size = (((obd_off)page->index) << PAGE_SHIFT) + to;
obd_increase_kms(exp, lsm, size);
if (size > inode->i_size)
inode->i_size = size;
llap->llap_cookie,
ASYNC_READY | ASYNC_URGENT);
} else {
- rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap,
- PAGE_SIZE, ASYNC_READY |
- ASYNC_URGENT);
+ rc = queue_or_sync_write(exp, inode, llap, PAGE_SIZE,
+ ASYNC_READY | ASYNC_URGENT);
}
if (rc)
page_cache_release(page);
CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
i, tmp_oa->o_size, src_oa->o_size);
}
+ if (src_oa->o_valid & OBD_MD_FLMTIME)
+ loi->loi_mtime = src_oa->o_mtime;
err = obd_setattr(lov->tgts[loi->loi_ost_idx].ltd_exp, tmp_oa,
NULL, NULL);
int i = 0;
ENTRY;
- for (loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++)
+ for (loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++) {
if (obdos[i].o_valid & OBD_MD_FLBLOCKS)
loi->loi_blocks = obdos[i].o_blocks;
+ if (obdos[i].o_valid & OBD_MD_FLMTIME)
+ loi->loi_mtime = obdos[i].o_mtime;
+ }
OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
OBD_FREE(aa->aa_ioarr, sizeof(*aa->aa_ioarr) * aa->aa_oa_bufs);
submd->lsm_oinfo->loi_rss = loi->loi_rss;
submd->lsm_oinfo->loi_kms = loi->loi_kms;
submd->lsm_oinfo->loi_blocks = loi->loi_blocks;
- loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
+ submd->lsm_oinfo->loi_mtime = loi->loi_mtime;
/* XXX submd is not fully initialized here */
*flags = save_flags;
rc = obd_enqueue(lov->tgts[loi->loi_ost_idx].ltd_exp, submd,
LASSERT(lock != NULL);
loi->loi_rss = tmp;
+ loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
loi->loi_blocks = submd->lsm_oinfo->loi_blocks;
/* Extend KMS up to the end of this lock and no further
* A lock on [x,y] means a KMS of up to y + 1 bytes! */
if (tmp > lock->l_policy_data.l_extent.end)
tmp = lock->l_policy_data.l_extent.end + 1;
if (tmp >= loi->loi_kms) {
- CDEBUG(D_DLMTRACE, "lock acquired, setting rss="
- LPU64", kms="LPU64"\n", loi->loi_rss,
- tmp);
+ LDLM_DEBUG(lock, "acquired set stripe %d rss="
+ LPU64", kms="LPU64"\n", i,
+ loi->loi_rss, tmp);
loi->loi_kms = tmp;
loi->loi_kms_valid = 1;
} else {
- CDEBUG(D_DLMTRACE, "lock acquired, setting rss="
- LPU64"; leaving kms="LPU64", end="LPU64
- "\n", loi->loi_rss, loi->loi_kms,
- lock->l_policy_data.l_extent.end);
+ LDLM_DEBUG(lock, "acquired, set stripe %d rss="
+ LPU64"; leaving kms="LPU64", end="
+ LPU64"\n", i, loi->loi_rss,
+ loi->loi_kms,
+ lock->l_policy_data.l_extent.end);
}
ldlm_lock_allow_match(lock);
LDLM_LOCK_PUT(lock);
save_flags & LDLM_FL_HAS_INTENT) {
memset(lov_lockhp, 0, sizeof(*lov_lockhp));
loi->loi_rss = submd->lsm_oinfo->loi_rss;
+ loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
loi->loi_blocks = submd->lsm_oinfo->loi_blocks;
- CDEBUG(D_DLMTRACE, "glimpsed, setting rss="LPU64
- "; leaving kms="LPU64"\n", loi->loi_rss,
+ CDEBUG(D_DLMTRACE, "glimpsed, set stripe %d rss="LPU64
+ "; leaving kms="LPU64"\n", i, loi->loi_rss,
loi->loi_kms);
} else {
memset(lov_lockhp, 0, sizeof(*lov_lockhp));
if (lov->tgts[loi->loi_ost_idx].active) {
CERROR("error: enqueue objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
+ LPX64" stripe %d idx %d: rc = %d\n",
lsm->lsm_object_id, loi->loi_id,
- loi->loi_ost_idx, rc);
+ i, loi->loi_ost_idx, rc);
GOTO(out_locks, rc);
}
}
__u64 blocks = 0;
int i;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
blocks += loi->loi_blocks;
}
return blocks;
struct lov_oinfo *loi;
int i;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++){
if (loi->loi_mtime > current_time)
current_time = loi->loi_mtime;
}
}
journal_start:
- lock_kernel();
LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
+ lock_kernel();
handle = journal_start(EXT3_JOURNAL(inode), nblocks);
unlock_kernel();
EXT3_SINGLEDATA_TRANS_BLOCKS;
#endif
- LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);
return needed;
}
needed = journal->j_max_transaction_buffers;
}
+ LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);
lock_kernel();
handle = journal_start(journal, needed);
unlock_kernel();
data->name = name;
data->namelen = namelen;
data->create_mode = mode;
- data->mod_time = LTIME_S(CURRENT_TIME);
+ data->mod_time = CURRENT_SECONDS;
}
static int it_to_lock_mode(struct lookup_intent *it)
return rc;
}
-static int go_back_to_sleep(void *unused)
-{
- return 0;
-}
-
int mdc_close(struct obd_export *exp, struct obdo *oa,
struct obd_client_handle *och, struct ptlrpc_request **request)
{
req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock;
req->rq_async_args.pointer_arg[1] = obd;
ptlrpcd_add_req(req);
- lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), go_back_to_sleep,
- NULL, NULL);
+ lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), NULL, NULL, NULL);
rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req),
&lwi);
if (req->rq_repmsg == NULL) {
int rc, size = sizeof(*body);
ENTRY;
- body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body));
+ body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_mds_body);
if (body == NULL)
GOTO(out, rc = -EPROTO);
obd->obd_recoverable_clients, mds->mds_last_transno);
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
obd->obd_recovering = 1;
- obd->obd_recovery_start = LTIME_S(CURRENT_TIME);
+ obd->obd_recovery_start = CURRENT_SECONDS;
}
mds->mds_mount_count = mount_count + 1;
LASSERT(lsm && lsm->lsm_object_id);
lmm = NULL;
rc = obd_packmd(mds->mds_osc_exp, &lmm, lsm);
- if (!rec->ur_fid2->id)
- obd_free_memmd(mds->mds_osc_exp, &lsm);
LASSERT(rc >= 0);
lmm_size = rc;
body->eadatasize = rc;
OBD_FREE(*ids, mds->mds_lov_desc.ld_tgt_count * sizeof(**ids));
*ids = NULL;
}
+ if (lsm)
+ obd_free_memmd(mds->mds_osc_exp, &lsm);
RETURN(rc);
}
GOTO(out_dput, 0);
}
- /* get lock (write for O_CREAT, read otherwise) */
-
mds_pack_inode2fid(&body->fid1, dchild->d_inode);
mds_pack_inode2body(body, dchild->d_inode);
if (S_ISREG(dchild->d_inode->i_mode)) {
acc_mode = accmode(rec->ur_flags);
/* Step 1: Find and lock the parent */
- if (rec->ur_flags & O_CREAT)
+ if (rec->ur_flags & MDS_OPEN_CREAT)
parent_mode = LCK_PW;
dparent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode,
&parent_lockh, rec->ur_name,
*/
int mds_fix_attr(struct inode *inode, struct mds_update_record *rec)
{
- time_t now = LTIME_S(CURRENT_TIME);
+ time_t now = CURRENT_SECONDS;
struct iattr *attr = &rec->ur_iattr;
unsigned int ia_valid = attr->ia_valid;
int error;
llh->llh_hdr.lrh_type = LLOG_HDR_MAGIC;
llh->llh_hdr.lrh_len = llh->llh_tail.lrt_len = LLOG_CHUNK_SIZE;
llh->llh_hdr.lrh_index = llh->llh_tail.lrt_index = 0;
- llh->llh_timestamp = LTIME_S(CURRENT_TIME);
+ llh->llh_timestamp = CURRENT_SECONDS;
if (uuid)
memcpy(&llh->llh_tgtuuid, uuid, sizeof(llh->llh_tgtuuid));
llh->llh_bitmap_offset = offsetof(typeof(*llh),llh_bitmap);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, prep_async_page);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_async_io);
- LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_async_flags);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, increase_kms);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
class_release_dev(obd);
case 1:
class_put_type(type);
- obd->obd_type = NULL;
}
return rc;
}
obd->obd_last_committed = le64_to_cpu(fsd->fsd_last_transno);
if (obd->obd_recoverable_clients) {
- CWARN("RECOVERY: %d recoverable clients, last_rcvd "
- LPU64"\n", obd->obd_recoverable_clients,
+ CWARN("RECOVERY: service %s, %d recoverable clients, "
+ "last_rcvd "LPU64"\n", obd->obd_name,
+ obd->obd_recoverable_clients,
le64_to_cpu(fsd->fsd_last_transno));
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
obd->obd_recovering = 1;
- obd->obd_recovery_start = LTIME_S(CURRENT_TIME);
+ obd->obd_recovery_start = CURRENT_SECONDS;
}
out:
}
if (dchild->d_inode != NULL && is_bad_inode(dchild->d_inode)) {
- CERROR("%s: got bad inode "LPU64"\n", obd->obd_name, id);
+ CERROR("%s: got bad object "LPU64" inode %lu\n",
+ obd->obd_name, id, dchild->d_inode->i_ino);
f_dput(dchild);
RETURN(ERR_PTR(-ENOENT));
}
level = D_ERROR;
if (maxsize > 0) { /* we may not have done a statfs yet */
LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
- "cli %s/%p %ld+%ld > "LPU64"\n",
+ "%s: cli %s/%p %ld+%ld > "LPU64"\n", func,
exp->exp_client_uuid.uuid, exp,
fed->fed_grant, fed->fed_pending, maxsize);
LASSERTF(fed->fed_dirty <= maxsize,
- "cli %s/%p %ld > "LPU64"\n",
+ "%s: cli %s/%p %ld > "LPU64"\n", func,
exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, maxsize);
}
struct obd_device *obd = exp->exp_obd;
struct filter_obd *filter = &obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
- int level = D_CACHE;
spin_lock(&obd->obd_osfs_lock);
spin_lock(&obd->obd_dev_lock);
list_del_init(&exp->exp_obd_chain);
spin_unlock(&obd->obd_dev_lock);
- if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0)
- level = D_ERROR;
- CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
- obd->obd_name, exp->exp_client_uuid.uuid, exp,
- fed->fed_dirty, fed->fed_pending, fed->fed_grant);
-
LASSERTF(filter->fo_tot_granted >= fed->fed_grant,
"%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n",
obd->obd_name, filter->fo_tot_granted,
if (diff >= 0)
RETURN(diff);
if (-diff > OST_MAX_PRECREATE) {
- CERROR("ignoring bogus orphan destroy request: obdid "
- LPU64" last_id "LPU64"\n",
+ CERROR("%s: ignoring bogus orphan destroy request: "
+ "obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
oa->o_id, filter_last_id(filter, oa));
RETURN(-EINVAL);
}
filter_destroy_precreated(exp, oa, filter);
rc = filter_update_last_objid(obd, group, 0);
if (rc)
- CERROR("unable to write lastobjid, but orphans"
- "were deleted\n");
+ CERROR("%s: unable to write lastobjid, but orphans"
+ "were deleted\n", obd->obd_name);
RETURN(0);
} else {
/* only precreate if group == 0 and o_id is specfied */
(group != 0 || oa->o_id == 0))
RETURN(1);
- LASSERTF(diff >= 0, LPU64" - "LPU64" = %d\n", oa->o_id,
- filter_last_id(filter, oa), diff);
+ LASSERTF(diff >= 0,"%s: "LPU64" - "LPU64" = %d\n",obd->obd_name,
+ oa->o_id, filter_last_id(filter, oa), diff);
RETURN(diff);
}
}
struct filter_obd *filter;
struct obd_statfs *osfs;
int err = 0, rc = 0, recreate_obj = 0, i;
+ unsigned long enough_time = jiffies + (obd_timeout * HZ) / 3;
__u64 next_id;
void *handle = NULL;
ENTRY;
if (rc)
break;
+ if (time_after(jiffies, enough_time)) {
+ CDEBUG(D_INODE,"%s: precreate slow - want %d got %d \n",
+ obd->obd_name, *num, i);
+ break;
+ }
}
*num = i;
GOTO(cleanup, rc = -ENOENT);
}
+ if (oa)
+ obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME);
fso[i].fso_dentry = dentry;
fso[i].fso_bufcnt = o->ioo_bufcnt;
}
for (i = 0; i < fso[obj].fso_bufcnt; i++, n++) {
int tmp, bytes;
- /* FIXME: this is calculated with PAGE_SIZE on client */
+ /* should match the code in osc_exit_cache */
bytes = rnb[n].len;
bytes += rnb[n].offset & (blocksize - 1);
tmp = (rnb[n].offset + rnb[n].len) & (blocksize - 1);
fsfilt_check_slow(now, obd_timeout, "preprw_write setup");
spin_lock(&exp->exp_obd->obd_osfs_lock);
- if (oa)
+ if (oa) {
filter_grant_incoming(exp, oa);
+ obdo_to_inode(dentry->d_inode, oa,
+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+ }
cleanup_phase = 0;
left = filter_grant_space_left(exp);
err = fsfilt_commit_wait(obd, inode, wait_handle);
if (err)
rc = err;
- if (obd_sync_filter)
+ if (obd_sync_filter && !err)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
fsfilt_check_slow(now, obd_timeout, "commitrw commit");
iattr.ia_size);
iattr.ia_valid |= ATTR_SIZE;
-
- fsfilt_setattr(obd, res->dentry, oti->oti_handle,
- &iattr, 0);
}
+
+ fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0);
up(&inode->i_sem);
fsfilt_check_slow(now, obd_timeout, "direct_io");
+ if (rc == 0)
+ obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
+
rc = filter_finish_transno(exp, oti, rc);
err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
if (err)
rc = err;
- if (obd_sync_filter)
+ if (obd_sync_filter && !err)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
fsfilt_check_slow(now, obd_timeout, "commitrw commit");
/* When this (destroy) operation is committed, return the cancel cookie */
void filter_cancel_cookies_cb(struct obd_device *obd, __u64 transno,
- void *cb_data, int error)
+ void *cb_data, int error)
{
struct llog_cookie *cookie = cb_data;
int rc;
+
+ if (error != 0) {
+ CDEBUG(D_INODE, "not cancelling llog cookie on error %d\n",
+ error);
+ return;
+ }
+
rc = llog_cancel(llog_get_context(obd, cookie->lgc_subsys + 1),
NULL, 1, cookie, 0);
if (rc)
/* this sampling races with updates */
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
spin_lock_irqsave(&cli->cl_loi_list_lock, flags);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "read RPCs in flight: %d\n",
cli->cl_r_in_flight);
spin_lock(&oscc->oscc_lock);
oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
if (rc == -ENOSPC || rc == -EROFS) {
- DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
oscc->oscc_flags |= OSCC_FLAG_NOSPC;
- if (body && rc == -ENOSPC)
+ if (body && rc == -ENOSPC) {
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
oscc->oscc_last_id = body->oa.o_id;
+ }
spin_unlock(&oscc->oscc_lock);
+ DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
} else if (rc != 0 && rc != -EIO) {
- DEBUG_REQ(D_ERROR, req,
- "unknown rc %d from async create: failing oscc", rc);
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
oscc->oscc_grow_count = OST_MIN_PRECREATE;
spin_unlock(&oscc->oscc_lock);
+ DEBUG_REQ(D_ERROR, req,
+ "unknown rc %d from async create: failing oscc", rc);
ptlrpc_fail_import(req->rq_import, req->rq_import_generation);
} else {
if (rc == 0) {
oscc->oscc_flags &= ~OSCC_FLAG_LOW;
- if (body)
+ if (body) {
+ int diff = body->oa.o_id - oscc->oscc_last_id;
+ if (diff != oscc->oscc_grow_count)
+ oscc->oscc_grow_count =
+ max(diff/3, OST_MIN_PRECREATE);
oscc->oscc_last_id = body->oa.o_id;
+ }
}
spin_unlock(&oscc->oscc_lock);
}
-
CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n",
oscc->oscc_last_id, oscc->oscc_next_id);
spin_lock(&oscc->oscc_lock);
body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count;
body->oa.o_valid |= OBD_MD_FLID;
+ spin_unlock(&oscc->oscc_lock);
CDEBUG(D_HA, "preallocating through id "LPU64" (last used "LPU64")\n",
body->oa.o_id, oscc->oscc_next_id);
- spin_unlock(&oscc->oscc_lock);
request->rq_replen = lustre_msg_size(1, &size);
return 0;
}
oscc->oscc_flags |= OSCC_FLAG_SYNC_IN_PROGRESS;
+ spin_unlock(&oscc->oscc_lock);
CDEBUG(D_HA, "%s: oscc recovery started\n",
oscc->oscc_obd->obd_name);
- spin_unlock(&oscc->oscc_lock);
/* delete from next_id on up */
oa->o_valid |= OBD_MD_FLID;
}
spin_unlock(&oscc->oscc_lock);
rc = oscc_precreate(oscc, try_again);
- if (rc == -EIO)
+ if (rc)
break;
}
oa->o_valid |= bits;
spin_lock(&cli->cl_loi_list_lock);
oa->o_dirty = cli->cl_dirty;
- oa->o_undirty = cli->cl_dirty_max - oa->o_dirty;
+ if (cli->cl_dirty > cli->cl_dirty_max) {
+ CERROR("dirty %lu > dirty_max %lu\n",
+ cli->cl_dirty, cli->cl_dirty_max);
+ oa->o_undirty = 0;
+ } else if (cli->cl_dirty_max - cli->cl_dirty > 0x7fffffff) {
+ CERROR("dirty %lu - dirty_max %lu too big???\n",
+ cli->cl_dirty, cli->cl_dirty_max);
+ oa->o_undirty = 0;
+ } else {
+ oa->o_undirty = cli->cl_dirty_max - oa->o_dirty;
+ }
oa->o_grant = cli->cl_avail_grant;
oa->o_dropped = cli->cl_lost_grant;
cli->cl_lost_grant = 0;
}
#if CHECKSUM_BULK
-static obd_count cksum_pages(int nob, obd_count page_count,
- struct brw_page *pga)
+static obd_count cksum_blocks(int nob, obd_count page_count,
+ struct brw_page *pga)
{
obd_count cksum = 0;
- char *ptr;
+ LASSERT (page_count > 0);
while (nob > 0) {
- LASSERT (page_count > 0);
-
- ptr = kmap(pga->pg);
- ost_checksum(&cksum, ptr + (pga->off & (PAGE_SIZE - 1)),
- pga->count > nob ? nob : pga->count);
- kunmap(pga->pg);
-
+ char *ptr = kmap(pga->pg);
+ int psum, off = pga->off & ~PAGE_MASK;
+ int count = pga->count > nob ? nob : pga->count;
+
+ while (count > 0) {
+ ost_checksum(&cksum, &psum, ptr + off,
+ count > CHECKSUM_CHUNK ?
+ CHECKSUM_CHUNK : count);
+ LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n",
+ off, psum);
+ off += CHECKSUM_CHUNK;
+ count -= CHECKSUM_CHUNK;
+ }
nob -= pga->count;
page_count--;
+ kunmap(pga->pg);
+
pga++;
}
CWARN("Checksum %u from "LPX64" (%s) OK: %x\n",
cksum_counter, peer->peer_nid, str, cksum);
}
+ CDEBUG(D_PAGE, "checksum %x\n", cksum);
} else {
static int cksum_missed;
oap->oap_request = NULL;
}
- if (rc == 0 && oa != NULL)
- oap->oap_loi->loi_blocks = oa->o_blocks;
+ if (rc == 0 && oa != NULL) {
+ if (oa->o_valid & OBD_MD_FLBLOCKS)
+ oap->oap_loi->loi_blocks = oa->o_blocks;
+ if (oa->o_valid & OBD_MD_FLMTIME)
+ oap->oap_loi->loi_mtime = oa->o_mtime;
+ }
if (oap->oap_oig) {
oig_complete_one(oap->oap_oig, &oap->oap_occ, rc);
static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap,
int sent)
{
+ int blocksize = cli->cl_import->imp_obd->obd_osfs.os_bsize ? : 4096;
ENTRY;
if (!(oap->oap_brw_flags & OBD_BRW_FROM_GRANT)) {
cli->cl_lost_grant += PAGE_SIZE;
CDEBUG(D_CACHE, "lost grant: %lu avail grant: %lu dirty: %lu\n",
cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty);
+ } else if (PAGE_SIZE != blocksize && oap->oap_count != PAGE_SIZE) {
+ /* For short writes we shouldn't count parts of pages that
+ * span a whole block on the OST side, or our accounting goes
+ * wrong. Should match the code in filter_grant_check. */
+ int offset = (oap->oap_obj_off +oap->oap_page_off) & ~PAGE_MASK;
+ int count = oap->oap_count + (offset & (blocksize - 1));
+ int end = (offset + oap->oap_count) & (blocksize - 1);
+ if (end)
+ count += blocksize - end;
+
+ cli->cl_lost_grant += PAGE_SIZE - count;
+ CDEBUG(D_CACHE, "lost %lu grant: %lu avail: %lu dirty: %lu\n",
+ PAGE_SIZE - count, cli->cl_lost_grant,
+ cli->cl_avail_grant, cli->cl_dirty);
}
EXIT;
}
if ((*flags & LDLM_FL_HAS_INTENT && rc == ELDLM_LOCK_ABORTED) || !rc) {
- CDEBUG(D_INODE, "received kms == "LPU64", blocks == "LPU64"\n",
- lvb.lvb_size, lvb.lvb_blocks);
+ CDEBUG(D_INODE,"got kms "LPU64" blocks "LPU64" mtime "LPU64"\n",
+ lvb.lvb_size, lvb.lvb_blocks, lvb.lvb_mtime);
lsm->lsm_oinfo->loi_rss = lvb.lvb_size;
+ lsm->lsm_oinfo->loi_mtime = lvb.lvb_mtime;
lsm->lsm_oinfo->loi_blocks = lvb.lvb_blocks;
}
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
spin_unlock(&oscc->oscc_lock);
}
+
break;
}
case IMP_EVENT_INACTIVE: {
#include <linux/lustre_net.h>
#include <linux/lustre_dlm.h>
#include <linux/lustre_export.h>
+#include <linux/lustre_debug.h>
#include <linux/init.h>
#include <linux/lprocfs_status.h>
#include <linux/lustre_commit_confd.h>
obd_count ost_checksum_bulk(struct ptlrpc_bulk_desc *desc)
{
obd_count cksum = 0;
- struct ptlrpc_bulk_page *bp;
+ int i;
- list_for_each_entry(bp, &desc->bd_page_list, bp_link) {
- ost_checksum(&cksum, kmap(bp->bp_page) + bp->bp_pageoffset,
- bp->bp_buflen);
- kunmap(bp->bp_page);
+ for (i = 0; i < desc->bd_page_count; i++) {
+ struct page *page = desc->bd_iov[i].kiov_page;
+ char *ptr = kmap(page);
+ int psum, off = desc->bd_iov[i].kiov_offset & ~PAGE_MASK;
+ int count = desc->bd_iov[i].kiov_len;
+
+ while (count > 0) {
+ ost_checksum(&cksum, &psum, ptr + off,
+ count > CHECKSUM_CHUNK ?
+ CHECKSUM_CHUNK : count);
+ LL_CDEBUG_PAGE(D_PAGE, page, "off %d checksum %x\n",
+ off, psum);
+ off += CHECKSUM_CHUNK;
+ count -= CHECKSUM_CHUNK;
+ }
+ kunmap(page);
}
return cksum;
req->rq_status = rc;
ptlrpc_error(req);
} else {
- if (req->rq_reply_state != NULL) {
- /* reply out callback would free */
- lustre_free_reply_state (req->rq_reply_state);
- }
if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
CERROR("bulk IO comms error: "
"evicting %s@%s nid "LPX64" (%s)\n",
obd_count client_cksum = body->oa.o_cksum;
obd_count cksum = ost_checksum_bulk(desc);
- portals_nid2str(req->rq_connection->c_peer.peer_ni->pni_number,
- req->rq_connection->c_peer.peer_nid, str);
+ portals_nid2str(req->rq_peer.peer_ni->pni_number,
+ req->rq_peer.peer_nid, str);
if (client_cksum != cksum) {
CERROR("Bad checksum: client %x, server %x, client NID "
LPX64" (%s)\n", client_cksum, cksum,
- req->rq_connection->c_peer.peer_nid, str);
+ req->rq_peer.peer_nid, str);
cksum_counter = 1;
repbody->oa.o_cksum = cksum;
} else {
cksum_counter++;
if ((cksum_counter & (-cksum_counter)) == cksum_counter)
- CWARN("Checksum %u from "LPX64": %x OK\n",
- cksum_counter,
- req->rq_connection->c_peer.peer_nid,
- cksum);
+ CWARN("Checksum %u from "LPX64" (%s): %x OK\n",
+ cksum_counter, req->rq_peer.peer_nid,
+ str, cksum);
}
}
#endif
req->rq_status = rc;
ptlrpc_error(req);
} else {
- if (req->rq_reply_state != NULL) {
- /* reply out callback would free */
- lustre_free_reply_state (req->rq_reply_state);
- }
if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
CERROR("bulk IO comms error: "
"evicting %s@%s nid "LPX64" (%s)\n",
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+/* Internal declarations shared between OST source files. */
+#ifndef OST_INTERNAL_H
+#define OST_INTERNAL_H
+
+/* When lprocfs support is compiled out, ost_print_req degrades to NULL so
+ * call sites can test the pointer and skip per-request /proc printing. */
+#ifdef LPROCFS
+extern void ost_print_req(void *seq_file, struct ptlrpc_request *req);
+#else
+# define ost_print_req NULL
+#endif
+
+#endif /* OST_INTERNAL_H */
void __exit
kqswnal_finalise (void)
{
+ kqswnal_tx_t *ktx;
+ kqswnal_rx_t *krx;
+
switch (kqswnal_data.kqn_init)
{
default:
* ep_dvma_release() get fixed (and releases any mappings in the
* region), we can delete all the code from here --------> */
- if (kqswnal_data.kqn_txds != NULL) {
- int i;
-
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
-
- /* If ktx has a buffer, it got mapped; unmap now.
- * NB only the pre-mapped stuff is still mapped
- * since all tx descs must be idle */
+ for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx =ktx->ktx_alloclist){
+ /* If ktx has a buffer, it got mapped; unmap now. NB only
+ * the pre-mapped stuff is still mapped since all tx descs
+ * must be idle */
- if (ktx->ktx_buffer != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_tx_nmh,
- &ktx->ktx_ebuffer);
- }
+ if (ktx->ktx_buffer != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_tx_nmh,
+ &ktx->ktx_ebuffer);
}
- if (kqswnal_data.kqn_rxds != NULL) {
- int i;
-
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
+ /* If krx_kiov[0].kiov_page got allocated, it got mapped.
+ * NB subsequent pages get merged */
- /* If krx_kiov[0].kiov_page got allocated, it got mapped.
- * NB subsequent pages get merged */
-
- if (krx->krx_kiov[0].kiov_page != NULL)
- ep_dvma_unload(kqswnal_data.kqn_ep,
- kqswnal_data.kqn_ep_rx_nmh,
- &krx->krx_elanbuffer);
- }
+ if (krx->krx_kiov[0].kiov_page != NULL)
+ ep_dvma_unload(kqswnal_data.kqn_ep,
+ kqswnal_data.kqn_ep_rx_nmh,
+ &krx->krx_elanbuffer);
}
/* <----------- to here */
if (kqswnal_data.kqn_ep_rx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
+ ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_rx_nmh);
if (kqswnal_data.kqn_ep_tx_nmh != NULL)
- ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
+ ep_dvma_release(kqswnal_data.kqn_ep,kqswnal_data.kqn_ep_tx_nmh);
#else
if (kqswnal_data.kqn_eprxdmahandle != NULL)
{
}
#endif
- if (kqswnal_data.kqn_txds != NULL)
- {
- int i;
+ while (kqswnal_data.kqn_txds != NULL) {
+ ktx = kqswnal_data.kqn_txds;
- for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++)
- {
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
+ if (ktx->ktx_buffer != NULL)
+ PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
- if (ktx->ktx_buffer != NULL)
- PORTAL_FREE(ktx->ktx_buffer,
- KQSW_TX_BUFFER_SIZE);
- }
-
- PORTAL_FREE(kqswnal_data.kqn_txds,
- sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS +
- KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = ktx->ktx_alloclist;
+ PORTAL_FREE(ktx, sizeof(*ktx));
}
- if (kqswnal_data.kqn_rxds != NULL)
- {
- int i;
- int j;
+ while (kqswnal_data.kqn_rxds != NULL) {
+ int i;
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
- for (j = 0; j < krx->krx_npages; j++)
- if (krx->krx_kiov[j].kiov_page != NULL)
- __free_page (krx->krx_kiov[j].kiov_page);
- }
+ krx = kqswnal_data.kqn_rxds;
+ for (i = 0; i < krx->krx_npages; i++)
+ if (krx->krx_kiov[i].kiov_page != NULL)
+ __free_page (krx->krx_kiov[i].kiov_page);
- PORTAL_FREE(kqswnal_data.kqn_rxds,
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL +
- KQSW_NRXMSGS_LARGE));
+ kqswnal_data.kqn_rxds = krx->krx_alloclist;
+ PORTAL_FREE(krx, sizeof (*krx));
}
/* resets flags, pointers to NULL etc */
#endif
int rc;
int i;
+ kqswnal_rx_t *krx;
+ kqswnal_tx_t *ktx;
int elan_page_idx;
int pkmem = atomic_read(&portal_kmemory);
/**********************************************************************/
/* Allocate/Initialise transmit descriptors */
- PORTAL_ALLOC(kqswnal_data.kqn_txds,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
- if (kqswnal_data.kqn_txds == NULL)
- {
- kqswnal_finalise ();
- return (-ENOMEM);
- }
-
- /* clear flags, null pointers etc */
- memset(kqswnal_data.kqn_txds, 0,
- sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS));
+ kqswnal_data.kqn_txds = NULL;
for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
{
int premapped_pages;
- kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i];
int basepage = i * KQSW_NTXMSGPAGES;
+ PORTAL_ALLOC (ktx, sizeof(*ktx));
+ if (ktx == NULL) {
+ kqswnal_finalise ();
+ return (-ENOMEM);
+ }
+
+ ktx->ktx_alloclist = kqswnal_data.kqn_txds;
+ kqswnal_data.kqn_txds = ktx;
+
PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
if (ktx->ktx_buffer == NULL)
{
/**********************************************************************/
/* Allocate/Initialise receive descriptors */
- PORTAL_ALLOC (kqswnal_data.kqn_rxds,
- sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE));
- if (kqswnal_data.kqn_rxds == NULL)
- {
- kqswnal_finalise ();
- return (-ENOMEM);
- }
-
- memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */
- sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE));
-
+ kqswnal_data.kqn_rxds = NULL;
elan_page_idx = 0;
for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
{
E3_Addr elanbuffer;
#endif
int j;
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
+
+ PORTAL_ALLOC(krx, sizeof(*krx));
+ if (krx == NULL) {
+ kqswnal_finalise();
+ return (-ENOMEM);
+ }
+
+ krx->krx_alloclist = kqswnal_data.kqn_rxds;
+ kqswnal_data.kqn_rxds = krx;
if (i < KQSW_NRXMSGS_SMALL)
{
/**********************************************************************/
/* Queue receives, now that it's OK to run their completion callbacks */
- for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
- {
- kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i];
-
+ for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx =krx->krx_alloclist){
/* NB this enqueue can allocate/sleep (attr == 0) */
#if MULTIRAIL_EKC
rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
#endif
} kqswnal_remotemd_t;
-typedef struct
+typedef struct kqswnal_rx
{
struct list_head krx_list; /* enqueue -> thread */
+ struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */
EP_RCVR *krx_eprx; /* port to post receives to */
EP_RXD *krx_rxd; /* receive descriptor (for repost) */
#if MULTIRAIL_EKC
ptl_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */
} kqswnal_rx_t;
-typedef struct
+typedef struct kqswnal_tx
{
struct list_head ktx_list; /* enqueue idle/active */
struct list_head ktx_delayed_list; /* enqueue delayedtxds */
+ struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */
unsigned int ktx_isnblk:1; /* reserved descriptor? */
unsigned int ktx_state:7; /* What I'm doing */
unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
#if CONFIG_SYSCTL
struct ctl_table_header *kqn_sysctl; /* sysctl interface */
#endif
- kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */
- kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */
+ kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */
+ kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */
struct list_head kqn_idletxds; /* transmit descriptors free to use */
struct list_head kqn_nblk_idletxds; /* reserved free transmit descriptors */
ent->write_proc = trace_write_daemon_file;
ent->read_proc = trace_read_daemon_file;
- ent = create_proc_entry("sys/portals/debug_size", 0, NULL);
+ ent = create_proc_entry("sys/portals/debug_mb", 0, NULL);
if (ent == NULL) {
- CERROR("couldn't register debug_size\n");
+ CERROR("couldn't register debug_mb\n");
return -1;
}
- ent->write_proc = trace_write_debug_size;
- ent->read_proc = trace_read_debug_size;
+ ent->write_proc = trace_write_debug_mb;
+ ent->read_proc = trace_read_debug_mb;
return 0;
}
remove_proc_entry("sys/portals/dump_kernel", NULL);
remove_proc_entry("sys/portals/daemon_file", NULL);
- remove_proc_entry("sys/portals/debug_size", NULL);
+ remove_proc_entry("sys/portals/debug_mb", NULL);
#ifdef CONFIG_SYSCTL
if (portals_table_header)
#include <linux/portals_compat25.h>
#include <linux/libcfs.h>
-#define TCD_MAX_PAGES 1280
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
/* XXX move things up to the top, comment */
atomic_t tctl_shutdown;
};
+#define TRACEFILE_SIZE (500 << 20)
static DECLARE_RWSEM(tracefile_sem);
static char *tracefile = NULL;
+static long long tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
static DECLARE_MUTEX(trace_thread_sem);
static int thread_running = 0;
/* the kernel should print a message for us. fall back
* to using the last page in the ring buffer. */
goto ring_buffer;
- return NULL;
}
page->index = 0;
page->mapping = (void *)(long)smp_processor_id();
struct trace_cpu_data *tcd;
struct ptldebug_header header;
struct page *page;
- char *debug_buf;
- int known_size, needed, max_nob;
+ char *debug_buf = format;
+ int known_size, needed = 85 /* average message length */, max_nob;
va_list ap;
unsigned long flags;
struct timeval tv;
known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
- page = trace_get_page(tcd, known_size + 40); /* slop */
retry:
- if (page == NULL)
+ page = trace_get_page(tcd, needed + known_size);
+ if (page == NULL) {
+ debug_buf = format;
+ if (needed + known_size > PAGE_SIZE)
+ mask |= D_ERROR;
+ needed = strlen(format);
goto out;
+ }
debug_buf = page_address(page) + page->index + known_size;
- va_start(ap, format);
max_nob = PAGE_SIZE - page->index - known_size;
LASSERT(max_nob > 0);
+ va_start(ap, format);
needed = vsnprintf(debug_buf, max_nob, format, ap);
va_end(ap);
- if (needed > max_nob) {
- /* overflow. oh poop. */
- page = trace_get_page(tcd, needed + known_size);
+ if (needed > max_nob) /* overflow. oh poop. */
goto retry;
- }
header.ph_len = known_size + needed;
debug_buf = page_address(page) + page->index;
printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
page->index);
+ out:
if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
print_to_console(&header, mask, debug_buf, needed, file, fn);
- out:
trace_put_tcd(tcd, flags);
}
EXPORT_SYMBOL(portals_debug_msg);
down_write(&tracefile_sem);
- filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY, 0600);
+ filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
if (IS_ERR(filp)) {
rc = PTR_ERR(filp);
printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
filp = NULL;
down_read(&tracefile_sem);
if (tracefile != NULL) {
- filp = filp_open(tracefile, O_CREAT|O_RDWR|O_APPEND,
- 0600);
+ filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
+ 0600);
if (IS_ERR(filp)) {
printk("couldn't open %s: %ld\n", tracefile,
PTR_ERR(filp));
hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
list_for_each_safe(pos, tmp, &pc.pc_pages) {
+ static loff_t f_pos;
page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
LASSERT(page->index <= PAGE_SIZE);
LASSERT(page_count(page) > 0);
+ if (f_pos >= tracefile_size)
+ f_pos = 0;
+ else if (f_pos > filp->f_dentry->d_inode->i_size)
+ f_pos = filp->f_dentry->d_inode->i_size;
+
rc = filp->f_op->write(filp, page_address(page),
- page->index, &filp->f_pos);
+ page->index, &f_pos);
if (rc != page->index) {
printk(KERN_WARNING "wanted to write %lu but "
"wrote %d\n", page->index, rc);
tracefile = NULL;
trace_stop_thread();
goto out_sem;
+ } else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
+ goto out_sem;
}
if (name[0] != '/') {
name = NULL;
printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s\n", name);
+ "to %s (%lukB max)\n", tracefile, (unsigned long)(tracefile_size >> 10));
trace_start_thread();
up_write(&tracefile_sem);
out:
- if (name)
- kfree(name);
+ kfree(name);
return count;
}
return rc;
}
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data)
+int trace_write_debug_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data)
{
- char *string;
- int rc, i;
+ char string[32];
+ int i;
unsigned max;
- string = kmalloc(count + 1, GFP_KERNEL);
- if (string == NULL)
- return -ENOMEM;
-
- if (copy_from_user(string, buffer, count)) {
- rc = -EFAULT;
- goto out;
+ if (count >= sizeof(string)) {
+ printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
+ count);
+ return -EOVERFLOW;
}
+ if (copy_from_user(string, buffer, count))
+ return -EFAULT;
+
max = simple_strtoul(string, NULL, 0);
- if (max == 0) {
- rc = -EINVAL;
- goto out;
- }
+ if (max == 0)
+ return -EINVAL;
max /= smp_num_cpus;
- if (max > num_physpages / 5 * 4) {
+ if (max * smp_num_cpus > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5) {
printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%d pages, which is more than 80%% of physical pages "
- "(%lu).\n", max * smp_num_cpus, num_physpages / 5 * 4);
+ "%d MB, which is more than 80%% of physical RAM "
+ "(%lu).\n", max * smp_num_cpus,
+ (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
return count;
}
for (i = 0; i < NR_CPUS; i++) {
struct trace_cpu_data *tcd;
tcd = &trace_data[i].tcd;
- tcd->tcd_max_pages = max;
+ tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
}
- out:
- kfree(string);
return count;
}
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data)
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
{
struct trace_cpu_data *tcd;
unsigned long flags;
int rc;
tcd = trace_get_tcd(flags);
- rc = snprintf(page, count, "%lu\n", tcd->tcd_max_pages * smp_num_cpus);
+ rc = snprintf(page, count, "%lu\n",
+ (tcd->tcd_max_pages * smp_num_cpus) >> (20 - PAGE_SHIFT));
trace_put_tcd(tcd, flags);
return rc;
unsigned long count, void *data);
int trace_read_daemon_file(char *page, char **start, off_t off, int count,
int *eof, void *data);
-int trace_write_debug_size(struct file *file, const char *buffer,
- unsigned long count, void *data);
-int trace_read_debug_size(char *page, char **start, off_t off, int count,
- int *eof, void *data);
+int trace_write_debug_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data);
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data);
int trace_dk(struct file *file, const char *buffer, unsigned long count,
void *data);
*start = page + prd->skip;
user_len = -prd->skip;
- for (; prd->curr != &kpr_routes; prd->curr = prd->curr->next) {
+ while ((prd->curr != NULL) && (prd->curr != &kpr_routes)) {
re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
ge = re->kpre_gateway;
chunk_len += line_len;
user_len += line_len;
- /* The route table will exceed one page */
- if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count)) {
- prd->curr = prd->curr->next;
- break;
+ /* Abort the route list changed */
+ if (prd->curr->next == NULL) {
+ prd->curr = NULL;
+ read_unlock(&kpr_rwlock);
+ return sprintf(page, "\nError: Routes Changed\n");
}
+
+ prd->curr = prd->curr->next;
+
+ /* The route table will exceed one page, break the while loop
+ * so the function can be re-called with a new page.
+ */
+ if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count))
+ break;
}
*eof = 0;
in = fopen(filename, "r");
if (in == NULL) {
+ if (errno == ENOENT) /* no dump file created */
+ return 0;
+
fprintf(stderr, "fopen(%s) failed: %s\n", filename,
strerror(errno));
return 1;
return parse_buffer(in, out);
}
-const char debug_daemon_usage[]="usage: debug_daemon {start file [MB]|stop}\n";
+const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n";
+#define DAEMON_FILE "/proc/sys/portals/daemon_file"
int jt_dbg_debug_daemon(int argc, char **argv)
{
- int i, rc, fd;
- unsigned int cmd = 0;
- struct portal_ioctl_data data;
+ int rc = 1, fd;
if (argc <= 1) {
- fprintf(stderr, debug_daemon_usage);
- return 0;
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ return 1;
}
- fd = open("/proc/sys/portals/daemon_file", O_WRONLY);
+ fd = open(DAEMON_FILE, O_WRONLY);
if (fd < 0) {
- fprintf(stderr, "open(daemon_file) failed: %s\n",
+ fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE,
strerror(errno));
- return 1;
- }
-
- if (strcasecmp(argv[1], "start") == 0) {
- if (argc != 3) {
- fprintf(stderr, debug_daemon_usage);
- return 1;
+ } else if (strcasecmp(argv[1], "start") == 0) {
+ if (argc < 3 || argc > 4 ||
+ (argc == 4 && strlen(argv[3]) > 5)) {
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ goto out;
}
+ if (argc == 4) {
+ char size[12] = "size=";
+ long sizecheck;
+
+ sizecheck = strtoul(argv[3], NULL, 0);
+ if (sizecheck < 10 || sizecheck > 20480) {
+ fprintf(stderr, "size %s invalid, must be in "
"the range 10-20480 MB\n", argv[3]);
+ } else {
+ strncat(size, argv[3], sizeof(size) - 6);
+ rc = write(fd, size, strlen(size));
+ if (rc != strlen(size)) {
+ fprintf(stderr, "set %s failed: %s\n", size, strerror(errno));
+ }
+ }
+ }
rc = write(fd, argv[2], strlen(argv[2]));
if (rc != strlen(argv[2])) {
- fprintf(stderr, "write(%s) failed: %s\n", argv[2],
- strerror(errno));
- close(fd);
- return 1;
+ fprintf(stderr, "start debug_daemon on %s failed: %s\n",
+ argv[2], strerror(errno));
+ goto out;
}
+
+ rc = 0;
} else if (strcasecmp(argv[1], "stop") == 0) {
rc = write(fd, "stop", 4);
if (rc != 4) {
- fprintf(stderr, "write(stop) failed: %s\n",
+ fprintf(stderr, "stopping debug_daemon failed: %s\n",
strerror(errno));
- close(fd);
- return 1;
+ goto out;
}
+ rc = 0;
} else {
- fprintf(stderr, debug_daemon_usage);
- return 1;
+ fprintf(stderr, debug_daemon_usage, argv[0]);
+ rc = 1;
}
+out:
close(fd);
return 0;
}
ENTRY;
LASSERT(desc != NULL);
- LASSERT(desc->bd_page_count != 0x5a5a5a5a); /* not freed already */
+ LASSERT(desc->bd_page_count != LI_POISON); /* not freed already */
LASSERT(!desc->bd_network_rw); /* network hands off or */
LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL));
if (desc->bd_export)
else
class_import_put(desc->bd_import);
- OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
+ OBD_FREE(desc, offsetof(struct ptlrpc_bulk_desc,
bd_iov[desc->bd_max_pages]));
EXIT;
}
RETURN(-EPROTO);
}
- /* Store transno in reqmsg for replay. */
- req->rq_reqmsg->transno = req->rq_transno = req->rq_repmsg->transno;
-
rc = ptlrpc_check_status(req);
/* Either we've been evicted, or the server has failed for
RETURN(rc);
}
+ /* Store transno in reqmsg for replay. */
+ req->rq_reqmsg->transno = req->rq_transno = req->rq_repmsg->transno;
+
if (req->rq_import->imp_replayable) {
spin_lock_irqsave(&imp->imp_lock, flags);
if (req->rq_replay || req->rq_transno != 0)
}
if (req->rq_phase == RQ_PHASE_RPC) {
- if (req->rq_waiting || req->rq_resend) {
+ if (req->rq_timedout||req->rq_waiting||req->rq_resend) {
int status;
ptlrpc_unregister_reply(req);
spin_unlock_irqrestore(&imp->imp_lock,
flags);
continue;
- }
+ }
list_del_init(&req->rq_list);
if (status != 0) {
ENTRY;
DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)",
- (long)req->rq_sent, LTIME_S(CURRENT_TIME) - req->rq_sent);
+ (long)req->rq_sent, CURRENT_SECONDS - req->rq_sent);
spin_lock_irqsave (&req->rq_lock, flags);
req->rq_timedout = 1;
{
struct ptlrpc_request_set *set = data;
struct list_head *tmp;
- time_t now = LTIME_S (CURRENT_TIME);
+ time_t now = CURRENT_SECONDS;
ENTRY;
LASSERT(set != NULL);
int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
{
struct list_head *tmp;
- time_t now = LTIME_S(CURRENT_TIME);
+ time_t now = CURRENT_SECONDS;
time_t deadline;
int timeout = 0;
struct ptlrpc_request *req;
CDEBUG(D_HA, "set %p going to sleep for %d seconds\n",
set, timeout);
lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ,
- ptlrpc_expired_set,
+ ptlrpc_expired_set,
ptlrpc_interrupted_set, set);
rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
if (req->rq_err) {
rc = -EIO;
- }
+ }
else if (req->rq_intr) {
rc = -EINTR;
}
else {
GOTO(restart, rc);
}
- }
+ }
if (rc != 0) {
list_del_init(&req->rq_list);
ENTRY;
spin_lock_irqsave(&imp->imp_lock, flags);
- CDEBUG(D_HA, "setting import %s INVALID\n",
- imp->imp_target_uuid.uuid);
+ CDEBUG(D_HA, "setting import %s INVALID\n", imp->imp_target_uuid.uuid);
imp->imp_invalid = 1;
imp->imp_generation++;
spin_unlock_irqrestore(&imp->imp_lock, flags);
struct l_wait_info lwi;
int rc;
- LASSERT (!in_interrupt ()); /* might sleep */
+ LASSERT (!in_interrupt ()); /* might sleep */
- if (!ptlrpc_bulk_active(desc)) /* completed or */
- return; /* never registered */
-
- LASSERT (desc->bd_req == req); /* bd_req NULL until registered */
+ if (!ptlrpc_bulk_active(desc)) /* completed or */
+ return; /* never registered */
+
+ LASSERT (desc->bd_req == req); /* bd_req NULL until registered */
/* the unlink ensures the callback happens ASAP and is the last
* one. If it fails, it must be because completion just
LASSERT(!ptlrpc_bulk_active(desc));
return;
}
-
+
LASSERT (rc == PTL_OK);
-
+
if (req->rq_set != NULL)
wq = &req->rq_set->set_waitq;
else
rc = l_wait_event(*wq, !ptlrpc_bulk_active(desc), &lwi);
if (rc == 0)
return;
-
+
LASSERT (rc == -ETIMEDOUT);
- CWARN("Unexpectedly long timeout: desc %p\n", desc);
+ DEBUG_REQ(D_WARNING,req,"Unexpectedly long timeout: desc %p\n",
+ desc);
}
}
ptlrpc_request_addref(request); /* +1 ref for the SENT callback */
- request->rq_sent = LTIME_S(CURRENT_TIME);
+ request->rq_sent = CURRENT_SECONDS;
ptlrpc_pinger_sending_on_import(request->rq_import);
rc = ptl_send_buf(&request->rq_req_md_h,
request->rq_reqmsg, request->rq_reqlen,
goto put_conn;
}
- request->rq_export->exp_last_request_time =
- LTIME_S(CURRENT_TIME);
+ request->rq_export->exp_last_request_time = CURRENT_SECONDS;
}
CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:ni:nid:opc "
if [ "$REPLAY_DUAL" != "no" ]; then
sh replay-dual.sh
fi
+
+RC=$?
+echo "completed with rc $RC" && exit $RC
build_test_filter
+if [ "$ONLY" == "cleanup" ]; then
+ cleanup
+ exit
+fi
+
#create single point mountpoint
gen_config
${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} $@ \
$conf_opt || exit 2
+[ $DEBUG ] && sysctl -w portals.debug=$DEBUG
+
if [ "$MOUNT2" ]; then
$LLMOUNT -v `hostname`:/mds1/client $MOUNT2 || exit 3
fi
${LCONF} $NOMOD $portals_opt $lustre_opt $node_opt $@ $conf_opt || exit 2
+[ $DEBUG ] && sysctl -w portals.debug=$DEBUG
+
if [ "$MOUNT2" ]; then
$LLMOUNT -v `hostname`:/mds1/client $MOUNT2 || exit 3
fi
STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
-MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))}
+MAXFREE=${MAXFREE:-$((400000 * $STRIPECOUNT))}
if [ $ORIGFREE -gt $MAXFREE ]; then
echo "skipping out-of-space test on $OSC"
echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB"
STRIPECOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
ORIGFREE=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
-MAXFREE=${MAXFREE:-$((200000 * $STRIPECOUNT))}
+MAXFREE=${MAXFREE:-$((400000 * $STRIPECOUNT))}
if [ $ORIGFREE -gt $MAXFREE ]; then
echo "skipping out-of-space test on $OSC"
echo "reports ${ORIGFREE}kB free, more tham MAXFREE ${MAXFREE}kB"
echo "skipping out-of-space test on OST0"
return
fi
- if [ ! -d $DIR/d27 ]; then
- mkdir -p $DIR/d27
- fi
+ mkdir -p $DIR/d27
$LSTRIPE $DIR/d27/f27m_1 0 0 1
dd if=/dev/zero of=$DIR/d27/f27m_1 bs=1024 count=$MAXFREE && \
error "dd should fill OST0"
i=2
while $LSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do
i=`expr $i + 1`
- if [ $i -gt 2000 ] ; then
- break
- fi
+ [ $i -gt 256 ] && break
done
i=`expr $i + 1`
touch $DIR/d27/f27m_$i
OPENUNLINK=${OPENUNLINK:-openunlink}
TOEXCL=${TOEXCL:-toexcl}
TRUNCATE=${TRUNCATE:-truncate}
+export TMP=${TMP:-/tmp}
if [ $UID -ne 0 ]; then
RUNAS_ID="$UID"
$LCTL mark "REPLAY BARRIER"
}
+replay_barrier_nodf() {
+ local facet=$1
+ do_facet $facet sync
+ do_facet $facet $LCTL --device %${facet}_svc readonly
+ do_facet $facet $LCTL --device %${facet}_svc notransno
+ do_facet $facet $LCTL mark "REPLAY BARRIER"
+ $LCTL mark "REPLAY BARRIER"
+}
+
mds_evict_client() {
UUID=`cat /proc/fs/lustre/mdc/*_MNT_*/uuid`
do_facet mds "echo $UUID > /proc/fs/lustre/mds/mds_svc/evict_client"
}
declare -fx h2elan
+h2openib() {
+ if [ "$1" = "client" -o "$1" = "'*'" ]; then echo \'*\'; else
+ echo $1 | sed 's/[^0-9]*//g'
+ fi
+}
+declare -fx h2openib
+
facet_host() {
local facet=$1
varname=${facet}_HOST
log() {
echo "$*"
- lctl mark "$*" 2> /dev/null || true
+ $LCTL mark "$*" 2> /dev/null || true
}
pass() {
equals_msg $testnum: $message
BEFORE=`date +%s`
- log "== test $testnum: $message =========== `date +%H:%M:%S` ($BEFORE)"
+ log "== test $testnum: $message ============ `date +%H:%M:%S` ($BEFORE)"
test_${testnum} || error "test_$testnum failed with $?"
pass "($((`date +%s` - $BEFORE))s)"
}
struct stat st;
int rc;
- utb.actime = 0x47114711;
- utb.modtime = 0x11471147;
-
+ utb.actime = 0x47114711;
+ utb.modtime = 0x11471147;
if (argc != 2)
usage(argv[0]);
exit(0);
}
+ rc = access(target, F_OK);
+ if (rc) {
+ rc = errno;
+ fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target,
+ strerror(errno));
+ return rc;
+ }
+
rc = mount(source, target, "lustre", 0, (void *)&lmd);
if (rc) {
rc = errno;
- perror(argv[0]);
fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
source, target, strerror(errno));
if (rc == ENODEV)
fprintf(stderr, "Are the lustre modules loaded?\n"
"Check /etc/modules.conf and /proc/filesystems\n");